author		Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-12-24 12:43:21 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-12-24 12:43:21 -0500
commit		5bd2010fbe027b224db2e74a4fdfec9a7b7918d2 (patch)
tree		59106aae3930a3608409c101ec32d68742c8d168 /fs
parent		41f107266b19d100c1bcef9e1e1aef00692c1209 (diff)
parent		413541dd66d51f791a0b169d9b9014e4f56be13c (diff)
Merge 3.13-rc5 into staging-next
We want these fixes here to handle some merge issues.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--	fs/aio.c	113
-rw-r--r--	fs/btrfs/extent-tree.c	22
-rw-r--r--	fs/btrfs/ioctl.c	3
-rw-r--r--	fs/btrfs/relocation.c	81
-rw-r--r--	fs/btrfs/send.c	4
-rw-r--r--	fs/btrfs/super.c	5
-rw-r--r--	fs/ceph/addr.c	8
-rw-r--r--	fs/ceph/inode.c	136
-rw-r--r--	fs/dcache.c	2
-rw-r--r--	fs/namei.c	7
-rw-r--r--	fs/nfsd/nfscache.c	9
-rw-r--r--	fs/proc/inode.c	14
-rw-r--r--	fs/pstore/platform.c	7
-rw-r--r--	fs/xfs/xfs_bmap.c	32
-rw-r--r--	fs/xfs/xfs_bmap_util.c	14
-rw-r--r--	fs/xfs/xfs_buf.c	37
-rw-r--r--	fs/xfs/xfs_buf.h	11
-rw-r--r--	fs/xfs/xfs_buf_item.c	21
-rw-r--r--	fs/xfs/xfs_dir2_node.c	26
-rw-r--r--	fs/xfs/xfs_discard.c	5
-rw-r--r--	fs/xfs/xfs_fsops.c	6
-rw-r--r--	fs/xfs/xfs_ioctl.c	3
-rw-r--r--	fs/xfs/xfs_ioctl32.c	3
-rw-r--r--	fs/xfs/xfs_iops.c	3
-rw-r--r--	fs/xfs/xfs_log_recover.c	13
-rw-r--r--	fs/xfs/xfs_qm.c	80
-rw-r--r--	fs/xfs/xfs_trans_buf.c	13
27 files changed, 409 insertions, 269 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6cb22e..062a5f6a1448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
 	int i;
 
 	for (i = 0; i < ctx->nr_pages; i++) {
+		struct page *page;
 		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
 				page_count(ctx->ring_pages[i]));
-		put_page(ctx->ring_pages[i]);
+		page = ctx->ring_pages[i];
+		if (!page)
+			continue;
+		ctx->ring_pages[i] = NULL;
+		put_page(page);
 	}
 
 	put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 	unsigned long flags;
 	int rc;
 
+	rc = 0;
+
+	/* Make sure the old page hasn't already been changed */
+	spin_lock(&mapping->private_lock);
+	ctx = mapping->private_data;
+	if (ctx) {
+		pgoff_t idx;
+		spin_lock_irqsave(&ctx->completion_lock, flags);
+		idx = old->index;
+		if (idx < (pgoff_t)ctx->nr_pages) {
+			if (ctx->ring_pages[idx] != old)
+				rc = -EAGAIN;
+		} else
+			rc = -EINVAL;
+		spin_unlock_irqrestore(&ctx->completion_lock, flags);
+	} else
+		rc = -EINVAL;
+	spin_unlock(&mapping->private_lock);
+
+	if (rc != 0)
+		return rc;
+
 	/* Writeback must be complete */
 	BUG_ON(PageWriteback(old));
-	put_page(old);
+	get_page(new);
 
-	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
 	if (rc != MIGRATEPAGE_SUCCESS) {
-		get_page(old);
+		put_page(new);
 		return rc;
 	}
 
-	get_page(new);
-
 	/* We can potentially race against kioctx teardown here.  Use the
 	 * address_space's private data lock to protect the mapping's
 	 * private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 		spin_lock_irqsave(&ctx->completion_lock, flags);
 		migrate_page_copy(new, old);
 		idx = old->index;
-		if (idx < (pgoff_t)ctx->nr_pages)
-			ctx->ring_pages[idx] = new;
+		if (idx < (pgoff_t)ctx->nr_pages) {
+			/* And only do the move if things haven't changed */
+			if (ctx->ring_pages[idx] == old)
+				ctx->ring_pages[idx] = new;
+			else
+				rc = -EAGAIN;
+		} else
+			rc = -EINVAL;
 		spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	} else
 		rc = -EBUSY;
 	spin_unlock(&mapping->private_lock);
 
+	if (rc == MIGRATEPAGE_SUCCESS)
+		put_page(old);
+	else
+		put_page(new);
+
 	return rc;
 }
 #endif
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	struct aio_ring *ring;
 	unsigned nr_events = ctx->max_reqs;
 	struct mm_struct *mm = current->mm;
-	unsigned long size, populate;
+	unsigned long size, unused;
 	int nr_pages;
 	int i;
 	struct file *file;
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx)
 		return -EAGAIN;
 	}
 
+	ctx->aio_ring_file = file;
+	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+			/ sizeof(struct io_event);
+
+	ctx->ring_pages = ctx->internal_pages;
+	if (nr_pages > AIO_RING_PAGES) {
+		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+					  GFP_KERNEL);
+		if (!ctx->ring_pages) {
+			put_aio_ring_file(ctx);
+			return -ENOMEM;
+		}
+	}
+
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
 		page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
 		SetPageUptodate(page);
 		SetPageDirty(page);
 		unlock_page(page);
+
+		ctx->ring_pages[i] = page;
 	}
-	ctx->aio_ring_file = file;
-	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
-			/ sizeof(struct io_event);
+	ctx->nr_pages = i;
 
-	ctx->ring_pages = ctx->internal_pages;
-	if (nr_pages > AIO_RING_PAGES) {
-		ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!ctx->ring_pages) {
-			put_aio_ring_file(ctx);
-			return -ENOMEM;
-		}
+	if (unlikely(i != nr_pages)) {
+		aio_free_ring(ctx);
+		return -EAGAIN;
 	}
 
 	ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 	down_write(&mm->mmap_sem);
 	ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
 				       PROT_READ | PROT_WRITE,
-				       MAP_SHARED | MAP_POPULATE, 0, &populate);
+				       MAP_SHARED, 0, &unused);
+	up_write(&mm->mmap_sem);
 	if (IS_ERR((void *)ctx->mmap_base)) {
-		up_write(&mm->mmap_sem);
 		ctx->mmap_size = 0;
 		aio_free_ring(ctx);
 		return -EAGAIN;
@@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
 
-	/* We must do this while still holding mmap_sem for write, as we
-	 * need to be protected against userspace attempting to mremap()
-	 * or munmap() the ring buffer.
-	 */
-	ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
-				       1, 0, ctx->ring_pages, NULL);
-
-	/* Dropping the reference here is safe as the page cache will hold
-	 * onto the pages for us.  It is also required so that page migration
-	 * can unmap the pages and get the right reference count.
-	 */
-	for (i = 0; i < ctx->nr_pages; i++)
-		put_page(ctx->ring_pages[i]);
-
-	up_write(&mm->mmap_sem);
-
-	if (unlikely(ctx->nr_pages != nr_pages)) {
-		aio_free_ring(ctx);
-		return -EAGAIN;
-	}
-
 	ctx->user_id = ctx->mmap_base;
 	ctx->nr_events = nr_events; /* trusted copy */
 
@@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	aio_nr += ctx->max_reqs;
 	spin_unlock(&aio_nr_lock);
 
 	percpu_ref_get(&ctx->users);	/* io_setup() will drop this ref */
+	percpu_ref_get(&ctx->reqs);	/* free_ioctx_users() will drop this */
 
 	err = ioctx_add_table(ctx, mm);
 	if (err)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 45d98d01028f..9c01509dd8ab 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -767,20 +767,19 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	if (metadata) {
-		key.objectid = bytenr;
-		key.type = BTRFS_METADATA_ITEM_KEY;
-		key.offset = offset;
-	} else {
-		key.objectid = bytenr;
-		key.type = BTRFS_EXTENT_ITEM_KEY;
-		key.offset = offset;
-	}
-
 	if (!trans) {
 		path->skip_locking = 1;
 		path->search_commit_root = 1;
 	}
+
+search_again:
+	key.objectid = bytenr;
+	key.offset = offset;
+	if (metadata)
+		key.type = BTRFS_METADATA_ITEM_KEY;
+	else
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+
 again:
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
 				&key, path, 0, 0);
@@ -788,7 +787,6 @@ again:
 		goto out_free;
 
 	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
-		metadata = 0;
 		if (path->slots[0]) {
 			path->slots[0]--;
 			btrfs_item_key_to_cpu(path->nodes[0], &key,
@@ -855,7 +853,7 @@ again:
 			mutex_lock(&head->mutex);
 			mutex_unlock(&head->mutex);
 			btrfs_put_delayed_ref(&head->node);
-			goto again;
+			goto search_again;
 		}
 		if (head->extent_op && head->extent_op->update_flags)
 			extent_flags |= head->extent_op->flags_to_set;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a111622598b0..21da5762b0b1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2121,7 +2121,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 
 	err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
 	if (err == -EINTR)
-		goto out;
+		goto out_drop_write;
 	dentry = lookup_one_len(vol_args->name, parent, namelen);
 	if (IS_ERR(dentry)) {
 		err = PTR_ERR(dentry);
@@ -2284,6 +2284,7 @@ out_dput:
 	dput(dentry);
out_unlock_dir:
 	mutex_unlock(&dir->i_mutex);
+out_drop_write:
 	mnt_drop_write_file(file);
out:
 	kfree(vol_args);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ce459a7cb16d..429c73c374b8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -571,7 +571,9 @@ static int is_cowonly_root(u64 root_objectid)
 	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
 	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
-	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
+	    root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
+	    root_objectid == BTRFS_UUID_TREE_OBJECTID ||
+	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
 		return 1;
 	return 0;
 }
@@ -1264,10 +1266,10 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
 }
 
 /*
- * helper to update/delete the 'address of tree root -> reloc tree'
+ * helper to delete the 'address of tree root -> reloc tree'
 * mapping
 */
-static int __update_reloc_root(struct btrfs_root *root, int del)
+static void __del_reloc_root(struct btrfs_root *root)
 {
 	struct rb_node *rb_node;
 	struct mapping_node *node = NULL;
@@ -1275,7 +1277,7 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
 
 	spin_lock(&rc->reloc_root_tree.lock);
 	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
-			      root->commit_root->start);
+			      root->node->start);
 	if (rb_node) {
 		node = rb_entry(rb_node, struct mapping_node, rb_node);
 		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1283,23 +1285,45 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
 	spin_unlock(&rc->reloc_root_tree.lock);
 
 	if (!node)
-		return 0;
+		return;
 	BUG_ON((struct btrfs_root *)node->data != root);
 
-	if (!del) {
-		spin_lock(&rc->reloc_root_tree.lock);
-		node->bytenr = root->node->start;
-		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
-				      node->bytenr, &node->rb_node);
-		spin_unlock(&rc->reloc_root_tree.lock);
-		if (rb_node)
-			backref_tree_panic(rb_node, -EEXIST, node->bytenr);
-	} else {
-		spin_lock(&root->fs_info->trans_lock);
-		list_del_init(&root->root_list);
-		spin_unlock(&root->fs_info->trans_lock);
-		kfree(node);
+	spin_lock(&root->fs_info->trans_lock);
+	list_del_init(&root->root_list);
+	spin_unlock(&root->fs_info->trans_lock);
+	kfree(node);
+}
+
+/*
+ * helper to update the 'address of tree root -> reloc tree'
+ * mapping
+ */
+static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+{
+	struct rb_node *rb_node;
+	struct mapping_node *node = NULL;
+	struct reloc_control *rc = root->fs_info->reloc_ctl;
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
+			      root->node->start);
+	if (rb_node) {
+		node = rb_entry(rb_node, struct mapping_node, rb_node);
+		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
 	}
+	spin_unlock(&rc->reloc_root_tree.lock);
+
+	if (!node)
+		return 0;
+	BUG_ON((struct btrfs_root *)node->data != root);
+
+	spin_lock(&rc->reloc_root_tree.lock);
+	node->bytenr = new_bytenr;
+	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
+			      node->bytenr, &node->rb_node);
+	spin_unlock(&rc->reloc_root_tree.lock);
+	if (rb_node)
+		backref_tree_panic(rb_node, -EEXIST, node->bytenr);
 	return 0;
 }
 
@@ -1420,7 +1444,6 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_root *reloc_root;
 	struct btrfs_root_item *root_item;
-	int del = 0;
 	int ret;
 
 	if (!root->reloc_root)
@@ -1432,11 +1455,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
 	if (root->fs_info->reloc_ctl->merge_reloc_tree &&
 	    btrfs_root_refs(root_item) == 0) {
 		root->reloc_root = NULL;
-		del = 1;
+		__del_reloc_root(reloc_root);
 	}
 
-	__update_reloc_root(reloc_root, del);
-
 	if (reloc_root->commit_root != reloc_root->node) {
 		btrfs_set_root_node(root_item, reloc_root->node);
 		free_extent_buffer(reloc_root->commit_root);
@@ -2287,7 +2308,7 @@ void free_reloc_roots(struct list_head *list)
 	while (!list_empty(list)) {
 		reloc_root = list_entry(list->next, struct btrfs_root,
 					root_list);
-		__update_reloc_root(reloc_root, 1);
+		__del_reloc_root(reloc_root);
 		free_extent_buffer(reloc_root->node);
 		free_extent_buffer(reloc_root->commit_root);
 		kfree(reloc_root);
@@ -2332,7 +2353,7 @@ again:
 
 		ret = merge_reloc_root(rc, root);
 		if (ret) {
-			__update_reloc_root(reloc_root, 1);
+			__del_reloc_root(reloc_root);
 			free_extent_buffer(reloc_root->node);
 			free_extent_buffer(reloc_root->commit_root);
 			kfree(reloc_root);
@@ -2388,6 +2409,13 @@ out:
 		btrfs_std_error(root->fs_info, ret);
 		if (!list_empty(&reloc_roots))
 			free_reloc_roots(&reloc_roots);
+
+		/* new reloc root may be added */
+		mutex_lock(&root->fs_info->reloc_mutex);
+		list_splice_init(&rc->reloc_roots, &reloc_roots);
+		mutex_unlock(&root->fs_info->reloc_mutex);
+		if (!list_empty(&reloc_roots))
+			free_reloc_roots(&reloc_roots);
 	}
 
 	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
@@ -4522,6 +4550,11 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
 	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
 	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
 
+	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+		if (buf == root->node)
+			__update_reloc_root(root, cow->start);
+	}
+
 	level = btrfs_header_level(buf);
 	if (btrfs_header_generation(buf) <=
 	    btrfs_root_last_snapshot(&root->root_item))
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6837fe87f3a6..945d1db98f26 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4723,8 +4723,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	}
 
 	if (!access_ok(VERIFY_READ, arg->clone_sources,
-			sizeof(*arg->clone_sources *
-			arg->clone_sources_count))) {
+			sizeof(*arg->clone_sources) *
+			arg->clone_sources_count)) {
 		ret = -EFAULT;
 		goto out;
 	}
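The send.c hunk is a misplaced-parenthesis bug worth spelling out: sizeof(*p * n) yields the size of the expression *p * n, i.e. one element's type after the usual arithmetic conversions, regardless of n, so the old code validated only eight bytes of the clone-sources array. The corrected form multiplies the element size by the count. A userspace illustration (sizes shown for a typical LP64 target):

#include <stdio.h>

int main(void)
{
	unsigned long long arr[8];
	unsigned long long *p = arr;
	size_t n = 8;

	/* Wrong: sizeof of the expression (*p * n) == one element's size */
	printf("%zu\n", sizeof(*p * n));	/* prints 8 */
	/* Right: element size times element count */
	printf("%zu\n", sizeof(*p) * n);	/* prints 64 */
	return 0;
}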
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2d8ac1bf0cf9..d71a11d13dfa 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -432,7 +432,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			} else {
 				printk(KERN_INFO "btrfs: setting nodatacow\n");
 			}
-			info->compress_type = BTRFS_COMPRESS_NONE;
 			btrfs_clear_opt(info->mount_opt, COMPRESS);
 			btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 			btrfs_set_opt(info->mount_opt, NODATACOW);
@@ -461,7 +460,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 				btrfs_set_fs_incompat(info, COMPRESS_LZO);
 			} else if (strncmp(args[0].from, "no", 2) == 0) {
 				compress_type = "no";
-				info->compress_type = BTRFS_COMPRESS_NONE;
 				btrfs_clear_opt(info->mount_opt, COMPRESS);
 				btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
 				compress_force = false;
@@ -474,9 +472,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
 				pr_info("btrfs: force %s compression\n",
 					compress_type);
-			} else
+			} else if (btrfs_test_opt(root, COMPRESS)) {
 				pr_info("btrfs: use %s compression\n",
 					compress_type);
+			}
 			break;
 		case Opt_ssd:
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e561c059539..ec3ba43b9faa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -210,9 +210,13 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 	if (err < 0) {
 		SetPageError(page);
 		goto out;
-	} else if (err < PAGE_CACHE_SIZE) {
+	} else {
+		if (err < PAGE_CACHE_SIZE) {
 		/* zero fill remainder of page */
-		zero_user_segment(page, err, PAGE_CACHE_SIZE);
+			zero_user_segment(page, err, PAGE_CACHE_SIZE);
+		} else {
+			flush_dcache_page(page);
+		}
 	}
 	SetPageUptodate(page);
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9a8e396aed89..278fd2891288 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -978,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-	int i = 0;
 	int err = 0;
 
 	dout("fill_trace %p is_dentry %d is_target %d\n", req,
@@ -1039,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		}
 	}
 
+	if (rinfo->head->is_target) {
+		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+
+		in = ceph_get_inode(sb, vino);
+		if (IS_ERR(in)) {
+			err = PTR_ERR(in);
+			goto done;
+		}
+		req->r_target_inode = in;
+
+		err = fill_inode(in, &rinfo->targeti, NULL,
+				session, req->r_request_started,
+				(le32_to_cpu(rinfo->head->result) == 0) ?
+				req->r_fmode : -1,
+				&req->r_caps_reservation);
+		if (err < 0) {
+			pr_err("fill_inode badness %p %llx.%llx\n",
+				in, ceph_vinop(in));
+			goto done;
+		}
+	}
+
 	/*
 	 * ignore null lease/binding on snapdir ENOENT, or else we
 	 * will have trouble splicing in the virtual snapdir later
@@ -1108,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			     ceph_dentry(req->r_old_dentry)->offset);
 
 			dn = req->r_old_dentry;  /* use old_dentry */
-			in = dn->d_inode;
 		}
 
 		/* null dentry? */
@@ -1130,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		}
 
 		/* attach proper inode */
-		ininfo = rinfo->targeti.in;
-		vino.ino = le64_to_cpu(ininfo->ino);
-		vino.snap = le64_to_cpu(ininfo->snapid);
-		in = dn->d_inode;
-		if (!in) {
-			in = ceph_get_inode(sb, vino);
-			if (IS_ERR(in)) {
-				pr_err("fill_trace bad get_inode "
-				       "%llx.%llx\n", vino.ino, vino.snap);
-				err = PTR_ERR(in);
-				d_drop(dn);
-				goto done;
-			}
+		if (!dn->d_inode) {
+			ihold(in);
 			dn = splice_dentry(dn, in, &have_lease, true);
 			if (IS_ERR(dn)) {
 				err = PTR_ERR(dn);
 				goto done;
 			}
 			req->r_dentry = dn;  /* may have spliced */
-			ihold(in);
-		} else if (ceph_ino(in) == vino.ino &&
-			   ceph_snap(in) == vino.snap) {
-			ihold(in);
-		} else {
+		} else if (dn->d_inode && dn->d_inode != in) {
 			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
-			     dn, in, ceph_ino(in), ceph_snap(in),
-			     vino.ino, vino.snap);
+			     dn, dn->d_inode, ceph_vinop(dn->d_inode),
+			     ceph_vinop(in));
 			have_lease = false;
-			in = NULL;
 		}
 
 		if (have_lease)
 			update_dentry_lease(dn, rinfo->dlease, session,
 					    req->r_request_started);
 		dout(" final dn %p\n", dn);
-		i++;
-	} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
-		    req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
+	} else if (!req->r_aborted &&
+		   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+		    req->r_op == CEPH_MDS_OP_MKSNAP)) {
 		struct dentry *dn = req->r_dentry;
 
 		/* fill out a snapdir LOOKUPSNAP dentry */
@@ -1177,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		ininfo = rinfo->targeti.in;
 		vino.ino = le64_to_cpu(ininfo->ino);
 		vino.snap = le64_to_cpu(ininfo->snapid);
-		in = ceph_get_inode(sb, vino);
-		if (IS_ERR(in)) {
-			pr_err("fill_inode get_inode badness %llx.%llx\n",
-			       vino.ino, vino.snap);
-			err = PTR_ERR(in);
-			d_delete(dn);
-			goto done;
-		}
 		dout(" linking snapped dir %p to dn %p\n", in, dn);
+		ihold(in);
 		dn = splice_dentry(dn, in, NULL, true);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
 		}
 		req->r_dentry = dn;  /* may have spliced */
-		ihold(in);
-		rinfo->head->is_dentry = 1;  /* fool notrace handlers */
-	}
-
-	if (rinfo->head->is_target) {
-		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
-		if (in == NULL || ceph_ino(in) != vino.ino ||
-		    ceph_snap(in) != vino.snap) {
-			in = ceph_get_inode(sb, vino);
-			if (IS_ERR(in)) {
-				err = PTR_ERR(in);
-				goto done;
-			}
-		}
-		req->r_target_inode = in;
-
-		err = fill_inode(in,
-				 &rinfo->targeti, NULL,
-				 session, req->r_request_started,
-				 (le32_to_cpu(rinfo->head->result) == 0) ?
-				 req->r_fmode : -1,
-				 &req->r_caps_reservation);
-		if (err < 0) {
-			pr_err("fill_inode badness %p %llx.%llx\n",
-			       in, ceph_vinop(in));
-			goto done;
-		}
 	}
-
done:
 	dout("fill_trace done err=%d\n", err);
 	return err;
@@ -1272,7 +1240,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	struct qstr dname;
 	struct dentry *dn;
 	struct inode *in;
-	int err = 0, i;
+	int err = 0, ret, i;
 	struct inode *snapdir = NULL;
 	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
 	struct ceph_dentry_info *di;
@@ -1305,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 		ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
 	}
 
+	/* FIXME: release caps/leases if error occurs */
 	for (i = 0; i < rinfo->dir_nr; i++) {
 		struct ceph_vino vino;
 
@@ -1329,9 +1298,10 @@ retry_lookup:
 				err = -ENOMEM;
 				goto out;
 			}
-			err = ceph_init_dentry(dn);
-			if (err < 0) {
+			ret = ceph_init_dentry(dn);
+			if (ret < 0) {
 				dput(dn);
+				err = ret;
 				goto out;
 			}
 		} else if (dn->d_inode &&
@@ -1351,9 +1321,6 @@ retry_lookup:
 			spin_unlock(&parent->d_lock);
 		}
 
-		di = dn->d_fsdata;
-		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
-
 		/* inode */
 		if (dn->d_inode) {
 			in = dn->d_inode;
@@ -1366,26 +1333,39 @@ retry_lookup:
 				err = PTR_ERR(in);
 				goto out;
 			}
-			dn = splice_dentry(dn, in, NULL, false);
-			if (IS_ERR(dn))
-				dn = NULL;
 		}
 
 		if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
 			       req->r_request_started, -1,
 			       &req->r_caps_reservation) < 0) {
 			pr_err("fill_inode badness on %p\n", in);
+			if (!dn->d_inode)
+				iput(in);
+			d_drop(dn);
 			goto next_item;
 		}
-		if (dn)
-			update_dentry_lease(dn, rinfo->dir_dlease[i],
-					    req->r_session,
-					    req->r_request_started);
+
+		if (!dn->d_inode) {
+			dn = splice_dentry(dn, in, NULL, false);
+			if (IS_ERR(dn)) {
+				err = PTR_ERR(dn);
+				dn = NULL;
+				goto next_item;
+			}
+		}
+
+		di = dn->d_fsdata;
+		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+
+		update_dentry_lease(dn, rinfo->dir_dlease[i],
+				    req->r_session,
+				    req->r_request_started);
next_item:
 		if (dn)
 			dput(dn);
 	}
-	req->r_did_prepopulate = true;
+	if (err == 0)
+		req->r_did_prepopulate = true;
 
out:
 	if (snapdir) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 4bdb300b16e2..6055d61811d3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -192,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
 		if (!tcount)
 			return 0;
 	}
-	mask = ~(~0ul << tcount*8);
+	mask = bytemask_from_count(tcount);
 	return unlikely(!!((a ^ b) & mask));
 }
 
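A note on the helper: both this dcache.c hunk and the namei.c hunk below swap the open-coded low-byte mask for bytemask_from_count(). To the best of my reading of the v3.13 tree (worth double-checking include/linux/kernel.h), the helper is just the old expression behind a self-documenting name, so the change is purely cosmetic:

/* Assumed v3.13 definition (include/linux/kernel.h): a mask covering
 * the low 'cnt' bytes of an unsigned long. */
#define bytemask_from_count(cnt)	(~(~0ul << (cnt)*8))

/* On a 64-bit little-endian machine, bytemask_from_count(3) is
 * 0x0000000000ffffff, so (a ^ b) & mask compares only the first three
 * bytes of two word-at-a-time loads. */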
diff --git a/fs/namei.c b/fs/namei.c
index c53d3a9547f9..3531deebad30 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1598,11 +1598,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
 *   do a "get_unaligned()" if this helps and is sufficiently
 *   fast.
 *
- * - Little-endian machines (so that we can generate the mask
- *   of low bytes efficiently). Again, we *could* do a byte
- *   swapping load on big-endian architectures if that is not
- *   expensive enough to make the optimization worthless.
- *
 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
 *   do not trap on the (extremely unlikely) case of a page
 *   crossing operation.
@@ -1646,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 		if (!len)
 			goto done;
 	}
-	mask = ~(~0ul << len*8);
+	mask = bytemask_from_count(len);
 	hash += mask & a;
done:
 	return fold_hash(hash);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 9186c7ce0b14..b6af150c96b8 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -132,6 +132,13 @@ nfsd_reply_cache_alloc(void)
 }
 
 static void
+nfsd_reply_cache_unhash(struct svc_cacherep *rp)
+{
+	hlist_del_init(&rp->c_hash);
+	list_del_init(&rp->c_lru);
+}
+
+static void
 nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 {
 	if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
@@ -417,7 +424,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 		rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
 		if (nfsd_cache_entry_expired(rp) ||
 		    num_drc_entries >= max_drc_entries) {
-			lru_put_end(rp);
+			nfsd_reply_cache_unhash(rp);
 			prune_cache_entries();
 			goto search_cache;
 		}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 28955d4b7218..124fc43c7090 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -292,16 +292,20 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
{
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	unsigned long rv = -EIO;
-	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
-				  unsigned long, unsigned long) = NULL;
+
 	if (use_pde(pde)) {
+		typeof(proc_reg_get_unmapped_area) *get_area;
+
+		get_area = pde->proc_fops->get_unmapped_area;
#ifdef CONFIG_MMU
-		get_area = current->mm->get_unmapped_area;
+		if (!get_area)
+			get_area = current->mm->get_unmapped_area;
#endif
-		if (pde->proc_fops->get_unmapped_area)
-			get_area = pde->proc_fops->get_unmapped_area;
+
 		if (get_area)
 			rv = get_area(file, orig_addr, len, pgoff, flags);
+		else
+			rv = orig_addr;
 		unuse_pde(pde);
 	}
 	return rv;
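The rewritten proc_reg_get_unmapped_area() declares its function pointer as typeof(proc_reg_get_unmapped_area) * rather than spelling out the five-argument signature by hand, so the pointer's type can never drift from the function it shadows. A small userspace sketch of the same idiom (typeof is a GNU C extension supported by gcc and clang):

#include <stdio.h>

static long add(int a, int b)
{
	return (long)a + b;
}

int main(void)
{
	/* 'fp' automatically has type 'long (*)(int, int)' and will
	 * track add()'s signature if it ever changes. */
	typeof(add) *fp = add;

	printf("%ld\n", fp(2, 3));	/* prints 5 */
	return 0;
}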
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b8e93a40a5d3..78c3c2097787 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi)
 	pstore_get_records(0);
 
 	kmsg_dump_register(&pstore_dumper);
-	pstore_register_console();
-	pstore_register_ftrace();
+
+	if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
+		pstore_register_console();
+		pstore_register_ftrace();
+	}
 
 	if (pstore_update_ms >= 0) {
 		pstore_timer.expires = jiffies +
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3ef11b22e750..3b2c14b6f0fb 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent(
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
- * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
+ * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
 * at, or past the EOF.
 */
STATIC int
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof(
 	bma->aeof = 0;
 	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
				     &is_empty);
-	if (error || is_empty)
+	if (error)
 		return error;
 
+	if (is_empty) {
+		bma->aeof = 1;
+		return 0;
+	}
+
 	/*
	 * Check if we are allocation or past the last extent, or at least into
	 * the last delayed allocated extent.
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc(
 	int		isaligned;
 	int		tryagain;
 	int		error;
+	int		stripe_align;
 
 	ASSERT(ap->length);
 
 	mp = ap->ip->i_mount;
+
+	/* stripe alignment for allocation is determined by mount parameters */
+	stripe_align = 0;
+	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+		stripe_align = mp->m_swidth;
+	else if (mp->m_dalign)
+		stripe_align = mp->m_dalign;
+
 	align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
 	if (unlikely(align)) {
 		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc(
 		ASSERT(!error);
 		ASSERT(ap->length);
 	}
+
+
 	nullfb = *ap->firstblock == NULLFSBLOCK;
 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
 	if (nullfb) {
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc(
	 */
 	if (!ap->flist->xbf_low && ap->aeof) {
 		if (!ap->offset) {
-			args.alignment = mp->m_dalign;
+			args.alignment = stripe_align;
 			atype = args.type;
 			isaligned = 1;
 			/*
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc(
			 * of minlen+alignment+slop doesn't go up
			 * between the calls.
			 */
-			if (blen > mp->m_dalign && blen <= args.maxlen)
-				nextminlen = blen - mp->m_dalign;
+			if (blen > stripe_align && blen <= args.maxlen)
+				nextminlen = blen - stripe_align;
 			else
 				nextminlen = args.minlen;
-			if (nextminlen + mp->m_dalign > args.minlen + 1)
+			if (nextminlen + stripe_align > args.minlen + 1)
 				args.minalignslop =
-					nextminlen + mp->m_dalign -
+					nextminlen + stripe_align -
 					args.minlen - 1;
 			else
 				args.minalignslop = 0;
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc(
		 */
 		args.type = atype;
 		args.fsbno = ap->blkno;
-		args.alignment = mp->m_dalign;
+		args.alignment = stripe_align;
 		args.minlen = nextminlen;
 		args.minalignslop = 0;
 		isaligned = 1;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41c0323..1394106ed22d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1187,7 +1187,12 @@ xfs_zero_remaining_bytes(
 		XFS_BUF_UNWRITE(bp);
 		XFS_BUF_READ(bp);
 		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
-		xfsbdstrat(mp, bp);
+
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			error = XFS_ERROR(EIO);
+			break;
+		}
+		xfs_buf_iorequest(bp);
 		error = xfs_buf_iowait(bp);
 		if (error) {
 			xfs_buf_ioerror_alert(bp,
@@ -1200,7 +1205,12 @@ xfs_zero_remaining_bytes(
 		XFS_BUF_UNDONE(bp);
 		XFS_BUF_UNREAD(bp);
 		XFS_BUF_WRITE(bp);
-		xfsbdstrat(mp, bp);
+
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			error = XFS_ERROR(EIO);
+			break;
+		}
+		xfs_buf_iorequest(bp);
 		error = xfs_buf_iowait(bp);
 		if (error) {
 			xfs_buf_ioerror_alert(bp,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77dcb00..afe7645e4b2b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -698,7 +698,11 @@ xfs_buf_read_uncached(
 	bp->b_flags |= XBF_READ;
 	bp->b_ops = ops;
 
-	xfsbdstrat(target->bt_mount, bp);
+	if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+		xfs_buf_relse(bp);
+		return NULL;
+	}
+	xfs_buf_iorequest(bp);
 	xfs_buf_iowait(bp);
 	return bp;
}
@@ -1089,7 +1093,7 @@ xfs_bioerror(
 * This is meant for userdata errors; metadata bufs come with
 * iodone functions attached, so that we can track down errors.
 */
-STATIC int
+int
xfs_bioerror_relse(
	struct xfs_buf	*bp)
{
@@ -1152,7 +1156,7 @@ xfs_bwrite(
 	ASSERT(xfs_buf_islocked(bp));
 
 	bp->b_flags |= XBF_WRITE;
-	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
 
 	xfs_bdstrat_cb(bp);
 
@@ -1164,25 +1168,6 @@ xfs_bwrite(
 	return error;
}
 
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem.  Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
-{
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		trace_xfs_bdstrat_shut(bp, _RET_IP_);
-		xfs_bioerror_relse(bp);
-		return;
-	}
-
-	xfs_buf_iorequest(bp);
-}
-
STATIC void
_xfs_buf_ioend(
	xfs_buf_t	*bp,
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg(
 			struct xfs_buf *bp;
 			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
 			list_del_init(&bp->b_lru);
+			if (bp->b_flags & XBF_WRITE_FAIL) {
+				xfs_alert(btp->bt_mount,
+"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
+"Please run xfs_repair to determine the extent of the problem.",
+					(long long)bp->b_bn);
+			}
 			xfs_buf_rele(bp);
 		}
 		if (loop++ != 0)
@@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit(
 
 	blk_start_plug(&plug);
 	list_for_each_entry_safe(bp, n, io_list, b_list) {
-		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
 		bp->b_flags |= XBF_WRITE;
 
 		if (!wait) {
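With the xfsbdstrat() wrapper deleted above, each caller now open-codes the shutdown check before issuing I/O, which makes the error disposition explicit at every call site (compare the xfs_bmap_util.c hunks earlier and the xfs_log_recover.c hunks below). The recurring shape, written as a composite sketch rather than a function that exists in the tree:

/* Composite of the converted call sites: check for a forced shutdown,
 * then issue the request and wait. What happens on EIO (break, goto,
 * xfs_buf_relse() + return) varies per caller. */
static int issue_buf_io(struct xfs_mount *mp, struct xfs_buf *bp)
{
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);	/* fail fast, never touch the disk */

	xfs_buf_iorequest(bp);
	return xfs_buf_iowait(bp);
}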
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e65683361017..1cf21a4a9f22 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -45,6 +45,7 @@ typedef enum {
#define XBF_ASYNC	 (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE	 (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL	 (1 << 24)/* async writes have failed on this buffer */
 
/* I/O hints for the BIO layer */
#define XBF_SYNCIO	 (1 << 10)/* treat this buffer as synchronous I/O */
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
	{ XBF_ASYNC,		"ASYNC" }, \
	{ XBF_DONE,		"DONE" }, \
	{ XBF_STALE,		"STALE" }, \
+	{ XBF_WRITE_FAIL,	"WRITE_FAIL" }, \
	{ XBF_SYNCIO,		"SYNCIO" }, \
	{ XBF_FUA,		"FUA" }, \
	{ XBF_FLUSH,		"FLUSH" }, \
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t;
	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
	{ _XBF_COMPOUND,	"COMPOUND" }
 
+
/*
 * Internal state flags.
 */
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 
/* Buffer Read and Write Routines */
extern int xfs_bwrite(struct xfs_buf *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-
extern void xfs_buf_ioend(xfs_buf_t *, int);
extern void xfs_buf_ioerror(xfs_buf_t *, int);
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
#define xfs_buf_zero(bp, off, len) \
	xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
 
+extern int xfs_bioerror_relse(struct xfs_buf *);
+
static inline int xfs_buf_geterror(xfs_buf_t *bp)
{
	return bp ? bp->b_error : ENOMEM;
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void);
 
#define XFS_BUF_ZEROFLAGS(bp) \
	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
-			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
+			    XBF_WRITE_FAIL))
 
void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67ba25d3..2227b9b050bb 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -496,6 +496,14 @@ xfs_buf_item_unpin(
 	}
}
 
+/*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
+ * seconds so as to not spam logs too much on repeated detection of the same
+ * buffer being bad..
+ */
+
+DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
+
STATIC uint
xfs_buf_item_push(
	struct xfs_log_item	*lip,
@@ -524,6 +532,14 @@ xfs_buf_item_push(
 
 	trace_xfs_buf_item_push(bip);
 
+	/* has a previous flush failed due to IO errors? */
+	if ((bp->b_flags & XBF_WRITE_FAIL) &&
+	    ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+		xfs_warn(bp->b_target->bt_mount,
+"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
+			 (long long)bp->b_bn);
+	}
+
 	if (!xfs_buf_delwri_queue(bp, buffer_list))
 		rval = XFS_ITEM_FLUSHING;
 	xfs_buf_unlock(bp);
@@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks(
 
 	xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
 
-	if (!XFS_BUF_ISSTALE(bp)) {
-		bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+	if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
+		bp->b_flags |= XBF_WRITE | XBF_ASYNC |
+			       XBF_DONE | XBF_WRITE_FAIL;
 		xfs_buf_iorequest(bp);
 	} else {
 		xfs_buf_relse(bp);
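The buf_item change uses the kernel's generic ratelimit helper to keep the "failing async write" warning from flooding the log: DEFINE_RATELIMIT_STATE(name, interval, burst) declares a static limiter allowing at most 'burst' events per 'interval' jiffies, and ___ratelimit() returns nonzero while the current event is within budget. A minimal sketch of the same pattern in a hypothetical driver:

#include <linux/printk.h>
#include <linux/ratelimit.h>

/* At most 10 warnings every 30 seconds, as in the XFS hunk above. */
static DEFINE_RATELIMIT_STATE(my_warn_rs, 30 * HZ, 10);

static void report_bad_block(unsigned long long blkno)
{
	/* The string argument only tags the "callbacks suppressed"
	 * message that is printed once the limit kicks in. */
	if (___ratelimit(&my_warn_rs, "mydrv:"))
		pr_warn("mydrv: bad block 0x%llx\n", blkno);
}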
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56369d4509d5..48c7d18f68c3 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup(
 */
int						/* error */
xfs_dir2_node_removename(
-	xfs_da_args_t		*args)		/* operation arguments */
+	struct xfs_da_args	*args)		/* operation arguments */
{
-	xfs_da_state_blk_t	*blk;		/* leaf block */
+	struct xfs_da_state_blk	*blk;		/* leaf block */
	int			error;		/* error return value */
	int			rval;		/* operation return value */
-	xfs_da_state_t		*state;		/* btree cursor */
+	struct xfs_da_state	*state;		/* btree cursor */
 
 	trace_xfs_dir2_node_removename(args);
 
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename(
 	state->mp = args->dp->i_mount;
 	state->blocksize = state->mp->m_dirblksize;
 	state->node_ents = state->mp->m_dir_node_ents;
-	/*
-	 * Look up the entry we're deleting, set up the cursor.
-	 */
+
+	/* Look up the entry we're deleting, set up the cursor. */
 	error = xfs_da3_node_lookup_int(state, &rval);
 	if (error)
-		rval = error;
-	/*
-	 * Didn't find it, upper layer screwed up.
-	 */
+		goto out_free;
+
+	/* Didn't find it, upper layer screwed up. */
 	if (rval != EEXIST) {
-		xfs_da_state_free(state);
-		return rval;
+		error = rval;
+		goto out_free;
 	}
+
 	blk = &state->path.blk[state->path.active - 1];
 	ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
 	ASSERT(state->extravalid);
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename(
 	error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
				      &state->extrablk, &rval);
 	if (error)
-		return error;
+		goto out_free;
 	/*
	 * Fix the hash values up the btree.
	 */
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename(
	 */
 	if (!error)
 		error = xfs_dir2_node_to_leaf(state);
+out_free:
 	xfs_da_state_free(state);
 	return error;
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 8367d6dc18c9..4f11ef011139 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -157,7 +157,7 @@ xfs_ioc_trim(
	struct xfs_mount		*mp,
	struct fstrim_range __user	*urange)
{
-	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+	struct request_queue	*q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);
	unsigned int		granularity = q->limits.discard_granularity;
	struct fstrim_range	range;
	xfs_daddr_t		start, end, minlen;
@@ -180,7 +180,8 @@ xfs_ioc_trim(
	 * matter as trimming blocks is an advisory interface.
	 */
 	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
-	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
+	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
+	    range.len < mp->m_sb.sb_blocksize)
 		return -XFS_ERROR(EINVAL);
 
 	start = BTOBB(range.start);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a6e54b3319bd..02fb943cbf22 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -220,6 +220,8 @@ xfs_growfs_data_private(
	 */
 	nfree = 0;
 	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
+		__be32	*agfl_bno;
+
 		/*
		 * AG freespace header block
		 */
@@ -279,8 +281,10 @@ xfs_growfs_data_private(
 			agfl->agfl_seqno = cpu_to_be32(agno);
 			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
 		}
+
+		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
 		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
-			agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
+			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
 
 		error = xfs_bwrite(bp);
 		xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 4d613401a5e0..33ad9a77791f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -442,7 +442,8 @@ xfs_attrlist_by_handle(
 		return -XFS_ERROR(EPERM);
 	if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
-	if (al_hreq.buflen > XATTR_LIST_MAX)
+	if (al_hreq.buflen < sizeof(struct attrlist) ||
+	    al_hreq.buflen > XATTR_LIST_MAX)
 		return -XFS_ERROR(EINVAL);
 
 	/*
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e8fb1231db81..a7992f8de9d3 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,8 @@ xfs_compat_attrlist_by_handle(
 	if (copy_from_user(&al_hreq, arg,
 			   sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
 		return -XFS_ERROR(EFAULT);
-	if (al_hreq.buflen > XATTR_LIST_MAX)
+	if (al_hreq.buflen < sizeof(struct attrlist) ||
+	    al_hreq.buflen > XATTR_LIST_MAX)
 		return -XFS_ERROR(EINVAL);
 
 	/*
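The native and compat attrlist ioctls gain the same lower bound because the handler allocates a buffer of buflen bytes and then initializes a struct attrlist header inside it; a buflen smaller than that header would corrupt memory past the undersized allocation. A sketch of the check in isolation (the header layout and the cap value are illustrative):

#include <stdint.h>
#include <errno.h>

#define XATTR_LIST_MAX_EXAMPLE 65536   /* illustrative cap */

struct attrlist_hdr {                  /* stand-in for struct attrlist */
	int32_t count;
	int32_t used;
};

static int check_buflen(uint32_t buflen)
{
	/* Reject buffers too small for even the header, and absurdly
	 * large ones, before anything is allocated or copied. */
	if (buflen < sizeof(struct attrlist_hdr) ||
	    buflen > XATTR_LIST_MAX_EXAMPLE)
		return -EINVAL;
	return 0;
}

int main(void)
{
	return check_buflen(4) == -EINVAL ? 0 : 1;
}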
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e544e963..104455b8046c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -618,7 +618,8 @@ xfs_setattr_nonsize(
 		}
 		if (!gid_eq(igid, gid)) {
 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-				ASSERT(!XFS_IS_PQUOTA_ON(mp));
+				ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
+				       !XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & ATTR_GID);
 				ASSERT(gdqp);
 				olddquot2 = xfs_qm_vop_chown(tp, ip,
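The relaxed assertion reflects that once the superblock has a dedicated project quota inode (the pquotino feature), group and project quotas can be enabled simultaneously, so "!XFS_IS_PQUOTA_ON(mp)" is only an invariant on older superblocks. The new condition is the implication below, reduced to booleans as a sketch:

#include <assert.h>

/* On old superblocks (no separate pquotino), a group-quota chown
 * implies project quota is off; on newer ones the combination is legal. */
static int invariant_holds(int has_pquotino, int pquota_on)
{
	return has_pquotino || !pquota_on;
}

int main(void)
{
	assert(invariant_holds(1, 1));   /* v5-style: both quotas may be on */
	assert(invariant_holds(0, 0));   /* old-style: pquota off is fine */
	assert(!invariant_holds(0, 1));  /* old-style with pquota on trips */
	return 0;
}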
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669df40f3..eae16920655b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,7 +193,10 @@ xlog_bread_noalign(
 	bp->b_io_length = nbblks;
 	bp->b_error = 0;
 
-	xfsbdstrat(log->l_mp, bp);
+	if (XFS_FORCED_SHUTDOWN(log->l_mp))
+		return XFS_ERROR(EIO);
+
+	xfs_buf_iorequest(bp);
 	error = xfs_buf_iowait(bp);
 	if (error)
 		xfs_buf_ioerror_alert(bp, __func__);
@@ -4397,7 +4400,13 @@ xlog_do_recover(
 	XFS_BUF_READ(bp);
 	XFS_BUF_UNASYNC(bp);
 	bp->b_ops = &xfs_sb_buf_ops;
-	xfsbdstrat(log->l_mp, bp);
+
+	if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+		xfs_buf_relse(bp);
+		return XFS_ERROR(EIO);
+	}
+
+	xfs_buf_iorequest(bp);
 	error = xfs_buf_iowait(bp);
 	if (error) {
 		xfs_buf_ioerror_alert(bp, __func__);
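Both log-recovery call sites stop going through the xfsbdstrat() wrapper and instead check for a forced shutdown themselves before queueing the buffer with xfs_buf_iorequest(), failing fast with EIO when the filesystem is already dead. The shape of that pattern, reduced to a user-space sketch (all names here are stand-ins for the buffer API):

#include <errno.h>

struct buf { int error; int queued; };

static int fs_shut_down;                   /* stand-in for XFS_FORCED_SHUTDOWN() */

static void buf_iorequest(struct buf *bp)  /* stand-in for xfs_buf_iorequest() */
{
	bp->queued = 1;
}

static int buf_iowait(struct buf *bp)      /* stand-in for xfs_buf_iowait() */
{
	return bp->error;
}

static int bread(struct buf *bp)
{
	if (fs_shut_down)
		return -EIO;   /* fail fast, never touch the device */

	buf_iorequest(bp);
	return buf_iowait(bp);
}

int main(void)
{
	struct buf b = { 0, 0 };
	return bread(&b);
}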
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996cfec6..dd88f0e27bd8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,8 +134,6 @@ xfs_qm_dqpurge(
 {
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
-	struct xfs_dquot	*gdqp = NULL;
-	struct xfs_dquot	*pdqp = NULL;
 
 	xfs_dqlock(dqp);
 	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
@@ -143,21 +141,6 @@ xfs_qm_dqpurge(
 		return EAGAIN;
 	}
 
-	/*
-	 * If this quota has a hint attached, prepare for releasing it now.
-	 */
-	gdqp = dqp->q_gdquot;
-	if (gdqp) {
-		xfs_dqlock(gdqp);
-		dqp->q_gdquot = NULL;
-	}
-
-	pdqp = dqp->q_pdquot;
-	if (pdqp) {
-		xfs_dqlock(pdqp);
-		dqp->q_pdquot = NULL;
-	}
-
 	dqp->dq_flags |= XFS_DQ_FREEING;
 
 	xfs_dqflock(dqp);
@@ -206,11 +189,47 @@ xfs_qm_dqpurge(
 	XFS_STATS_DEC(xs_qm_dquot_unused);
 
 	xfs_qm_dqdestroy(dqp);
+	return 0;
+}
+
+/*
+ * Release the group or project dquot pointers the user dquots may be carrying
+ * around as a hint, and proceed to purge the user dquot cache if requested.
+ */
+STATIC int
+xfs_qm_dqpurge_hints(
+	struct xfs_dquot	*dqp,
+	void			*data)
+{
+	struct xfs_dquot	*gdqp = NULL;
+	struct xfs_dquot	*pdqp = NULL;
+	uint			flags = *((uint *)data);
+
+	xfs_dqlock(dqp);
+	if (dqp->dq_flags & XFS_DQ_FREEING) {
+		xfs_dqunlock(dqp);
+		return EAGAIN;
+	}
+
+	/* If this quota has a hint attached, prepare for releasing it now */
+	gdqp = dqp->q_gdquot;
+	if (gdqp)
+		dqp->q_gdquot = NULL;
+
+	pdqp = dqp->q_pdquot;
+	if (pdqp)
+		dqp->q_pdquot = NULL;
+
+	xfs_dqunlock(dqp);
 
 	if (gdqp)
-		xfs_qm_dqput(gdqp);
+		xfs_qm_dqrele(gdqp);
 	if (pdqp)
-		xfs_qm_dqput(pdqp);
+		xfs_qm_dqrele(pdqp);
+
+	if (flags & XFS_QMOPT_UQUOTA)
+		return xfs_qm_dqpurge(dqp, NULL);
+
 	return 0;
 }
 
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all(
 	struct xfs_mount	*mp,
 	uint			flags)
 {
-	if (flags & XFS_QMOPT_UQUOTA)
-		xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
+	/*
+	 * We have to release the group/project dquot hint(s) from the user
+	 * dquots first, if they are there; otherwise we would run into an
+	 * infinite loop while walking the radix tree to purge the other
+	 * types of dquots, since their refcounts are not zero while a user
+	 * dquot refers to them as a hint.
+	 *
+	 * Calling xfs_qm_dqpurge_hints() ends up going through the general
+	 * xfs_qm_dqpurge() for the user dquot cache if requested.
+	 */
+	xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
+
 	if (flags & XFS_QMOPT_GQUOTA)
 		xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
 	if (flags & XFS_QMOPT_PQUOTA)
@@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach(
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
-	if (udqp) {
+	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
 		ASSERT(ip->i_udquot == NULL);
-		ASSERT(XFS_IS_UQUOTA_ON(mp));
 		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
 
 		ip->i_udquot = xfs_qm_dqhold(udqp);
 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
-	if (gdqp) {
+	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
 		ASSERT(ip->i_gdquot == NULL);
-		ASSERT(XFS_IS_GQUOTA_ON(mp));
 		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
 		ip->i_gdquot = xfs_qm_dqhold(gdqp);
 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
 	}
-	if (pdqp) {
+	if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
 		ASSERT(ip->i_pdquot == NULL);
-		ASSERT(XFS_IS_PQUOTA_ON(mp));
 		ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
 
 		ip->i_pdquot = xfs_qm_dqhold(pdqp);
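The xfs_qm.c reorganization is about reference counting: while a user dquot holds a group or project dquot as a hint, the hinted dquot's refcount never drops to zero, so a straight purge walk of the group/project trees can spin forever on EAGAIN, exactly as the new comment in xfs_qm_dqpurge_all() explains. Releasing the hints in a first pass breaks that cycle. A miniature of the ordering (two objects instead of radix trees; names are illustrative):

#include <stdio.h>

struct dquot {
	int refs;
	struct dquot *hint;   /* e.g. a user dquot's cached group dquot */
};

static void dqrele(struct dquot *dq) { dq->refs--; }

/* Pass 1: detach and release the hint so the hinted dquot can reach
 * refcount zero. Pass 2 (the purge walk) is then able to reclaim it. */
static void purge_hints(struct dquot *udq)
{
	struct dquot *g = udq->hint;

	if (g) {
		udq->hint = NULL;
		dqrele(g);
	}
}

int main(void)
{
	struct dquot gdq = { 1, NULL };   /* pinned only by the hint */
	struct dquot udq = { 1, &gdq };

	purge_hints(&udq);
	printf("group refs now %d, purgeable\n", gdq.refs);
	return gdq.refs;   /* 0: the purge walk can now reclaim it */
}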
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11b7734..647b6f1d8923 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map(
 		ASSERT(bp->b_iodone == NULL);
 		XFS_BUF_READ(bp);
 		bp->b_ops = ops;
-		xfsbdstrat(tp->t_mountp, bp);
+
+		/*
+		 * XXX(hch): clean up the error handling here to be less
+		 * of a mess..
+		 */
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			trace_xfs_bdstrat_shut(bp, _RET_IP_);
+			xfs_bioerror_relse(bp);
+		} else {
+			xfs_buf_iorequest(bp);
+		}
+
 		error = xfs_buf_iowait(bp);
 		if (error) {
 			xfs_buf_ioerror_alert(bp, __func__);
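xfs_trans_read_buf_map() handles the same shutdown case differently from the log-recovery sites: rather than returning early, it error-completes the buffer through xfs_bioerror_relse(), so the xfs_buf_iowait() that follows reports the failure via the normal completion path. Sketched with stand-in names:

#include <errno.h>

struct buf { int error; int done; };

/* Stand-in for xfs_bioerror_relse(): mark the buffer failed and
 * completed instead of submitting it to the device. */
static void bioerror_relse(struct buf *bp)
{
	bp->error = -EIO;
	bp->done = 1;
}

static void buf_iorequest(struct buf *bp) { bp->done = 1; }

static int buf_iowait(struct buf *bp)
{
	return bp->done ? bp->error : -EINVAL;
}

static int read_buf(struct buf *bp, int shut_down)
{
	if (shut_down)
		bioerror_relse(bp);   /* the common iowait below sees the error */
	else
		buf_iorequest(bp);

	return buf_iowait(bp);
}

int main(void)
{
	struct buf b = { 0, 0 };
	return read_buf(&b, 0);
}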