Merge tag 'omap-for-v3.13/intc-ldp-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap into fixes

From Tony Lindgren:
Fix a regression for wrong interrupt numbers for some devices after the sparse IRQ conversion, fix DRA7 console output for earlyprintk, and fix the LDP LCD backlight when DSS is built into the kernel and not as a loadable module.

* tag 'omap-for-v3.13/intc-ldp-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap:
  ARM: OMAP2+: Fix LCD panel backlight regression for LDP legacy booting
  ARM: OMAP2+: hwmod_data: fix missing OMAP_INTC_START in irq data
  ARM: DRA7: hwmod: Fix boot crash with DEBUG_LL + v3.13-rc5

Signed-off-by: Olof Johansson <olof@lixom.net>
author: Olof Johansson <olof@lixom.net> 2013-12-28 18:38:32 -0500
committer: Olof Johansson <olof@lixom.net> 2013-12-28 18:38:32 -0500
commit: 9b17c16525552b247cb2d9bb8eeadc87950b36ff (patch)
tree: 449426f2b37bb340e286fe0def9bf9cef29ec2a7 /fs
parent: 4cff6123536236fa84f826d9b93452903b90fe2e (diff)
parent: 82f4fe707836e80b0fcaae9a4a1756e6e89c5e62 (diff)
15 files changed, 308 insertions, 214 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6cb22e..062a5f6a1448 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
        int i;
        for (i = 0; i < ctx->nr_pages; i++) {
+                struct page *page;
                pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
                                page_count(ctx->ring_pages[i]));
-                put_page(ctx->ring_pages[i]);
+                page = ctx->ring_pages[i];
+                if (!page)
+                        continue;
+                ctx->ring_pages[i] = NULL;
+                put_page(page);
        }
        put_aio_ring_file(ctx);
@@ -280,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
        unsigned long flags;
        int rc;
+        rc = 0;
+        /* Make sure the old page hasn't already been changed */
+        spin_lock(&mapping->private_lock);
+        ctx = mapping->private_data;
+        if (ctx) {
+                pgoff_t idx;
+                spin_lock_irqsave(&ctx->completion_lock, flags);
+                idx = old->index;
+                if (idx < (pgoff_t)ctx->nr_pages) {
+                        if (ctx->ring_pages[idx] != old)
+                                rc = -EAGAIN;
+                } else
+                        rc = -EINVAL;
+                spin_unlock_irqrestore(&ctx->completion_lock, flags);
+        } else
+                rc = -EINVAL;
+        spin_unlock(&mapping->private_lock);
+        if (rc != 0)
+                return rc;
        /* Writeback must be complete */
        BUG_ON(PageWriteback(old));
-        put_page(old);
+        get_page(new);
-        rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+        rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
        if (rc != MIGRATEPAGE_SUCCESS) {
-                get_page(old);
+                put_page(new);
                return rc;
        }
-        get_page(new);
        /* We can potentially race against kioctx teardown here.  Use the
         * address_space's private data lock to protect the mapping's
         * private_data.
@@ -303,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
                spin_lock_irqsave(&ctx->completion_lock, flags);
                migrate_page_copy(new, old);
                idx = old->index;
-                if (idx < (pgoff_t)ctx->nr_pages)
+                if (idx < (pgoff_t)ctx->nr_pages) {
-                        ctx->ring_pages[idx] = new;
+                        /* And only do the move if things haven't changed */
+                        if (ctx->ring_pages[idx] == old)
+                                ctx->ring_pages[idx] = new;
+                        else
+                                rc = -EAGAIN;
+                } else
+                        rc = -EINVAL;
                spin_unlock_irqrestore(&ctx->completion_lock, flags);
        } else
                rc = -EBUSY;
        spin_unlock(&mapping->private_lock);
+        if (rc == MIGRATEPAGE_SUCCESS)
+                put_page(old);
+        else
+                put_page(new);
        return rc;
 }
 #endif
@@ -326,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
        struct aio_ring *ring;
        unsigned nr_events = ctx->max_reqs;
        struct mm_struct *mm = current->mm;
-        unsigned long size, populate;
+        unsigned long size, unused;
        int nr_pages;
        int i;
        struct file *file;
@@ -347,6 +383,20 @@ static int aio_setup_ring(struct kioctx *ctx)
                return -EAGAIN;
        }
+        ctx->aio_ring_file = file;
+        nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+                        / sizeof(struct io_event);
+        ctx->ring_pages = ctx->internal_pages;
+        if (nr_pages > AIO_RING_PAGES) {
+                ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+                                          GFP_KERNEL);
+                if (!ctx->ring_pages) {
+                        put_aio_ring_file(ctx);
+                        return -ENOMEM;
+                }
+        }
        for (i = 0; i < nr_pages; i++) {
                struct page *page;
                page = find_or_create_page(file->f_inode->i_mapping,
@@ -358,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
                SetPageUptodate(page);
                SetPageDirty(page);
                unlock_page(page);
+                ctx->ring_pages[i] = page;
        }
-        ctx->aio_ring_file = file;
+        ctx->nr_pages = i;
-        nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
-                        / sizeof(struct io_event);
-        ctx->ring_pages = ctx->internal_pages;
+        if (unlikely(i != nr_pages)) {
-        if (nr_pages > AIO_RING_PAGES) {
+                aio_free_ring(ctx);
-                ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+                return -EAGAIN;
-                                          GFP_KERNEL);
-                if (!ctx->ring_pages) {
-                        put_aio_ring_file(ctx);
-                        return -ENOMEM;
-                }
        }
        ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -379,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
        down_write(&mm->mmap_sem);
        ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
                                       PROT_READ | PROT_WRITE,
-                                       MAP_SHARED | MAP_POPULATE, 0, &populate);
+                                       MAP_SHARED, 0, &unused);
+        up_write(&mm->mmap_sem);
        if (IS_ERR((void *)ctx->mmap_base)) {
-                up_write(&mm->mmap_sem);
                ctx->mmap_size = 0;
                aio_free_ring(ctx);
                return -EAGAIN;
@@ -389,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
        pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
-        /* We must do this while still holding mmap_sem for write, as we
-         * need to be protected against userspace attempting to mremap()
-         * or munmap() the ring buffer.
-         */
-        ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
-                                       1, 0, ctx->ring_pages, NULL);
-        /* Dropping the reference here is safe as the page cache will hold
-         * onto the pages for us.  It is also required so that page migration
-         * can unmap the pages and get the right reference count.
-         */
-        for (i = 0; i < ctx->nr_pages; i++)
-                put_page(ctx->ring_pages[i]);
-        up_write(&mm->mmap_sem);
-        if (unlikely(ctx->nr_pages != nr_pages)) {
-                aio_free_ring(ctx);
-                return -EAGAIN;
-        }
        ctx->user_id = ctx->mmap_base;
        ctx->nr_events = nr_events; /* trusted copy */
@@ -652,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        aio_nr += ctx->max_reqs;
        spin_unlock(&aio_nr_lock);
-        percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+        percpu_ref_get(&ctx->users);    /* io_setup() will drop this ref */
+        percpu_ref_get(&ctx->reqs);     /* free_ioctx_users() will drop this */
        err = ioctx_add_table(ctx, mm);
        if (err)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e561c059539..ec3ba43b9faa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -210,9 +210,13 @@ static int readpage_nounlock(struct file *filp, struct page *page)
        if (err < 0) {
                SetPageError(page);
                goto out;
-        } else if (err < PAGE_CACHE_SIZE) {
+        } else {
+                if (err < PAGE_CACHE_SIZE) {
                /* zero fill remainder of page */
-                zero_user_segment(page, err, PAGE_CACHE_SIZE);
+                        zero_user_segment(page, err, PAGE_CACHE_SIZE);
+                } else {
+                        flush_dcache_page(page);
+                }
        }
        SetPageUptodate(page);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9a8e396aed89..278fd2891288 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -978,7 +978,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
        struct ceph_mds_reply_inode *ininfo;
        struct ceph_vino vino;
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-        int i = 0;
        int err = 0;
        dout("fill_trace %p is_dentry %d is_target %d\n", req,
@@ -1039,6 +1038,29 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                }
        }
+        if (rinfo->head->is_target) {
+                vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+                vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+                in = ceph_get_inode(sb, vino);
+                if (IS_ERR(in)) {
+                        err = PTR_ERR(in);
+                        goto done;
+                }
+                req->r_target_inode = in;
+                err = fill_inode(in, &rinfo->targeti, NULL,
+                                session, req->r_request_started,
+                                (le32_to_cpu(rinfo->head->result) == 0) ?
+                                req->r_fmode : -1,
+                                &req->r_caps_reservation);
+                if (err < 0) {
+                        pr_err("fill_inode badness %p %llx.%llx\n",
+                                in, ceph_vinop(in));
+                        goto done;
+                }
+        }
        /*
         * ignore null lease/binding on snapdir ENOENT, or else we
         * will have trouble splicing in the virtual snapdir later
@@ -1108,7 +1130,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                             ceph_dentry(req->r_old_dentry)->offset);
                        dn = req->r_old_dentry;  /* use old_dentry */
-                        in = dn->d_inode;
                }
                /* null dentry? */
@@ -1130,44 +1151,28 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                }
                /* attach proper inode */
-                ininfo = rinfo->targeti.in;
+                if (!dn->d_inode) {
-                vino.ino = le64_to_cpu(ininfo->ino);
+                        ihold(in);
-                vino.snap = le64_to_cpu(ininfo->snapid);
-                in = dn->d_inode;
-                if (!in) {
-                        in = ceph_get_inode(sb, vino);
-                        if (IS_ERR(in)) {
-                                pr_err("fill_trace bad get_inode "
-                                       "%llx.%llx\n", vino.ino, vino.snap);
-                                err = PTR_ERR(in);
-                                d_drop(dn);
-                                goto done;
-                        }
                        dn = splice_dentry(dn, in, &have_lease, true);
                        if (IS_ERR(dn)) {
                                err = PTR_ERR(dn);
                                goto done;
                        }
                        req->r_dentry = dn;  /* may have spliced */
-                        ihold(in);
+                } else if (dn->d_inode && dn->d_inode != in) {
-                } else if (ceph_ino(in) == vino.ino &&
-                           ceph_snap(in) == vino.snap) {
-                        ihold(in);
-                } else {
                        dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
-                             dn, in, ceph_ino(in), ceph_snap(in),
+                             dn, dn->d_inode, ceph_vinop(dn->d_inode),
-                             vino.ino, vino.snap);
+                             ceph_vinop(in));
                        have_lease = false;
-                        in = NULL;
                }
                if (have_lease)
                        update_dentry_lease(dn, rinfo->dlease, session,
                                            req->r_request_started);
                dout(" final dn %p\n", dn);
-                i++;
+        } else if (!req->r_aborted &&
-        } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+                   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
-                   req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
+                    req->r_op == CEPH_MDS_OP_MKSNAP)) {
                struct dentry *dn = req->r_dentry;
                /* fill out a snapdir LOOKUPSNAP dentry */
@@ -1177,52 +1182,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                ininfo = rinfo->targeti.in;
                vino.ino = le64_to_cpu(ininfo->ino);
                vino.snap = le64_to_cpu(ininfo->snapid);
-                in = ceph_get_inode(sb, vino);
-                if (IS_ERR(in)) {
-                        pr_err("fill_inode get_inode badness %llx.%llx\n",
-                               vino.ino, vino.snap);
-                        err = PTR_ERR(in);
-                        d_delete(dn);
-                        goto done;
-                }
                dout(" linking snapped dir %p to dn %p\n", in, dn);
+                ihold(in);
                dn = splice_dentry(dn, in, NULL, true);
                if (IS_ERR(dn)) {
                        err = PTR_ERR(dn);
                        goto done;
                }
                req->r_dentry = dn;  /* may have spliced */
-                ihold(in);
-                rinfo->head->is_dentry = 1;  /* fool notrace handlers */
-        }
-        if (rinfo->head->is_target) {
-                vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-                vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-                if (in == NULL || ceph_ino(in) != vino.ino ||
-                    ceph_snap(in) != vino.snap) {
-                        in = ceph_get_inode(sb, vino);
-                        if (IS_ERR(in)) {
-                                err = PTR_ERR(in);
-                                goto done;
-                        }
-                }
-                req->r_target_inode = in;
-                err = fill_inode(in,
-                                 &rinfo->targeti, NULL,
-                                 session, req->r_request_started,
-                                 (le32_to_cpu(rinfo->head->result) == 0) ?
-                                 req->r_fmode : -1,
-                                 &req->r_caps_reservation);
-                if (err < 0) {
-                        pr_err("fill_inode badness %p %llx.%llx\n",
-                               in, ceph_vinop(in));
-                        goto done;
-                }
        }
 done:
        dout("fill_trace done err=%d\n", err);
        return err;
@@ -1272,7 +1240,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
        struct qstr dname;
        struct dentry *dn;
        struct inode *in;
-        int err = 0, i;
+        int err = 0, ret, i;
        struct inode *snapdir = NULL;
        struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
        struct ceph_dentry_info *di;
@@ -1305,6 +1273,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                        ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
        }
+        /* FIXME: release caps/leases if error occurs */
        for (i = 0; i < rinfo->dir_nr; i++) {
                struct ceph_vino vino;
@@ -1329,9 +1298,10 @@ retry_lookup:
                                err = -ENOMEM;
                                goto out;
                        }
-                        err = ceph_init_dentry(dn);
+                        ret = ceph_init_dentry(dn);
-                        if (err < 0) {
+                        if (ret < 0) {
                                dput(dn);
+                                err = ret;
                                goto out;
                        }
                } else if (dn->d_inode &&
@@ -1351,9 +1321,6 @@ retry_lookup:
                        spin_unlock(&parent->d_lock);
                }
-                di = dn->d_fsdata;
-                di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
                /* inode */
                if (dn->d_inode) {
                        in = dn->d_inode;
@@ -1366,26 +1333,39 @@ retry_lookup:
                                err = PTR_ERR(in);
                                goto out;
                        }
-                        dn = splice_dentry(dn, in, NULL, false);
-                        if (IS_ERR(dn))
-                                dn = NULL;
                }
                if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
                               req->r_request_started, -1,
                               &req->r_caps_reservation) < 0) {
                        pr_err("fill_inode badness on %p\n", in);
+                        if (!dn->d_inode)
+                                iput(in);
+                        d_drop(dn);
                        goto next_item;
                }
-                if (dn)
-                        update_dentry_lease(dn, rinfo->dir_dlease[i],
+                if (!dn->d_inode) {
-                                            req->r_session,
+                        dn = splice_dentry(dn, in, NULL, false);
-                                            req->r_request_started);
+                        if (IS_ERR(dn)) {
+                                err = PTR_ERR(dn);
+                                dn = NULL;
+                                goto next_item;
+                        }
+                }
+                di = dn->d_fsdata;
+                di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
+                update_dentry_lease(dn, rinfo->dir_dlease[i],
+                                    req->r_session,
+                                    req->r_request_started);
 next_item:
                if (dn)
                        dput(dn);
        }
-        req->r_did_prepopulate = true;
+        if (err == 0)
+                req->r_did_prepopulate = true;
 out:
        if (snapdir) {
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b8e93a40a5d3..78c3c2097787 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -443,8 +443,11 @@ int pstore_register(struct pstore_info *psi)
                pstore_get_records(0);
        kmsg_dump_register(&pstore_dumper);
-        pstore_register_console();
-        pstore_register_ftrace();
+        if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) {
+                pstore_register_console();
+                pstore_register_ftrace();
+        }
        if (pstore_update_ms >= 0) {
                pstore_timer.expires = jiffies +
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b94f93685093..35e7d08fe629 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
        struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
        struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
        struct sysfs_open_file *of;
-        bool has_read, has_write, has_mmap;
+        bool has_read, has_write;
        int error = -EACCES;
        /* need attr_sd for attr and ops, its parent for kobj */
@@ -621,7 +621,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
                has_read = battr->read || battr->mmap;
                has_write = battr->write || battr->mmap;
-                has_mmap = battr->mmap;
        } else {
                const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
@@ -633,7 +632,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
                has_read = ops->show;
                has_write = ops->store;
-                has_mmap = false;
        }
        /* check perms and supported operations */
@@ -661,9 +659,9 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
         * open file has a separate mutex, it's okay as long as those don't
         * happen on the same file.  At this point, we can't easily give
         * each file a separate locking class.  Let's differentiate on
-         * whether the file has mmap or not for now.
+         * whether the file is bin or not for now.
         */
-        if (has_mmap)
+        if (sysfs_is_bin(attr_sd))
                mutex_init(&of->mutex);
        else
                mutex_init(&of->mutex);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3ef11b22e750..3b2c14b6f0fb 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -1635,7 +1635,7 @@ xfs_bmap_last_extent(
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
- * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be
+ * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
 * at, or past the EOF.
 */
 STATIC int
@@ -1650,9 +1650,14 @@ xfs_bmap_isaeof(
        bma->aeof = 0;
        error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
                                     &is_empty);
-        if (error || is_empty)
+        if (error)
                return error;
+        if (is_empty) {
+                bma->aeof = 1;
+                return 0;
+        }
        /*
         * Check if we are allocation or past the last extent, or at least into
         * the last delayed allocated extent.
@@ -3643,10 +3648,19 @@ xfs_bmap_btalloc(
        int             isaligned;
        int             tryagain;
        int             error;
+        int             stripe_align;
        ASSERT(ap->length);
        mp = ap->ip->i_mount;
+        /* stripe alignment for allocation is determined by mount parameters */
+        stripe_align = 0;
+        if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
+                stripe_align = mp->m_swidth;
+        else if (mp->m_dalign)
+                stripe_align = mp->m_dalign;
        align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
        if (unlikely(align)) {
                error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3655,6 +3669,8 @@ xfs_bmap_btalloc(
                ASSERT(!error);
                ASSERT(ap->length);
        }
        nullfb = *ap->firstblock == NULLFSBLOCK;
        fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
        if (nullfb) {
@@ -3730,7 +3746,7 @@ xfs_bmap_btalloc(
         */
        if (!ap->flist->xbf_low && ap->aeof) {
                if (!ap->offset) {
-                        args.alignment = mp->m_dalign;
+                        args.alignment = stripe_align;
                        atype = args.type;
                        isaligned = 1;
                        /*
@@ -3755,13 +3771,13 @@ xfs_bmap_btalloc(
                         * of minlen+alignment+slop doesn't go up
                         * between the calls.
                         */
-                        if (blen > mp->m_dalign && blen <= args.maxlen)
+                        if (blen > stripe_align && blen <= args.maxlen)
-                                nextminlen = blen - mp->m_dalign;
+                                nextminlen = blen - stripe_align;
                        else
                                nextminlen = args.minlen;
-                        if (nextminlen + mp->m_dalign > args.minlen + 1)
+                        if (nextminlen + stripe_align > args.minlen + 1)
                                args.minalignslop =
-                                        nextminlen + mp->m_dalign -
+                                        nextminlen + stripe_align -
                                        args.minlen - 1;
                        else
                                args.minalignslop = 0;
@@ -3783,7 +3799,7 @@ xfs_bmap_btalloc(
                 */
                args.type = atype;
                args.fsbno = ap->blkno;
-                args.alignment = mp->m_dalign;
+                args.alignment = stripe_align;
                args.minlen = nextminlen;
                args.minalignslop = 0;
                isaligned = 1;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5887e41c0323..1394106ed22d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1187,7 +1187,12 @@ xfs_zero_remaining_bytes(
                XFS_BUF_UNWRITE(bp);
                XFS_BUF_READ(bp);
                XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
-                xfsbdstrat(mp, bp);
+                if (XFS_FORCED_SHUTDOWN(mp)) {
+                        error = XFS_ERROR(EIO);
+                        break;
+                }
+                xfs_buf_iorequest(bp);
                error = xfs_buf_iowait(bp);
                if (error) {
                        xfs_buf_ioerror_alert(bp,
@@ -1200,7 +1205,12 @@ xfs_zero_remaining_bytes(
                XFS_BUF_UNDONE(bp);
                XFS_BUF_UNREAD(bp);
                XFS_BUF_WRITE(bp);
-                xfsbdstrat(mp, bp);
+                if (XFS_FORCED_SHUTDOWN(mp)) {
+                        error = XFS_ERROR(EIO);
+                        break;
+                }
+                xfs_buf_iorequest(bp);
                error = xfs_buf_iowait(bp);
                if (error) {
                        xfs_buf_ioerror_alert(bp,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index c7f0b77dcb00..afe7645e4b2b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -698,7 +698,11 @@ xfs_buf_read_uncached(
        bp->b_flags |= XBF_READ;
        bp->b_ops = ops;
-        xfsbdstrat(target->bt_mount, bp);
+        if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+                xfs_buf_relse(bp);
+                return NULL;
+        }
+        xfs_buf_iorequest(bp);
        xfs_buf_iowait(bp);
        return bp;
 }
@@ -1089,7 +1093,7 @@ xfs_bioerror(
 * This is meant for userdata errors; metadata bufs come with
 * iodone functions attached, so that we can track down errors.
 */
-STATIC int
+int
 xfs_bioerror_relse(
        struct xfs_buf  *bp)
 {
@@ -1152,7 +1156,7 @@ xfs_bwrite(
        ASSERT(xfs_buf_islocked(bp));
        bp->b_flags |= XBF_WRITE;
-        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
        xfs_bdstrat_cb(bp);
@@ -1164,25 +1168,6 @@ xfs_bwrite(
        return error;
 }
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem.  Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
-        struct xfs_mount        *mp,
-        struct xfs_buf          *bp)
-{
-        if (XFS_FORCED_SHUTDOWN(mp)) {
-                trace_xfs_bdstrat_shut(bp, _RET_IP_);
-                xfs_bioerror_relse(bp);
-                return;
-        }
-        xfs_buf_iorequest(bp);
-}
 STATIC void
 _xfs_buf_ioend(
        xfs_buf_t               *bp,
@@ -1516,6 +1501,12 @@ xfs_wait_buftarg(
                        struct xfs_buf *bp;
                        bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                        list_del_init(&bp->b_lru);
+                        if (bp->b_flags & XBF_WRITE_FAIL) {
+                                xfs_alert(btp->bt_mount,
+"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
+"Please run xfs_repair to determine the extent of the problem.",
+                                        (long long)bp->b_bn);
+                        }
                        xfs_buf_rele(bp);
                }
                if (loop++ != 0)
@@ -1799,7 +1790,7 @@ __xfs_buf_delwri_submit(
        blk_start_plug(&plug);
        list_for_each_entry_safe(bp, n, io_list, b_list) {
-                bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+                bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
                bp->b_flags |= XBF_WRITE;
                if (!wait) {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index e65683361017..1cf21a4a9f22 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -45,6 +45,7 @@ typedef enum {
 #define XBF_ASYNC        (1 << 4) /* initiator will not wait for completion */
 #define XBF_DONE         (1 << 5) /* all pages in the buffer uptodate */
 #define XBF_STALE        (1 << 6) /* buffer has been staled, do not find it */
+#define XBF_WRITE_FAIL   (1 << 24)/* async writes have failed on this buffer */
 /* I/O hints for the BIO layer */
 #define XBF_SYNCIO       (1 << 10)/* treat this buffer as synchronous I/O */
@@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_ASYNC,            "ASYNC" }, \
        { XBF_DONE,             "DONE" }, \
        { XBF_STALE,            "STALE" }, \
+        { XBF_WRITE_FAIL,       "WRITE_FAIL" }, \
        { XBF_SYNCIO,           "SYNCIO" }, \
        { XBF_FUA,              "FUA" }, \
        { XBF_FLUSH,            "FLUSH" }, \
@@ -80,6 +82,7 @@ typedef unsigned int xfs_buf_flags_t;
        { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
        { _XBF_COMPOUND,        "COMPOUND" }
 /*
 * Internal state flags.
 */
@@ -269,9 +272,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
 extern void xfs_buf_ioend(xfs_buf_t *,  int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
 extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
@@ -282,6 +282,8 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
 #define xfs_buf_zero(bp, off, len) \
            xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+extern int xfs_bioerror_relse(struct xfs_buf *);
 static inline int xfs_buf_geterror(xfs_buf_t *bp)
 {
        return bp ? bp->b_error : ENOMEM;
@@ -301,7 +303,8 @@ extern void xfs_buf_terminate(void);
 #define XFS_BUF_ZEROFLAGS(bp) \
        ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
-                            XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+                            XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
+                            XBF_WRITE_FAIL))
 void xfs_buf_stale(struct xfs_buf *bp);
 #define XFS_BUF_UNSTALE(bp)     ((bp)->b_flags &= ~XBF_STALE)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a64f67ba25d3..2227b9b050bb 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -496,6 +496,14 @@ xfs_buf_item_unpin(
        }
 }
+/*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
+ * seconds so as to not spam logs too much on repeated detection of the same
+ * buffer being bad.
+ */
+DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
 STATIC uint
 xfs_buf_item_push(
        struct xfs_log_item     *lip,
@@ -524,6 +532,14 @@ xfs_buf_item_push(
        trace_xfs_buf_item_push(bip);
+        /* has a previous flush failed due to IO errors? */
+        if ((bp->b_flags & XBF_WRITE_FAIL) &&
+            ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+                xfs_warn(bp->b_target->bt_mount,
+"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
+                         (long long)bp->b_bn);
+        }
        if (!xfs_buf_delwri_queue(bp, buffer_list))
                rval = XFS_ITEM_FLUSHING;
        xfs_buf_unlock(bp);
@@ -1096,8 +1112,9 @@ xfs_buf_iodone_callbacks(
                xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
-                if (!XFS_BUF_ISSTALE(bp)) {
+                if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
-                        bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+                        bp->b_flags |= XBF_WRITE | XBF_ASYNC |
+                                       XBF_DONE | XBF_WRITE_FAIL;
                        xfs_buf_iorequest(bp);
                } else {
                        xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 56369d4509d5..48c7d18f68c3 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup(
 */
 int                                             /* error */
 xfs_dir2_node_removename(
-        xfs_da_args_t           *args)          /* operation arguments */
+        struct xfs_da_args      *args)          /* operation arguments */
 {
-        xfs_da_state_blk_t      *blk;           /* leaf block */
+        struct xfs_da_state_blk *blk;           /* leaf block */
        int                     error;          /* error return value */
        int                     rval;           /* operation return value */
-        xfs_da_state_t          *state;         /* btree cursor */
+        struct xfs_da_state     *state;         /* btree cursor */
        trace_xfs_dir2_node_removename(args);
@@ -2084,19 +2084,18 @@ xfs_dir2_node_removename(
        state->mp = args->dp->i_mount;
        state->blocksize = state->mp->m_dirblksize;
        state->node_ents = state->mp->m_dir_node_ents;
-        /*
-         * Look up the entry we're deleting, set up the cursor.
+        /* Look up the entry we're deleting, set up the cursor. */
-         */
        error = xfs_da3_node_lookup_int(state, &rval);
        if (error)
-                rval = error;
+                goto out_free;
-        /*
-         * Didn't find it, upper layer screwed up.
+        /* Didn't find it, upper layer screwed up. */
-         */
        if (rval != EEXIST) {
-                xfs_da_state_free(state);
+                error = rval;
-                return rval;
+                goto out_free;
        }
        blk = &state->path.blk[state->path.active - 1];
        ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
        ASSERT(state->extravalid);
@@ -2107,7 +2106,7 @@ xfs_dir2_node_removename(
        error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
                &state->extrablk, &rval);
        if (error)
-                return error;
+                goto out_free;
        /*
         * Fix the hash values up the btree.
         */
@@ -2122,6 +2121,7 @@ xfs_dir2_node_removename(
         */
        if (!error)
                error = xfs_dir2_node_to_leaf(state);
+out_free:
        xfs_da_state_free(state);
        return error;
 }
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 27e0e544e963..104455b8046c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -618,7 +618,8 @@ xfs_setattr_nonsize(
                }
                if (!gid_eq(igid, gid)) {
                        if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-                                ASSERT(!XFS_IS_PQUOTA_ON(mp));
+                                ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
+                                       !XFS_IS_PQUOTA_ON(mp));
                                ASSERT(mask & ATTR_GID);
                                ASSERT(gdqp);
                                olddquot2 = xfs_qm_vop_chown(tp, ip,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b6b669df40f3..eae16920655b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -193,7 +193,10 @@ xlog_bread_noalign(
        bp->b_io_length = nbblks;
        bp->b_error = 0;
-        xfsbdstrat(log->l_mp, bp);
+        if (XFS_FORCED_SHUTDOWN(log->l_mp))
+                return XFS_ERROR(EIO);
+        xfs_buf_iorequest(bp);
        error = xfs_buf_iowait(bp);
        if (error)
                xfs_buf_ioerror_alert(bp, __func__);
@@ -4397,7 +4400,13 @@ xlog_do_recover(
        XFS_BUF_READ(bp);
        XFS_BUF_UNASYNC(bp);
        bp->b_ops = &xfs_sb_buf_ops;
-        xfsbdstrat(log->l_mp, bp);
+        if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+                xfs_buf_relse(bp);
+                return XFS_ERROR(EIO);
+        }
+        xfs_buf_iorequest(bp);
        error = xfs_buf_iowait(bp);
        if (error) {
                xfs_buf_ioerror_alert(bp, __func__);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 14a4996cfec6..dd88f0e27bd8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,8 +134,6 @@ xfs_qm_dqpurge(
 {
        struct xfs_mount        *mp = dqp->q_mount;
        struct xfs_quotainfo    *qi = mp->m_quotainfo;
-        struct xfs_dquot        *gdqp = NULL;
-        struct xfs_dquot        *pdqp = NULL;
        xfs_dqlock(dqp);
        if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
@@ -143,21 +141,6 @@ xfs_qm_dqpurge(
                return EAGAIN;
        }
-        /*
-         * If this quota has a hint attached, prepare for releasing it now.
-         */
-        gdqp = dqp->q_gdquot;
-        if (gdqp) {
-                xfs_dqlock(gdqp);
-                dqp->q_gdquot = NULL;
-        }
-        pdqp = dqp->q_pdquot;
-        if (pdqp) {
-                xfs_dqlock(pdqp);
-                dqp->q_pdquot = NULL;
-        }
        dqp->dq_flags |= XFS_DQ_FREEING;
        xfs_dqflock(dqp);
@@ -206,11 +189,47 @@ xfs_qm_dqpurge(
        XFS_STATS_DEC(xs_qm_dquot_unused);
        xfs_qm_dqdestroy(dqp);
+        return 0;
+}
+/*
+ * Release the group or project dquot pointers the user dquots may be carrying
+ * around as a hint, and proceed to purge the user dquot cache if requested.
+*/
+STATIC int
+xfs_qm_dqpurge_hints(
+        struct xfs_dquot        *dqp,
+        void                    *data)
+{
+        struct xfs_dquot        *gdqp = NULL;
+        struct xfs_dquot        *pdqp = NULL;
+        uint                    flags = *((uint *)data);
+        xfs_dqlock(dqp);
+        if (dqp->dq_flags & XFS_DQ_FREEING) {
+                xfs_dqunlock(dqp);
+                return EAGAIN;
+        }
+        /* If this quota has a hint attached, prepare for releasing it now */
+        gdqp = dqp->q_gdquot;
+        if (gdqp)
+                dqp->q_gdquot = NULL;
+        pdqp = dqp->q_pdquot;
+        if (pdqp)
+                dqp->q_pdquot = NULL;
+        xfs_dqunlock(dqp);
        if (gdqp)
-                xfs_qm_dqput(gdqp);
+                xfs_qm_dqrele(gdqp);
        if (pdqp)
-                xfs_qm_dqput(pdqp);
+                xfs_qm_dqrele(pdqp);
+        if (flags & XFS_QMOPT_UQUOTA)
+                return xfs_qm_dqpurge(dqp, NULL);
        return 0;
 }
@@ -222,8 +241,18 @@ xfs_qm_dqpurge_all(
        struct xfs_mount        *mp,
        uint                    flags)
 {
-        if (flags & XFS_QMOPT_UQUOTA)
+        /*
-                xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
+         * We have to release the group/project dquot hint(s) from the user
+         * dquots first if they are present; otherwise we would run into an
+         * infinite loop while walking the radix tree to purge the other types
+         * of dquots, since their refcount is not zero while a user dquot
+         * refers to them as a hint.
+         *
+         * Calling the special xfs_qm_dqpurge_hints() also runs the general
+         * xfs_qm_dqpurge() against the user dquot cache if requested.
+         */
+        xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags);
        if (flags & XFS_QMOPT_GQUOTA)
                xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
        if (flags & XFS_QMOPT_PQUOTA)
@@ -2082,24 +2111,21 @@ xfs_qm_vop_create_dqattach(
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-        if (udqp) {
+        if (udqp && XFS_IS_UQUOTA_ON(mp)) {
                ASSERT(ip->i_udquot == NULL);
-                ASSERT(XFS_IS_UQUOTA_ON(mp));
                ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
                ip->i_udquot = xfs_qm_dqhold(udqp);
                xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
-        if (gdqp) {
+        if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
                ASSERT(ip->i_gdquot == NULL);
-                ASSERT(XFS_IS_GQUOTA_ON(mp));
                ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
                ip->i_gdquot = xfs_qm_dqhold(gdqp);
                xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
-        if (pdqp) {
+        if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
                ASSERT(ip->i_pdquot == NULL);
-                ASSERT(XFS_IS_PQUOTA_ON(mp));
                ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
                ip->i_pdquot = xfs_qm_dqhold(pdqp);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c035d11b7734..647b6f1d8923 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -314,7 +314,18 @@ xfs_trans_read_buf_map(
                        ASSERT(bp->b_iodone == NULL);
                        XFS_BUF_READ(bp);
                        bp->b_ops = ops;
-                        xfsbdstrat(tp->t_mountp, bp);
+                        /*
+                         * XXX(hch): clean up the error handling here to be less
+                         * of a mess..
+                         */
+                        if (XFS_FORCED_SHUTDOWN(mp)) {
+                                trace_xfs_bdstrat_shut(bp, _RET_IP_);
+                                xfs_bioerror_relse(bp);
+                        } else {
+                                xfs_buf_iorequest(bp);
+                        }
                        error = xfs_buf_iowait(bp);
                        if (error) {
                                xfs_buf_ioerror_alert(bp, __func__);
author	Olof Johansson <olof@lixom.net>	2013-12-28 18:38:32 -0500
committer	Olof Johansson <olof@lixom.net>	2013-12-28 18:38:32 -0500
commit	9b17c16525552b247cb2d9bb8eeadc87950b36ff (patch)
tree	449426f2b37bb340e286fe0def9bf9cef29ec2a7 /fs
parent	4cff6123536236fa84f826d9b93452903b90fe2e (diff)
parent	82f4fe707836e80b0fcaae9a4a1756e6e89c5e62 (diff)