Merge branch 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux into drm-next

Alex writes: Pretty minor -next pull request. We have some additional new bits waiting internally for release. Hopefully Monday we can get at least some of them out. The others will probably take a few more weeks. Highlights of the current request: - ELD registers for passing audio information to the sound hardware - Handle GPUVM page faults more gracefully - Misc fixes Merge radeon test * 'drm-next-3.8' of git://people.freedesktop.org/~agd5f/linux: (483 commits) drm/radeon: bump driver version for new info ioctl requests drm/radeon: fix eDP clk and lane setup for scaled modes drm/radeon: add new INFO ioctl requests drm/radeon/dce32+: use fractional fb dividers for high clocks drm/radeon: use cached memory when evicting for vram on non agp drm/radeon: add a CS flag END_OF_FRAME drm/radeon: stop page faults from hanging the system (v2) drm/radeon/dce4/5: add registers for ELD handling drm/radeon/dce3.2: add registers for ELD handling radeon: fix pll/ctrc mapping on dce2 and dce3 hardware Linux 3.7-rc7 powerpc/eeh: Do not invalidate PE properly Revert "drm/i915: enable rc6 on ilk again" ALSA: hda - Fix build without CONFIG_PM of/address: sparc: Declare of_iomap as an extern function for sparc again PM / QoS: fix wrong error-checking condition bnx2x: remove redundant warning log vxlan: fix command usage in its doc 8139cp: revert "set ring address before enabling receiver" MPI: Fix compilation on MIPS with GCC 4.4 and newer ... Conflicts: drivers/gpu/drm/exynos/exynos_drm_encoder.c drivers/gpu/drm/exynos/exynos_drm_fbdev.c drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
author: Dave Airlie <airlied@redhat.com> 2012-12-07 22:17:07 -0500
committer: Dave Airlie <airlied@redhat.com> 2012-12-10 05:03:58 -0500
commit: 1a1494def7eacbd25db05185aa2e81ef90892460 (patch)
tree: 40911f075b1fe527c6d20bf8c3070d4cdca11e97 /fs
parent: 8de9e417757fb9f130f55a38f4ee7027b60de1c7 (diff)
parent: 71bfe916ebe6d026cd3d0e41c398574fc1228e03 (diff)
38 files changed, 486 insertions, 227 deletions
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fc783e26442..0fb15bbbe43 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
 }
 static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+        memcpy(dst, src, sizeof(*dst));
+        dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+static void
 id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
                struct cifs_sid_id **psidid, char *typestr)
 {
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
                }
        }
-        memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+        cifs_copy_sid(&(*psidid)->sid, sidptr);
        (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
        (*psidid)->refcount = 0;
@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
         * any fields of the node after a reference is put .
         */
        if (test_bit(SID_ID_MAPPED, &psidid->state)) {
-                memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+                cifs_copy_sid(ssid, &psidid->sid);
                psidid->time = jiffies; /* update ts for accessing */
                goto id_sid_out;
        }
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
                if (IS_ERR(sidkey)) {
                        rc = -EINVAL;
                        cFYI(1, "%s: Can't map and id to a SID", __func__);
+                } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+                        rc = -EIO;
+                        cFYI(1, "%s: Downcall contained malformed key "
+                                "(datalen=%hu)", __func__, sidkey->datalen);
                } else {
                        lsid = (struct cifs_sid *)sidkey->payload.data;
-                        memcpy(&psidid->sid, lsid,
+                        cifs_copy_sid(&psidid->sid, lsid);
-                                sidkey->datalen < sizeof(struct cifs_sid) ?
+                        cifs_copy_sid(ssid, &psidid->sid);
-                                sidkey->datalen : sizeof(struct cifs_sid));
-                        memcpy(ssid, &psidid->sid,
-                                sidkey->datalen < sizeof(struct cifs_sid) ?
-                                sidkey->datalen : sizeof(struct cifs_sid));
                        set_bit(SID_ID_MAPPED, &psidid->state);
                        key_put(sidkey);
                        kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
                        return rc;
                }
                if (test_bit(SID_ID_MAPPED, &psidid->state))
-                        memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+                        cifs_copy_sid(ssid, &psidid->sid);
                else
                        rc = -EINVAL;
        }
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 static void copy_sec_desc(const struct cifs_ntsd *pntsd,
                                struct cifs_ntsd *pnntsd, __u32 sidsoffset)
 {
-        int i;
        struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
        struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
        owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->osidoffset));
        nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
+        cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
-        nowner_sid_ptr->revision = owner_sid_ptr->revision;
-        nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
-        for (i = 0; i < 6; i++)
-                nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
-        for (i = 0; i < 5; i++)
-                nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
        /* copy group sid */
        group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->gsidoffset));
        ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
                                        sizeof(struct cifs_sid));
+        cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
-        ngroup_sid_ptr->revision = group_sid_ptr->revision;
-        ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
-        for (i = 0; i < 6; i++)
-                ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
-        for (i = 0; i < 5; i++)
-                ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
        return;
 }
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                                kfree(nowner_sid_ptr);
                                return rc;
                        }
-                        memcpy(owner_sid_ptr, nowner_sid_ptr,
+                        cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
-                                        sizeof(struct cifs_sid));
                        kfree(nowner_sid_ptr);
                        *aclflag = CIFS_ACL_OWNER;
                }
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                                kfree(ngroup_sid_ptr);
                                return rc;
                        }
-                        memcpy(group_sid_ptr, ngroup_sid_ptr,
+                        cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
-                                        sizeof(struct cifs_sid));
                        kfree(ngroup_sid_ptr);
                        *aclflag = CIFS_ACL_GROUP;
                }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7c0a8128364..d3671f2acb2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -398,7 +398,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
         * in network traffic in the other paths.
         */
        if (!(oflags & O_CREAT)) {
-                struct dentry *res = cifs_lookup(inode, direntry, 0);
+                struct dentry *res;
+                /*
+                 * Check for hashed negative dentry. We have already revalidated
+                 * the dentry and it is fine. No need to perform another lookup.
+                 */
+                if (!d_unhashed(direntry))
+                        return -ENOENT;
+                res = cifs_lookup(inode, direntry, 0);
                if (IS_ERR(res))
                        return PTR_ERR(res);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index da72250ddc1..cd96649bfe6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
 /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
 static inline int ep_op_has_event(int op)
 {
-        return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
+        return op != EPOLL_CTL_DEL;
 }
 /* Initialize the poll safe wake up structure */
@@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
        return 0;
 }
-/*
- * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
- * had no event flags set, indicating that another thread may be currently
- * handling that item's events (in the case that EPOLLONESHOT was being
- * used). Otherwise a zero result indicates that the item has been disabled
- * from receiving events. A disabled item may be re-enabled via
- * EPOLL_CTL_MOD. Must be called with "mtx" held.
- */
-static int ep_disable(struct eventpoll *ep, struct epitem *epi)
-{
-        int result = 0;
-        unsigned long flags;
-        spin_lock_irqsave(&ep->lock, flags);
-        if (epi->event.events & ~EP_PRIVATE_BITS) {
-                if (ep_is_linked(&epi->rdllink))
-                        list_del_init(&epi->rdllink);
-                /* Ensure ep_poll_callback will not add epi back onto ready
-                   list: */
-                epi->event.events &= EP_PRIVATE_BITS;
-                }
-        else
-                result = -EBUSY;
-        spin_unlock_irqrestore(&ep->lock, flags);
-        return result;
-}
 static void ep_free(struct eventpoll *ep)
 {
        struct rb_node *rbp;
@@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
        rb_insert_color(&epi->rbn, &ep->rbr);
 }
 #define PATH_ARR_SIZE 5
 /*
 * These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                } else
                        error = -ENOENT;
                break;
-        case EPOLL_CTL_DISABLE:
-                if (epi)
-                        error = ep_disable(ep, epi);
-                else
-                        error = -ENOENT;
-                break;
        }
        mutex_unlock(&ep->mtx);
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7320a66e958..22548f56197 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
        end = start + (range->len >> sb->s_blocksize_bits) - 1;
        minlen = range->minlen >> sb->s_blocksize_bits;
-        if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
+        if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
-            unlikely(start >= max_blks))
+            start >= max_blks ||
+            range->len < sb->s_blocksize)
                return -EINVAL;
        if (end >= max_blks)
                end = max_blks - 1;
diff --git a/fs/file.c b/fs/file.c
index 708d997a774..7cb71b99260 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -685,7 +685,6 @@ void do_close_on_exec(struct files_struct *files)
        struct fdtable *fdt;
        /* exec unshares first */
-        BUG_ON(atomic_read(&files->count) != 1);
        spin_lock(&files->file_lock);
        for (i = 0; ; i++) {
                unsigned long set;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0def0504afc..e056b4ce487 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
                struct gfs2_holder i_gh;
                int error;
-                gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
-                error = gfs2_glock_nq(&i_gh);
+                                           &i_gh);
-                if (error == 0) {
-                        file_accessed(file);
-                        gfs2_glock_dq(&i_gh);
-                }
-                gfs2_holder_uninit(&i_gh);
                if (error)
                        return error;
+                /* grab lock to update inode */
+                gfs2_glock_dq_uninit(&i_gh);
+                file_accessed(file);
        }
        vma->vm_ops = &gfs2_vm_ops;
@@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        size_t writesize = iov_length(iov, nr_segs);
        struct dentry *dentry = file->f_dentry;
        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_sbd *sdp;
        int ret;
-        sdp = GFS2_SB(file->f_mapping->host);
        ret = gfs2_rs_alloc(ip);
        if (ret)
                return ret;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2d54e..9ceccb1595a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct gfs2_meta_header *mh;
        struct gfs2_trans *tr;
-        lock_buffer(bd->bd_bh);
-        gfs2_log_lock(sdp);
        tr = current->journal_info;
        tr->tr_touched = 1;
        if (!list_empty(&bd->bd_list))
-                goto out;
+                return;
        set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
        set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
        mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
@@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        sdp->sd_log_num_buf++;
        list_add(&bd->bd_list, &sdp->sd_log_le_buf);
        tr->tr_num_buf_new++;
-out:
-        gfs2_log_unlock(sdp);
-        unlock_buffer(bd->bd_bh);
 }
 static void gfs2_check_magic(struct buffer_head *bh)
@@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 {
-        struct gfs2_log_descriptor *ld;
        struct gfs2_meta_header *mh;
        unsigned int offset;
        struct list_head *head = &sdp->sd_log_le_revoke;
@@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
        length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
        page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
-        ld = page_address(page);
        offset = sizeof(struct gfs2_log_descriptor);
        list_for_each_entry(bd, head, bd_list) {
@@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct address_space *mapping = bd->bd_bh->b_page->mapping;
        struct gfs2_inode *ip = GFS2_I(mapping->host);
-        lock_buffer(bd->bd_bh);
-        gfs2_log_lock(sdp);
        if (tr)
                tr->tr_touched = 1;
        if (!list_empty(&bd->bd_list))
-                goto out;
+                return;
        set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
        set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
        if (gfs2_is_jdata(ip)) {
@@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        } else {
                list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
        }
-out:
-        gfs2_log_unlock(sdp);
-        unlock_buffer(bd->bd_bh);
 }
 /**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 40c4b0d42fa..c5af8e18f27 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
        struct gfs2_quota_data **qd;
        int error;
-        if (ip->i_res == NULL)
+        if (ip->i_res == NULL) {
-                gfs2_rs_alloc(ip);
+                error = gfs2_rs_alloc(ip);
+                if (error)
+                        return error;
+        }
        qd = ip->i_res->rs_qa_qd;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3cc402ce6fe..38fe18f2f05 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 */
 int gfs2_rs_alloc(struct gfs2_inode *ip)
 {
-        int error = 0;
        struct gfs2_blkreserv *res;
        if (ip->i_res)
@@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
        res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
        if (!res)
-                error = -ENOMEM;
+                return -ENOMEM;
        RB_CLEAR_NODE(&res->rs_node);
@@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
        else
                ip->i_res = res;
        up_write(&ip->i_rw_mutex);
-        return error;
+        return 0;
 }
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        int ret = 0;
        u64 amt;
        u64 trimmed = 0;
+        u64 start, end, minlen;
        unsigned int x;
+        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;
-        if (argp == NULL) {
+        if (copy_from_user(&r, argp, sizeof(r)))
-                r.start = 0;
-                r.len = ULLONG_MAX;
-                r.minlen = 0;
-        } else if (copy_from_user(&r, argp, sizeof(r)))
                return -EFAULT;
        ret = gfs2_rindex_update(sdp);
        if (ret)
                return ret;
-        rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
+        start = r.start >> bs_shift;
-        rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
+        end = start + (r.len >> bs_shift);
+        minlen = max_t(u64, r.minlen,
+                       q->limits.discard_granularity) >> bs_shift;
+        rgd = gfs2_blk2rgrpd(sdp, start, 0);
+        rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
+        if (end <= start ||
+            minlen > sdp->sd_max_rg_data ||
+            start > rgd_end->rd_data0 + rgd_end->rd_data)
+                return -EINVAL;
        while (1) {
@@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
                        /* Trim each bitmap in the rgrp */
                        for (x = 0; x < rgd->rd_length; x++) {
                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
-                                ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
+                                ret = gfs2_rgrp_send_discards(sdp,
+                                                rgd->rd_data0, NULL, bi, minlen,
+                                                &amt);
                                if (ret) {
                                        gfs2_glock_dq_uninit(&gh);
                                        goto out;
@@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 out:
        r.len = trimmed << 9;
-        if (argp && copy_to_user(argp, &r, sizeof(r)))
+        if (copy_to_user(argp, &r, sizeof(r)))
                return -EFAULT;
        return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bc737261f23..d6488674d91 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
                        return;
                }
                need_unlock = 1;
-        }
+        } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
+                return;
        if (current->journal_info == NULL) {
                ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd27875ef..413627072f3 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_bufdata *bd;
+        lock_buffer(bh);
+        gfs2_log_lock(sdp);
        bd = bh->b_private;
        if (bd)
                gfs2_assert(sdp, bd->bd_gl == gl);
        else {
+                gfs2_log_unlock(sdp);
+                unlock_buffer(bh);
                gfs2_attach_bufdata(gl, bh, meta);
                bd = bh->b_private;
+                lock_buffer(bh);
+                gfs2_log_lock(sdp);
        }
        lops_add(sdp, bd);
+        gfs2_log_unlock(sdp);
+        unlock_buffer(bh);
 }
 void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 60ef3fb707f..1506673c087 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
        struct page *pg;
        struct inode *inode = mapping->host;
        struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+        struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+        struct jffs2_raw_inode ri;
+        uint32_t alloc_len = 0;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        uint32_t pageofs = index << PAGE_CACHE_SHIFT;
        int ret = 0;
+        jffs2_dbg(1, "%s()\n", __func__);
+        if (pageofs > inode->i_size) {
+                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+                if (ret)
+                        return ret;
+        }
+        mutex_lock(&f->sem);
        pg = grab_cache_page_write_begin(mapping, index, flags);
-        if (!pg)
+        if (!pg) {
+                if (alloc_len)
+                        jffs2_complete_reservation(c);
+                mutex_unlock(&f->sem);
                return -ENOMEM;
+        }
        *pagep = pg;
-        jffs2_dbg(1, "%s()\n", __func__);
+        if (alloc_len) {
-        if (pageofs > inode->i_size) {
                /* Make new hole frag from old EOF to new page */
-                struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
-                struct jffs2_raw_inode ri;
                struct jffs2_full_dnode *fn;
-                uint32_t alloc_len;
                jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
                          (unsigned int)inode->i_size, pageofs);
-                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
-                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
-                if (ret)
-                        goto out_page;
-                mutex_lock(&f->sem);
                memset(&ri, 0, sizeof(ri));
                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                if (IS_ERR(fn)) {
                        ret = PTR_ERR(fn);
                        jffs2_complete_reservation(c);
-                        mutex_unlock(&f->sem);
                        goto out_page;
                }
                ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                        jffs2_mark_node_obsolete(c, fn->raw);
                        jffs2_free_full_dnode(fn);
                        jffs2_complete_reservation(c);
-                        mutex_unlock(&f->sem);
                        goto out_page;
                }
                jffs2_complete_reservation(c);
                inode->i_size = pageofs;
-                mutex_unlock(&f->sem);
        }
        /*
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
         * case of a short-copy.
         */
        if (!PageUptodate(pg)) {
-                mutex_lock(&f->sem);
                ret = jffs2_do_readpage_nolock(inode, pg);
-                mutex_unlock(&f->sem);
                if (ret)
                        goto out_page;
        }
+        mutex_unlock(&f->sem);
        jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
        return ret;
 out_page:
        unlock_page(pg);
        page_cache_release(pg);
+        mutex_unlock(&f->sem);
        return ret;
 }
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8..a5063602536 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
                        if ((old->path.mnt == new->path.mnt) &&
                            (old->path.dentry == new->path.dentry))
                                return true;
+                        break;
                case (FSNOTIFY_EVENT_NONE):
                        return true;
                default:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 721d692fa8d..6fcaeb8c902 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
        if (ret)
                goto out_close_fd;
-        fd_install(fd, f);
+        if (fd != FAN_NOFD)
+                fd_install(fd, f);
        return fanotify_event_metadata.event_len;
 out_close_fd:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 144a96732dd..3c231adf845 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = {
        .release        = mem_release,
 };
+static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
+                            loff_t *ppos)
+{
+        struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+        char buffer[PROC_NUMBUF];
+        int oom_adj = OOM_ADJUST_MIN;
+        size_t len;
+        unsigned long flags;
+        if (!task)
+                return -ESRCH;
+        if (lock_task_sighand(task, &flags)) {
+                if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
+                        oom_adj = OOM_ADJUST_MAX;
+                else
+                        oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
+                                  OOM_SCORE_ADJ_MAX;
+                unlock_task_sighand(task, &flags);
+        }
+        put_task_struct(task);
+        len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
+        return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+static ssize_t oom_adj_write(struct file *file, const char __user *buf,
+                             size_t count, loff_t *ppos)
+{
+        struct task_struct *task;
+        char buffer[PROC_NUMBUF];
+        int oom_adj;
+        unsigned long flags;
+        int err;
+        memset(buffer, 0, sizeof(buffer));
+        if (count > sizeof(buffer) - 1)
+                count = sizeof(buffer) - 1;
+        if (copy_from_user(buffer, buf, count)) {
+                err = -EFAULT;
+                goto out;
+        }
+        err = kstrtoint(strstrip(buffer), 0, &oom_adj);
+        if (err)
+                goto out;
+        if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
+             oom_adj != OOM_DISABLE) {
+                err = -EINVAL;
+                goto out;
+        }
+        task = get_proc_task(file->f_path.dentry->d_inode);
+        if (!task) {
+                err = -ESRCH;
+                goto out;
+        }
+        task_lock(task);
+        if (!task->mm) {
+                err = -EINVAL;
+                goto err_task_lock;
+        }
+        if (!lock_task_sighand(task, &flags)) {
+                err = -ESRCH;
+                goto err_task_lock;
+        }
+        /*
+         * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
+         * value is always attainable.
+         */
+        if (oom_adj == OOM_ADJUST_MAX)
+                oom_adj = OOM_SCORE_ADJ_MAX;
+        else
+                oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
+        if (oom_adj < task->signal->oom_score_adj &&
+            !capable(CAP_SYS_RESOURCE)) {
+                err = -EACCES;
+                goto err_sighand;
+        }
+        /*
+         * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
+         * /proc/pid/oom_score_adj instead.
+         */
+        printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+                  current->comm, task_pid_nr(current), task_pid_nr(task),
+                  task_pid_nr(task));
+        task->signal->oom_score_adj = oom_adj;
+        trace_oom_score_adj_update(task);
+err_sighand:
+        unlock_task_sighand(task, &flags);
+err_task_lock:
+        task_unlock(task);
+        put_task_struct(task);
+out:
+        return err < 0 ? err : count;
+}
+static const struct file_operations proc_oom_adj_operations = {
+        .read           = oom_adj_read,
+        .write          = oom_adj_write,
+        .llseek         = generic_file_llseek,
+};
 static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
                                        size_t count, loff_t *ppos)
 {
@@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
        INF("oom_score",  S_IRUGO, proc_oom_score),
+        REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = {
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
        INF("oom_score", S_IRUGO, proc_oom_score),
+        REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index a40da07e93d..947fbe06c3b 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -161,6 +161,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
        while (s < e) {
                unsigned long flags;
+                u64 id;
                if (c > psinfo->bufsize)
                        c = psinfo->bufsize;
@@ -172,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
                        spin_lock_irqsave(&psinfo->buf_lock, flags);
                }
                memcpy(psinfo->buf, s, c);
-                psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo);
+                psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, c, psinfo);
                spin_unlock_irqrestore(&psinfo->buf_lock, flags);
                s += c;
                c = e - s;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f27f01a98aa..d83736fbc26 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
        BUG_ON(!th->t_trans_id);
-        dquot_initialize(inode);
+        reiserfs_write_unlock(inode->i_sb);
        err = dquot_alloc_inode(inode);
+        reiserfs_write_lock(inode->i_sb);
        if (err)
                goto out_end_trans;
        if (!dir->i_nlink) {
@@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
      out_end_trans:
        journal_end(th, th->t_super, th->t_blocks_allocated);
+        reiserfs_write_unlock(inode->i_sb);
        /* Drop can be outside and it needs more credits so it's better to have it outside */
        dquot_drop(inode);
+        reiserfs_write_lock(inode->i_sb);
        inode->i_flags |= S_NOQUOTA;
        make_bad_inode(inode);
@@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
        /* must be turned off for recursive notify_change calls */
        ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
-        depth = reiserfs_write_lock_once(inode->i_sb);
        if (is_quota_modification(inode, attr))
                dquot_initialize(inode);
+        depth = reiserfs_write_lock_once(inode->i_sb);
        if (attr->ia_valid & ATTR_SIZE) {
                /* version 2 items will be caught by the s_maxbytes check
                 ** done for us in vmtruncate
@@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
                error = journal_begin(&th, inode->i_sb, jbegin_count);
                if (error)
                        goto out;
+                reiserfs_write_unlock_once(inode->i_sb, depth);
                error = dquot_transfer(inode, attr);
+                depth = reiserfs_write_lock_once(inode->i_sb);
                if (error) {
                        journal_end(&th, inode->i_sb, jbegin_count);
                        goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b162b..2f40a4c70a4 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
                       key2type(&(key->on_disk_key)));
 #endif
+        reiserfs_write_unlock(inode->i_sb);
        retval = dquot_alloc_space_nodirty(inode, pasted_size);
+        reiserfs_write_lock(inode->i_sb);
        if (retval) {
                pathrelse(search_path);
                return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
                               "reiserquota insert_item(): allocating %u id=%u type=%c",
                               quota_bytes, inode->i_uid, head2type(ih));
 #endif
+                reiserfs_write_unlock(inode->i_sb);
                /* We can't dirty inode here. It would be immediately written but
                 * appropriate stat item isn't inserted yet... */
                retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+                reiserfs_write_lock(inode->i_sb);
                if (retval) {
                        pathrelse(path);
                        return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1078ae17999..418bdc3a57d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s)
                        retval = remove_save_link_only(s, &save_link_key, 0);
                        continue;
                }
+                reiserfs_write_unlock(s);
                dquot_initialize(inode);
+                reiserfs_write_lock(s);
                if (truncate && S_ISDIR(inode->i_mode)) {
                        /* We got a truncate request for a dir which is impossible.
@@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                                kfree(qf_names[i]);
 #endif
                err = -EINVAL;
-                goto out_err;
+                goto out_unlock;
        }
 #ifdef CONFIG_QUOTA
        handle_quota_files(s, qf_names, &qfmt);
@@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
        if (blocks) {
                err = reiserfs_resize(s, blocks);
                if (err != 0)
-                        goto out_err;
+                        goto out_unlock;
        }
        if (*mount_flags & MS_RDONLY) {
@@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                        /* it is read-only already */
                        goto out_ok;
+                /*
+                 * Drop write lock. Quota will retake it when needed and lock
+                 * ordering requires calling dquot_suspend() without it.
+                 */
+                reiserfs_write_unlock(s);
                err = dquot_suspend(s, -1);
                if (err < 0)
                        goto out_err;
+                reiserfs_write_lock(s);
                /* try to remount file system with read-only permissions */
                if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                err = journal_begin(&th, s, 10);
                if (err)
-                        goto out_err;
+                        goto out_unlock;
                /* Mounting a rw partition read-only. */
                reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                if (reiserfs_is_journal_aborted(journal)) {
                        err = journal->j_errno;
-                        goto out_err;
+                        goto out_unlock;
                }
                handle_data_mode(s, mount_options);
@@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                s->s_flags &= ~MS_RDONLY;       /* now it is safe to call journal_begin */
                err = journal_begin(&th, s, 10);
                if (err)
-                        goto out_err;
+                        goto out_unlock;
                /* Mount a partition which is read-only, read-write */
                reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
        SB_JOURNAL(s)->j_must_wait = 1;
        err = journal_end(&th, s, 10);
        if (err)
-                goto out_err;
+                goto out_unlock;
        if (!(*mount_flags & MS_RDONLY)) {
+                /*
+                 * Drop write lock. Quota will retake it when needed and lock
+                 * ordering requires calling dquot_resume() without it.
+                 */
+                reiserfs_write_unlock(s);
                dquot_resume(s, -1);
+                reiserfs_write_lock(s);
                finish_unfinished(s);
                reiserfs_xattr_init(s, *mount_flags);
        }
@@ -1455,9 +1469,10 @@ out_ok:
        reiserfs_write_unlock(s);
        return 0;
+out_unlock:
+        reiserfs_write_unlock(s);
 out_err:
        kfree(new_opts);
-        reiserfs_write_unlock(s);
        return err;
 }
@@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
                          REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (ret)
                goto out;
+        reiserfs_write_unlock(dquot->dq_sb);
        ret = dquot_commit(dquot);
+        reiserfs_write_lock(dquot->dq_sb);
        err =
            journal_end(&th, dquot->dq_sb,
                        REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (!ret && err)
                ret = err;
-      out:
+out:
        reiserfs_write_unlock(dquot->dq_sb);
        return ret;
 }
@@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
                          REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
        if (ret)
                goto out;
+        reiserfs_write_unlock(dquot->dq_sb);
        ret = dquot_acquire(dquot);
+        reiserfs_write_lock(dquot->dq_sb);
        err =
            journal_end(&th, dquot->dq_sb,
                        REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
        if (!ret && err)
                ret = err;
-      out:
+out:
        reiserfs_write_unlock(dquot->dq_sb);
        return ret;
 }
@@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
        ret =
            journal_begin(&th, dquot->dq_sb,
                          REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+        reiserfs_write_unlock(dquot->dq_sb);
        if (ret) {
                /* Release dquot anyway to avoid endless cycle in dqput() */
                dquot_release(dquot);
                goto out;
        }
        ret = dquot_release(dquot);
+        reiserfs_write_lock(dquot->dq_sb);
        err =
            journal_end(&th, dquot->dq_sb,
                        REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
        if (!ret && err)
                ret = err;
-      out:
        reiserfs_write_unlock(dquot->dq_sb);
+out:
        return ret;
 }
@@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
        ret = journal_begin(&th, sb, 2);
        if (ret)
                goto out;
+        reiserfs_write_unlock(sb);
        ret = dquot_commit_info(sb, type);
+        reiserfs_write_lock(sb);
        err = journal_end(&th, sb, 2);
        if (!ret && err)
                ret = err;
-      out:
+out:
        reiserfs_write_unlock(sb);
        return ret;
 }
@@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
        struct reiserfs_transaction_handle th;
        int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
-        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
+        reiserfs_write_lock(sb);
-                return -EINVAL;
+        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+                err = -EINVAL;
+                goto out;
+        }
        /* Quotafile not on the same filesystem? */
        if (path->dentry->d_sb != sb) {
@@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                if (err)
                        goto out;
        }
-        err = dquot_quota_on(sb, type, format_id, path);
+        reiserfs_write_unlock(sb);
+        return dquot_quota_on(sb, type, format_id, path);
 out:
+        reiserfs_write_unlock(sb);
        return err;
 }
@@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
                tocopy = sb->s_blocksize - offset < towrite ?
                    sb->s_blocksize - offset : towrite;
                tmp_bh.b_state = 0;
+                reiserfs_write_lock(sb);
                err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+                reiserfs_write_unlock(sb);
                if (err)
                        goto out;
                if (offset || tocopy != sb->s_blocksize)
@@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
                flush_dcache_page(bh->b_page);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
+                reiserfs_write_lock(sb);
                reiserfs_prepare_for_journal(sb, bh, 1);
                journal_mark_dirty(current->journal_info, sb, bh);
                if (!journal_quota)
                        reiserfs_add_ordered_list(inode, bh);
+                reiserfs_write_unlock(sb);
                brelse(bh);
                offset = 0;
                towrite -= tocopy;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 28ec13af28d..2dcf3d473fe 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
        if (!lprops) {
                lprops = ubifs_fast_find_freeable(c);
                if (!lprops) {
-                        ubifs_assert(c->freeable_cnt == 0);
+                        /*
-                        if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+                         * The first condition means the following: go scan the
+                         * LPT if there are uncategorized lprops, which means
+                         * there may be freeable LEBs there (UBIFS does not
+                         * store the information about freeable LEBs in the
+                         * master node).
+                         */
+                        if (c->in_a_category_cnt != c->main_lebs ||
+                            c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+                                ubifs_assert(c->freeable_cnt == 0);
                                lprops = scan_for_leb_for_idx(c);
                                if (IS_ERR(lprops)) {
                                        err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index e5a2a35a46d..46190a7c42a 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
        default:
                ubifs_assert(0);
        }
        lprops->flags &= ~LPROPS_CAT_MASK;
        lprops->flags |= cat;
+        c->in_a_category_cnt += 1;
+        ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
 }
 /**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
        default:
                ubifs_assert(0);
        }
+        c->in_a_category_cnt -= 1;
+        ubifs_assert(c->in_a_category_cnt >= 0);
 }
 /**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 5486346d0a3..d133c276fe0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1183,6 +1183,8 @@ struct ubifs_debug_info;
 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
 * @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ *                     basically means that they were loaded from the flash
 *
 * @ltab_lnum: LEB number of LPT's own lprops table
 * @ltab_offs: offset of LPT's own lprops table
@@ -1412,6 +1414,7 @@ struct ubifs_info {
        struct list_head freeable_list;
        struct list_head frdi_idx_list;
        int freeable_cnt;
+        int in_a_category_cnt;
        int ltab_lnum;
        int ltab_offs;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4f33c32affe..335206a9c69 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
        /*
         * Initialize the args structure.
         */
+        memset(&targs, 0, sizeof(targs));
        targs.tp = tp;
        targs.mp = mp;
        targs.agbp = agbp;
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
 * group or loop over the allocation groups to find the result.
 */
 int                             /* error */
-__xfs_alloc_vextent(
+xfs_alloc_vextent(
        xfs_alloc_arg_t *args)  /* allocation argument structure */
 {
        xfs_agblock_t   agsize; /* allocation group size */
@@ -2417,46 +2418,6 @@ error0:
        return error;
 }
-static void
-xfs_alloc_vextent_worker(
-        struct work_struct      *work)
-{
-        struct xfs_alloc_arg    *args = container_of(work,
-                                                struct xfs_alloc_arg, work);
-        unsigned long           pflags;
-        /* we are in a transaction context here */
-        current_set_flags_nested(&pflags, PF_FSTRANS);
-        args->result = __xfs_alloc_vextent(args);
-        complete(args->done);
-        current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-/*
- * Data allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Metadata
- * requests, OTOH, are generally from low stack usage paths, so avoid the
- * context switch overhead here.
- */
-int
-xfs_alloc_vextent(
-        struct xfs_alloc_arg    *args)
-{
-        DECLARE_COMPLETION_ONSTACK(done);
-        if (!args->userdata)
-                return __xfs_alloc_vextent(args);
-        args->done = &done;
-        INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
-        queue_work(xfs_alloc_wq, &args->work);
-        wait_for_completion(&done);
-        return args->result;
-}
 /*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 93be4a667ca..feacb061bab 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
        char            isfl;           /* set if is freelist blocks - !acctg */
        char            userdata;       /* set if this is user data */
        xfs_fsblock_t   firstblock;     /* io first block allocated */
-        struct completion *done;
-        struct work_struct work;
-        int             result;
 } xfs_alloc_arg_t;
 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index f1647caace8..f7876c6d616 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -121,6 +121,8 @@ xfs_allocbt_free_block(
        xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
                              XFS_EXTENT_BUSY_SKIP_DISCARD);
        xfs_trans_agbtree_delta(cur->bc_tp, -1);
+        xfs_trans_binval(cur->bc_tp, bp);
        return 0;
 }
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e562dd43f41..e57e2daa357 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -481,11 +481,17 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 *
 * The fix is two passes across the ioend list - one to start writeback on the
 * buffer_heads, and then submit them for I/O on the second pass.
+ *
+ * If @fail is non-zero, it means that we have a situation where some part of
+ * the submission process has failed after we have marked pages for writeback
+ * and unlocked them. In this situation, we need to fail the ioend chain rather
+ * than submit it to IO. This typically only happens on a filesystem shutdown.
 */
 STATIC void
 xfs_submit_ioend(
        struct writeback_control *wbc,
-        xfs_ioend_t             *ioend)
+        xfs_ioend_t             *ioend,
+        int                     fail)
 {
        xfs_ioend_t             *head = ioend;
        xfs_ioend_t             *next;
@@ -506,6 +512,18 @@ xfs_submit_ioend(
                next = ioend->io_list;
                bio = NULL;
+                /*
+                 * If we are failing the IO now, just mark the ioend with an
+                 * error and finish it. This will run IO completion immediately
+                 * as there is only one reference to the ioend at this point in
+                 * time.
+                 */
+                if (fail) {
+                        ioend->io_error = -fail;
+                        xfs_finish_ioend(ioend);
+                        continue;
+                }
                for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
                        if (!bio) {
@@ -1060,7 +1078,18 @@ xfs_vm_writepage(
        xfs_start_page_writeback(page, 1, count);
-        if (ioend && imap_valid) {
+        /* if there is no IO to be submitted for this page, we are done */
+        if (!ioend)
+                return 0;
+        ASSERT(iohead);
+        /*
+         * Any errors from this point onwards need to be reported through the IO
+         * completion path as we have marked the initial page as under writeback
+         * and unlocked it.
+         */
+        if (imap_valid) {
                xfs_off_t               end_index;
                end_index = imap.br_startoff + imap.br_blockcount;
@@ -1079,20 +1108,15 @@ xfs_vm_writepage(
                                  wbc, end_index);
        }
-        if (iohead) {
-                /*
-                 * Reserve log space if we might write beyond the on-disk
-                 * inode size.
-                 */
-                if (ioend->io_type != XFS_IO_UNWRITTEN &&
-                    xfs_ioend_is_append(ioend)) {
-                        err = xfs_setfilesize_trans_alloc(ioend);
-                        if (err)
-                                goto error;
-                }
-                xfs_submit_ioend(wbc, iohead);
+        /*
-        }
+         * Reserve log space if we might write beyond the on-disk inode size.
+         */
+        err = 0;
+        if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+                err = xfs_setfilesize_trans_alloc(ioend);
+        xfs_submit_ioend(wbc, iohead, err);
        return 0;
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d330111ca73..70eec182977 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1291,6 +1291,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
        leaf2 = blk2->bp->b_addr;
        ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
        ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+        ASSERT(leaf2->hdr.count == 0);
        args = state->args;
        trace_xfs_attr_leaf_rebalance(args);
@@ -1361,6 +1362,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 * I assert that since all callers pass in an empty
                 * second buffer, this code should never execute.
                 */
+                ASSERT(0);
                /*
                 * Figure the total bytes to be added to the destination leaf.
@@ -1422,10 +1424,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                        args->index2 = 0;
                        args->blkno2 = blk2->blkno;
                } else {
+                        /*
+                         * On a double leaf split, the original attr location
+                         * is already stored in blkno2/index2, so don't
+                         * overwrite it, otherwise we corrupt the tree.
+                         */
                        blk2->index = blk1->index
                                    - be16_to_cpu(leaf1->hdr.count);
-                        args->index = args->index2 = blk2->index;
+                        args->index = blk2->index;
-                        args->blkno = args->blkno2 = blk2->blkno;
+                        args->blkno = blk2->blkno;
+                        if (!state->extravalid) {
+                                /*
+                                 * set the new attr location to match the old
+                                 * one and let the higher level split code
+                                 * decide where in the leaf to place it.
+                                 */
+                                args->index2 = blk2->index;
+                                args->blkno2 = blk2->blkno;
+                        }
                }
        } else {
                ASSERT(state->inleaf == 1);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 848ffa77707..83d0cf3df93 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
         * Normal allocation, done through xfs_alloc_vextent.
         */
        tryagain = isaligned = 0;
+        memset(&args, 0, sizeof(args));
        args.tp = ap->tp;
        args.mp = mp;
        args.fsbno = ap->blkno;
@@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
         * Convert to a btree with two levels, one record in root.
         */
        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = mp;
        args.firstblock = *firstblock;
@@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
                xfs_buf_t       *bp;    /* buffer for extent block */
                xfs_bmbt_rec_host_t *ep;/* extent record pointer */
+                memset(&args, 0, sizeof(args));
                args.tp = tp;
                args.mp = ip->i_mount;
                args.firstblock = *firstblock;
@@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
 STATIC int
-xfs_bmapi_allocate(
+__xfs_bmapi_allocate(
-        struct xfs_bmalloca     *bma,
+        struct xfs_bmalloca     *bma)
-        int                     flags)
 {
        struct xfs_mount        *mp = bma->ip->i_mount;
-        int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+        int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
                                                XFS_ATTR_FORK : XFS_DATA_FORK;
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
        int                     tmp_logflags = 0;
@@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
         * Indicate if this is the first user data in the file, or just any
         * user data.
         */
-        if (!(flags & XFS_BMAPI_METADATA)) {
+        if (!(bma->flags & XFS_BMAPI_METADATA)) {
                bma->userdata = (bma->offset == 0) ?
                        XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
        }
-        bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+        bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
        /*
         * Only want to do the alignment at the eof if it is userdata and
         * allocation length is larger than a stripe unit.
         */
        if (mp->m_dalign && bma->length >= mp->m_dalign &&
-            !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+            !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
                error = xfs_bmap_isaeof(bma, whichfork);
                if (error)
                        return error;
        }
+        if (bma->flags & XFS_BMAPI_STACK_SWITCH)
+                bma->stack_switch = 1;
        error = xfs_bmap_alloc(bma);
        if (error)
                return error;
@@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
         * A wasdelay extent has been initialized, so shouldn't be flagged
         * as unwritten.
         */
-        if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
+        if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
            xfs_sb_version_hasextflgbit(&mp->m_sb))
                bma->got.br_state = XFS_EXT_UNWRITTEN;
@@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
        return 0;
 }
+static void
+xfs_bmapi_allocate_worker(
+        struct work_struct      *work)
+{
+        struct xfs_bmalloca     *args = container_of(work,
+                                                struct xfs_bmalloca, work);
+        unsigned long           pflags;
+        /* we are in a transaction context here */
+        current_set_flags_nested(&pflags, PF_FSTRANS);
+        args->result = __xfs_bmapi_allocate(args);
+        complete(args->done);
+        current_restore_flags_nested(&pflags, PF_FSTRANS);
+}
+/*
+ * Some allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Otherwise just
+ * call directly to avoid the context switch overhead here.
+ */
+int
+xfs_bmapi_allocate(
+        struct xfs_bmalloca     *args)
+{
+        DECLARE_COMPLETION_ONSTACK(done);
+        if (!args->stack_switch)
+                return __xfs_bmapi_allocate(args);
+        args->done = &done;
+        INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
+        queue_work(xfs_alloc_wq, &args->work);
+        wait_for_completion(&done);
+        return args->result;
+}
 STATIC int
 xfs_bmapi_convert_unwritten(
        struct xfs_bmalloca     *bma,
@@ -4919,6 +4963,7 @@ xfs_bmapi_write(
                        bma.conv = !!(flags & XFS_BMAPI_CONVERT);
                        bma.wasdel = wasdelay;
                        bma.offset = bno;
+                        bma.flags = flags;
                        /*
                         * There's a 32/64 bit type mismatch between the
@@ -4934,7 +4979,7 @@ xfs_bmapi_write(
                        ASSERT(len > 0);
                        ASSERT(bma.length > 0);
-                        error = xfs_bmapi_allocate(&bma, flags);
+                        error = xfs_bmapi_allocate(&bma);
                        if (error)
                                goto error0;
                        if (bma.blkno == NULLFSBLOCK)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 803b56d7ce1..5f469c3516e 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,6 +77,7 @@ typedef	struct xfs_bmap_free
 * from written to unwritten, otherwise convert from unwritten to written.
 */
 #define XFS_BMAPI_CONVERT       0x040
+#define XFS_BMAPI_STACK_SWITCH  0x080
 #define XFS_BMAPI_FLAGS \
        { XFS_BMAPI_ENTIRE,     "ENTIRE" }, \
@@ -85,7 +86,8 @@ typedef	struct xfs_bmap_free
        { XFS_BMAPI_PREALLOC,   "PREALLOC" }, \
        { XFS_BMAPI_IGSTATE,    "IGSTATE" }, \
        { XFS_BMAPI_CONTIG,     "CONTIG" }, \
-        { XFS_BMAPI_CONVERT,    "CONVERT" }
+        { XFS_BMAPI_CONVERT,    "CONVERT" }, \
+        { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
 static inline int xfs_bmapi_aflag(int w)
@@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
        char                    userdata;/* set if is user data */
        char                    aeof;   /* allocated space at eof */
        char                    conv;   /* overwriting unwritten extents */
+        char                    stack_switch;
+        int                     flags;
+        struct completion       *done;
+        struct work_struct      work;
+        int                     result;
 } xfs_bmalloca_t;
 /*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 933b7930b86..4b0b8dd1b7b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1197,9 +1197,14 @@ xfs_buf_bio_end_io(
 {
        xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-        xfs_buf_ioerror(bp, -error);
+        /*
+         * don't overwrite existing errors - otherwise we can lose errors on
+         * buffers that require multiple bios to complete.
+         */
+        if (!bp->b_error)
+                xfs_buf_ioerror(bp, -error);
-        if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+        if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
        _xfs_buf_ioend(bp, 1);
@@ -1279,6 +1284,11 @@ next_chunk:
                if (size)
                        goto next_chunk;
        } else {
+                /*
+                 * This is guaranteed not to be the last io reference count
+                 * because the caller (xfs_buf_iorequest) holds a count itself.
+                 */
+                atomic_dec(&bp->b_io_remaining);
                xfs_buf_ioerror(bp, EIO);
                bio_put(bio);
        }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a8d0ed91119..becf4a97efc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -526,7 +526,25 @@ xfs_buf_item_unpin(
                }
                xfs_buf_relse(bp);
        } else if (freed && remove) {
+                /*
+                 * There are currently two references to the buffer - the active
+                 * LRU reference and the buf log item. What we are about to do
+                 * here - simulate a failed IO completion - requires 3
+                 * references.
+                 *
+                 * The LRU reference is removed by the xfs_buf_stale() call. The
+                 * buf item reference is removed by the xfs_buf_iodone()
+                 * callback that is run by xfs_buf_do_callbacks() during ioend
+                 * processing (via the bp->b_iodone callback), and then finally
+                 * the ioend processing will drop the IO reference if the buffer
+                 * is marked XBF_ASYNC.
+                 *
+                 * Hence we need to take an additional reference here so that IO
+                 * completion processing doesn't free the buffer prematurely.
+                 */
                xfs_buf_lock(bp);
+                xfs_buf_hold(bp);
+                bp->b_flags |= XBF_ASYNC;
                xfs_buf_ioerror(bp, EIO);
                XFS_BUF_UNDONE(bp);
                xfs_buf_stale(bp);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c25b094efbf..4beaede4327 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -399,9 +399,26 @@ xfs_growfs_data_private(
        /* update secondary superblocks. */
        for (agno = 1; agno < nagcount; agno++) {
-                error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                error = 0;
+                /*
+                 * new secondary superblocks need to be zeroed, not read from
+                 * disk as the contents of the new area we are growing into are
+                 * completely unknown.
+                 */
+                if (agno < oagcount) {
+                        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
                                  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
                                  XFS_FSS_TO_BB(mp, 1), 0, &bp);
+                } else {
+                        bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
+                                  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+                                  XFS_FSS_TO_BB(mp, 1), 0);
+                        if (bp)
+                                xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+                        else
+                                error = ENOMEM;
+                }
                if (error) {
                        xfs_warn(mp,
                "error %d reading secondary superblock for ag %d",
@@ -423,7 +440,7 @@ xfs_growfs_data_private(
                        break; /* no point in continuing */
                }
        }
-        return 0;
+        return error;
 error0:
        xfs_trans_cancel(tp, XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 445bf1aef31..c5c4ef4f2bd 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
                                        /* boundary */
        struct xfs_perag *pag;
+        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = tp->t_mountp;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258fcfa..1938b41ee9f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
                 * to mark all the active inodes on the buffer stale.
                 */
                bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-                                        mp->m_bsize * blks_per_cluster, 0);
+                                        mp->m_bsize * blks_per_cluster,
+                                        XBF_UNMAPPED);
                if (!bp)
                        return ENOMEM;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8305f2ac677..c1df3c623de 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,7 +70,7 @@ xfs_find_handle(
        int                     hsize;
        xfs_handle_t            handle;
        struct inode            *inode;
-        struct fd               f;
+        struct fd               f = {0};
        struct path             path;
        int                     error;
        struct xfs_inode        *ip;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6ad93..7f537663365 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
                         * pointer that the caller gave to us.
                         */
                        error = xfs_bmapi_write(tp, ip, map_start_fsb,
-                                                count_fsb, 0, &first_block, 1,
+                                                count_fsb,
+                                                XFS_BMAPI_STACK_SWITCH,
+                                                &first_block, 1,
                                                imap, &nimaps, &free_list);
                        if (error)
                                goto trans_cancel;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7f4f9370d0e..4dad756962d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2387,14 +2387,27 @@ xlog_state_do_callback(
                                /*
-                                 * update the last_sync_lsn before we drop the
+                                 * Completion of an iclog IO does not imply that
+                                 * a transaction has completed, as transactions
+                                 * can be large enough to span many iclogs. We
+                                 * cannot change the tail of the log half way
+                                 * through a transaction as this may be the only
+                                 * transaction in the log and moving the tail to
+                                 * point to the middle of it will prevent
+                                 * recovery from finding the start of the
+                                 * transaction. Hence we should only update the
+                                 * last_sync_lsn if this iclog contains
+                                 * transaction completion callbacks on it.
+                                 *
+                                 * We have to do this before we drop the
                                 * icloglock to ensure we are the only one that
                                 * can update it.
                                 */
                                ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
                                        be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
-                                atomic64_set(&log->l_last_sync_lsn,
+                                if (iclog->ic_callback)
-                                        be64_to_cpu(iclog->ic_header.h_lsn));
+                                        atomic64_set(&log->l_last_sync_lsn,
+                                                be64_to_cpu(iclog->ic_header.h_lsn));
                        } else
                                ioerrors++;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5da3ace352b..d308749fabf 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
                                 *   - order is important.
                                 */
                                error = xlog_bread_offset(log, 0,
-                                                bblks - split_bblks, hbp,
+                                                bblks - split_bblks, dbp,
                                                offset + BBTOB(split_bblks));
                                if (error)
                                        goto bread_err2;
author	Dave Airlie <airlied@redhat.com>	2012-12-07 22:17:07 -0500
committer	Dave Airlie <airlied@redhat.com>	2012-12-10 05:03:58 -0500
commit	1a1494def7eacbd25db05185aa2e81ef90892460 (patch)
tree	40911f075b1fe527c6d20bf8c3070d4cdca11e97 /fs
parent	8de9e417757fb9f130f55a38f4ee7027b60de1c7 (diff)
parent	71bfe916ebe6d026cd3d0e41c398574fc1228e03 (diff)