Merge commit 'v2.6.27-rc3' into x86/urgent

author: Ingo Molnar <mingo@elte.hu> 2008-08-13 07:08:47 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-08-13 07:08:47 -0400
commit: a12e61df4fa1cfae7a6b76976fa65a6fcb048e3f (patch)
tree: 957c915298b299902aa80afa7c78e94c71b3dc25 /fs
parent: 7b27718bdb1b70166383dec91391df5534d449ee (diff)
parent: 30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)
163 files changed, 5161 insertions, 4452 deletions
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c37..78db4953a800 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
        }
        mntget(newmnt);
-        err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
+        err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
        switch (err) {
        case 0:
                path_put(&nd->path);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c489..065b4e10681a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
                        page = pages[loop];
                        if (page->index > wb->last)
                                break;
-                        if (TestSetPageLocked(page))
+                        if (!trylock_page(page))
                                break;
                        if (!PageDirty(page) ||
                            page_private(page) != (unsigned long) wb) {
diff --git a/fs/bio.c b/fs/bio.c
index 25f1af0d81e5..8000e2fa16cb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
         */
        bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-        if (bvl) {
+        if (bvl)
-                struct biovec_slab *bp = bvec_slabs + *idx;
+                memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
-                memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
-        }
        return bvl;
 }
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                                goto out;
                        }
                        bio->bi_flags |= idx << BIO_POOL_OFFSET;
-                        bio->bi_max_vecs = bvec_slabs[idx].nr_vecs;
+                        bio->bi_max_vecs = bvec_nr_vecs(idx);
                }
                bio->bi_io_vec = bvl;
        }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dcf37cada369..aff54219e049 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
         * hooks: /n/, see "layering violations".
         */
        ret = devcgroup_inode_permission(bdev->bd_inode, perm);
-        if (ret != 0)
+        if (ret != 0) {
+                bdput(bdev);
                return ret;
+        }
        ret = -ENXIO;
        file->f_mapping = bdev->bd_inode->i_mapping;
@@ -1234,6 +1236,7 @@ fail:
        bdev = ERR_PTR(error);
        goto out;
 }
+EXPORT_SYMBOL(lookup_bdev);
 /**
 * open_bdev_excl  -  open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index ca12a6bb82b1..38653e36e225 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
 /*
 * The buffer's backing address_space's private_lock must be held
 */
-static inline void __remove_assoc_queue(struct buffer_head *bh)
+static void __remove_assoc_queue(struct buffer_head *bh)
 {
        list_del_init(&bh->b_assoc_buffers);
        WARN_ON(!bh->b_assoc_map);
@@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
                 */
                if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
                        lock_buffer(bh);
-                } else if (test_set_buffer_locked(bh)) {
+                } else if (!trylock_buffer(bh)) {
                        redirty_page_for_writepage(wbc, page);
                        continue;
                }
@@ -3000,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
                if (rw == SWRITE || rw == SWRITE_SYNC)
                        lock_buffer(bh);
-                else if (test_set_buffer_locked(bh))
+                else if (!trylock_buffer(bh))
                        continue;
                if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fdf..f5d0083e09fa 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
+Version 1.54
+------------
+Fix premature write failure on congested networks (we would give up
+on EAGAIN from the socket too quickly on large writes).
+Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
+Fix endian problems in acl (mode from/to cifs acl) on bigendian
+architectures.
 Version 1.53
 ------------
 DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 6bb440b257b0..5fabd2caf93c 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -483,6 +483,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
        asn1_open(&ctx, security_blob, length);
+        /* GSSAPI header */
        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
                cFYI(1, ("Error decoding negTokenInit header"));
                return 0;
@@ -490,153 +491,142 @@ decode_negTokenInit(unsigned char *security_blob, int length,
                   || (tag != ASN1_EOC)) {
                cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
                return 0;
-        } else {
+        }
-                /*      remember to free obj->oid */
-                rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-                if (rc) {
-                        if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
-                                rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
-                                if (rc) {
-                                        rc = compare_oid(oid, oidlen,
-                                                         SPNEGO_OID,
-                                                         SPNEGO_OID_LEN);
-                                        kfree(oid);
-                                }
-                        } else
-                                rc = 0;
-                }
-                if (!rc) {
+        /* Check for SPNEGO OID -- remember to free obj->oid */
-                        cFYI(1, ("Error decoding negTokenInit header"));
+        rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-                        return 0;
+        if (rc) {
-                }
+                if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
+                    (cls == ASN1_UNI)) {
+                        rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
+                        if (rc) {
+                                rc = compare_oid(oid, oidlen, SPNEGO_OID,
+                                                 SPNEGO_OID_LEN);
+                                kfree(oid);
+                        }
+                } else
+                        rc = 0;
+        }
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+        /* SPNEGO OID not present or garbled -- bail out */
-                        cFYI(1, ("Error decoding negTokenInit"));
+        if (!rc) {
-                        return 0;
+                cFYI(1, ("Error decoding negTokenInit header"));
-                } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+                return 0;
-                           || (tag != ASN1_EOC)) {
+        }
-                        cFYI(1,
-                             ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
-                              cls, con, tag, end, *end));
-                        return 0;
-                }
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                        cFYI(1, ("Error decoding negTokenInit"));
+                cFYI(1, ("Error decoding negTokenInit"));
-                        return 0;
+                return 0;
-                } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+        } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
-                           || (tag != ASN1_SEQ)) {
+                   || (tag != ASN1_EOC)) {
-                        cFYI(1,
+                cFYI(1,
-                             ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+                     ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
-                              cls, con, tag, end, *end));
+                      cls, con, tag, end, *end));
-                        return 0;
+                return 0;
-                }
+        }
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                        cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+                cFYI(1, ("Error decoding negTokenInit"));
-                        return 0;
+                return 0;
-                } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
+        } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-                           || (tag != ASN1_EOC)) {
+                   || (tag != ASN1_SEQ)) {
-                        cFYI(1,
+                cFYI(1,
-                             ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
+                     ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
-                              cls, con, tag, end, *end));
+                      cls, con, tag, end, *end));
-                        return 0;
+                return 0;
-                }
+        }
-                if (asn1_header_decode
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                    (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
+                cFYI(1, ("Error decoding 2nd part of negTokenInit"));
-                        cFYI(1, ("Error decoding 2nd part of negTokenInit"));
+                return 0;
-                        return 0;
+        } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
-                } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+                   || (tag != ASN1_EOC)) {
-                           || (tag != ASN1_SEQ)) {
+                cFYI(1,
-                        cFYI(1,
+                     ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
-                             ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
+                      cls, con, tag, end, *end));
-                              cls, con, tag, end, *end));
+                return 0;
-                        return 0;
+        }
-                }
-                while (!asn1_eoc_decode(&ctx, sequence_end)) {
+        if (asn1_header_decode
-                        rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
+            (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
-                        if (!rc) {
+                cFYI(1, ("Error decoding 2nd part of negTokenInit"));
-                                cFYI(1,
+                return 0;
-                                     ("Error decoding negTokenInit hdr exit2"));
+        } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-                                return 0;
+                   || (tag != ASN1_SEQ)) {
-                        }
+                cFYI(1,
-                        if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
+                     ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
-                                if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
+                      cls, con, tag, end, *end));
+                return 0;
-                                        cFYI(1,
+        }
-                                          ("OID len = %d oid = 0x%lx 0x%lx "
-                                           "0x%lx 0x%lx",
-                                           oidlen, *oid, *(oid + 1),
-                                           *(oid + 2), *(oid + 3)));
-                                        if (compare_oid(oid, oidlen,
-                                                        MSKRB5_OID,
-                                                        MSKRB5_OID_LEN))
-                                                use_kerberos = true;
-                                        else if (compare_oid(oid, oidlen,
-                                                             KRB5_OID,
-                                                             KRB5_OID_LEN))
-                                                use_kerberos = true;
-                                        else if (compare_oid(oid, oidlen,
-                                                             NTLMSSP_OID,
-                                                             NTLMSSP_OID_LEN))
-                                                use_ntlmssp = true;
-                                        kfree(oid);
-                                }
-                        } else {
-                                cFYI(1, ("Should be an oid what is going on?"));
-                        }
-                }
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+        while (!asn1_eoc_decode(&ctx, sequence_end)) {
-                        cFYI(1,
+                rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
-                             ("Error decoding last part negTokenInit exit3"));
+                if (!rc) {
-                        return 0;
-                } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-                        /* tag = 3 indicating mechListMIC */
                        cFYI(1,
-                             ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
+                             ("Error decoding negTokenInit hdr exit2"));
-                              cls, con, tag, end, *end));
                        return 0;
                }
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+                if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
-                        cFYI(1,
+                        if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
-                             ("Error decoding last part negTokenInit exit5"));
-                        return 0;
+                                cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
-                } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
+                                         "0x%lx 0x%lx", oidlen, *oid,
-                           || (tag != ASN1_SEQ)) {
+                                         *(oid + 1), *(oid + 2), *(oid + 3)));
-                        cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
-                                cls, con, tag, end, *end));
+                                if (compare_oid(oid, oidlen, MSKRB5_OID,
+                                                MSKRB5_OID_LEN))
+                                        use_kerberos = true;
+                                else if (compare_oid(oid, oidlen, KRB5_OID,
+                                                     KRB5_OID_LEN))
+                                        use_kerberos = true;
+                                else if (compare_oid(oid, oidlen, NTLMSSP_OID,
+                                                     NTLMSSP_OID_LEN))
+                                        use_ntlmssp = true;
+                                kfree(oid);
+                        }
+                } else {
+                        cFYI(1, ("Should be an oid what is going on?"));
                }
+        }
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                        cFYI(1,
+                cFYI(1, ("Error decoding last part negTokenInit exit3"));
-                             ("Error decoding last part negTokenInit exit 7"));
+                return 0;
-                        return 0;
+        } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-                } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
+                /* tag = 3 indicating mechListMIC */
-                        cFYI(1,
+                cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
-                             ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
+                         cls, con, tag, end, *end));
-                              cls, con, tag, end, *end));
+                return 0;
-                        return 0;
+        }
-                }
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+                cFYI(1, ("Error decoding last part negTokenInit exit5"));
-                        cFYI(1,
+                return 0;
-                             ("Error decoding last part negTokenInit exit9"));
+        } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-                        return 0;
+                   || (tag != ASN1_SEQ)) {
-                } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
+                cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
-                           || (tag != ASN1_GENSTR)) {
+                        cls, con, tag, end, *end));
-                        cFYI(1,
+        }
-                             ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
-                              cls, con, tag, end, *end));
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                        return 0;
+                cFYI(1, ("Error decoding last part negTokenInit exit 7"));
-                }
+                return 0;
-                cFYI(1, ("Need to call asn1_octets_decode() function for %s",
+        } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-                         ctx.pointer)); /* is this UTF-8 or ASCII? */
+                cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
+                         cls, con, tag, end, *end));
+                return 0;
+        }
+        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+                cFYI(1, ("Error decoding last part negTokenInit exit9"));
+                return 0;
+        } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
+                   || (tag != ASN1_GENSTR)) {
+                cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
+                         cls, con, tag, end, *end));
+                return 0;
        }
+        cFYI(1, ("Need to call asn1_octets_decode() function for %s",
+                 ctx.pointer)); /* is this UTF-8 or ASCII? */
        if (use_kerberos)
                *secType = Kerberos;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 688a2d42153f..69a12aae91d3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
        spin_lock(&GlobalMid_Lock);
        list_for_each(tmp, &server->pending_mid_q) {
                mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-                if (mid_entry) {
+                cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
-                        cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
+                        mid_entry->midState,
-                                mid_entry->midState,
+                        (int)mid_entry->command,
-                                (int)mid_entry->command,
+                        mid_entry->pid,
-                                mid_entry->pid,
+                        mid_entry->tsk,
-                                mid_entry->tsk,
+                        mid_entry->mid));
-                                mid_entry->mid));
 #ifdef CONFIG_CIFS_STATS2
-                        cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
+                cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
-                                mid_entry->largeBuf,
+                        mid_entry->largeBuf,
-                                mid_entry->resp_buf,
+                        mid_entry->resp_buf,
-                                mid_entry->when_received,
+                        mid_entry->when_received,
-                                jiffies));
+                        jiffies));
 #endif /* STATS2 */
-                        cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
+                cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
-                                  mid_entry->multiEnd));
+                          mid_entry->multiEnd));
-                        if (mid_entry->resp_buf) {
+                if (mid_entry->resp_buf) {
-                                cifs_dump_detail(mid_entry->resp_buf);
+                        cifs_dump_detail(mid_entry->resp_buf);
-                                cifs_dump_mem("existing buf: ",
+                        cifs_dump_mem("existing buf: ",
-                                        mid_entry->resp_buf, 62);
+                                mid_entry->resp_buf, 62);
-                        }
                }
        }
        spin_unlock(&GlobalMid_Lock);
@@ -163,16 +161,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
                                mid_entry = list_entry(tmp1, struct
                                        mid_q_entry,
                                        qhead);
-                                if (mid_entry) {
+                                seq_printf(m, "State: %d com: %d pid:"
-                                        seq_printf(m,
+                                                " %d tsk: %p mid %d\n",
-                                                        "State: %d com: %d pid:"
+                                                mid_entry->midState,
-                                                        " %d tsk: %p mid %d\n",
+                                                (int)mid_entry->command,
-                                                        mid_entry->midState,
+                                                mid_entry->pid,
-                                                        (int)mid_entry->command,
+                                                mid_entry->tsk,
-                                                        mid_entry->pid,
+                                                mid_entry->mid);
-                                                        mid_entry->tsk,
-                                                        mid_entry->mid);
-                                }
                        }
                        spin_unlock(&GlobalMid_Lock);
                }
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e329..d2c8eef84f3c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
        int err;
        mntget(newmnt);
-        err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist);
+        err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
        switch (err) {
        case 0:
                path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6aed..2434ab0e8791 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
        .describe       = user_describe,
 };
-#define MAX_VER_STR_LEN   9 /* length of longest version string e.g.
+#define MAX_VER_STR_LEN   8 /* length of longest version string e.g.
-                                strlen(";ver=0xFF") */
+                                strlen("ver=0xFF") */
 #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
                               in future could have strlen(";sec=ntlmsspi") */
 #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
        struct key *spnego_key;
        const char *hostname = server->hostname;
-        /* BB: come up with better scheme for determining length */
+        /* length of fields (with semicolons): ver=0xyz ip4=ipaddress
-        /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host=
+           host=hostname sec=mechanism uid=0xFF user=username */
-           hostname sec=mechanism uid=0x uid */
+        desc_len = MAX_VER_STR_LEN +
-        desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 +
+                   6 /* len of ";host=" */ + strlen(hostname) +
-                  strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2);
+                   5 /* len of ";ip4=" */ + MAX_IPV6_ADDR_LEN +
+                   MAX_MECH_STR_LEN +
+                   7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
+                   6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
        spnego_key = ERR_PTR(-ENOMEM);
        description = kzalloc(desc_len, GFP_KERNEL);
        if (description == NULL)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1ec7076f7b24..e8da4ee761b5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -930,36 +930,34 @@ static int cifs_oplock_thread(void *dummyarg)
                        schedule_timeout(39*HZ);
                } else {
                        oplock_item = list_entry(GlobalOplock_Q.next,
-                                struct oplock_q_entry, qhead);
+                                                struct oplock_q_entry, qhead);
-                        if (oplock_item) {
+                        cFYI(1, ("found oplock item to write out"));
-                                cFYI(1, ("found oplock item to write out"));
+                        pTcon = oplock_item->tcon;
-                                pTcon = oplock_item->tcon;
+                        inode = oplock_item->pinode;
-                                inode = oplock_item->pinode;
+                        netfid = oplock_item->netfid;
-                                netfid = oplock_item->netfid;
+                        spin_unlock(&GlobalMid_Lock);
-                                spin_unlock(&GlobalMid_Lock);
+                        DeleteOplockQEntry(oplock_item);
-                                DeleteOplockQEntry(oplock_item);
+                        /* can not grab inode sem here since it would
-                                /* can not grab inode sem here since it would
                                deadlock when oplock received on delete
                                since vfs_unlink holds the i_mutex across
                                the call */
-                                /* mutex_lock(&inode->i_mutex);*/
+                        /* mutex_lock(&inode->i_mutex);*/
-                                if (S_ISREG(inode->i_mode)) {
+                        if (S_ISREG(inode->i_mode)) {
-                                        rc =
+                                rc = filemap_fdatawrite(inode->i_mapping);
-                                           filemap_fdatawrite(inode->i_mapping);
+                                if (CIFS_I(inode)->clientCanCacheRead == 0) {
-                                        if (CIFS_I(inode)->clientCanCacheRead
+                                        waitrc = filemap_fdatawait(
-                                                                         == 0) {
+                                                              inode->i_mapping);
-                                                waitrc = filemap_fdatawait(inode->i_mapping);
+                                        invalidate_remote_inode(inode);
-                                                invalidate_remote_inode(inode);
+                                }
-                                        }
+                                if (rc == 0)
-                                        if (rc == 0)
+                                        rc = waitrc;
-                                                rc = waitrc;
+                        } else
-                                } else
+                                rc = 0;
-                                        rc = 0;
+                        /* mutex_unlock(&inode->i_mutex);*/
-                                /* mutex_unlock(&inode->i_mutex);*/
+                        if (rc)
-                                if (rc)
+                                CIFS_I(inode)->write_behind_rc = rc;
-                                        CIFS_I(inode)->write_behind_rc = rc;
+                        cFYI(1, ("Oplock flush inode %p rc %d",
-                                cFYI(1, ("Oplock flush inode %p rc %d",
+                                inode, rc));
-                                        inode, rc));
                                /* releasing stale oplock after recent reconnect
                                of smb session using a now incorrect file
@@ -967,15 +965,13 @@ static int cifs_oplock_thread(void *dummyarg)
                                not bother sending an oplock release if session
                                to server still is disconnected since oplock
                                already released by the server in that case */
-                                if (pTcon->tidStatus != CifsNeedReconnect) {
+                        if (pTcon->tidStatus != CifsNeedReconnect) {
-                                    rc = CIFSSMBLock(0, pTcon, netfid,
+                                rc = CIFSSMBLock(0, pTcon, netfid,
-                                            0 /* len */ , 0 /* offset */, 0,
+                                                0 /* len */ , 0 /* offset */, 0,
-                                            0, LOCKING_ANDX_OPLOCK_RELEASE,
+                                                0, LOCKING_ANDX_OPLOCK_RELEASE,
-                                            false /* wait flag */);
+                                                false /* wait flag */);
-                                        cFYI(1, ("Oplock release rc = %d", rc));
+                                cFYI(1, ("Oplock release rc = %d", rc));
-                                }
+                        }
-                        } else
-                                spin_unlock(&GlobalMid_Lock);
                        set_current_state(TASK_INTERRUPTIBLE);
                        schedule_timeout(1);  /* yield in case q were corrupt */
                }
@@ -1001,8 +997,7 @@ static int cifs_dnotify_thread(void *dummyarg)
                list_for_each(tmp, &GlobalSMBSessionList) {
                        ses = list_entry(tmp, struct cifsSesInfo,
                                cifsSessionList);
-                        if (ses && ses->server &&
+                        if (ses->server && atomic_read(&ses->server->inFlight))
-                             atomic_read(&ses->server->inFlight))
                                wake_up_all(&ses->server->response_q);
                }
                read_unlock(&GlobalSMBSeslock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd15529..135c965c4137 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
-#define CIFS_VERSION   "1.53"
+#define CIFS_VERSION   "1.54"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 409abce12732..d2a073edd1b8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -262,7 +262,7 @@
 */
 #define CIFS_NO_HANDLE        0xFFFF
-#define NO_CHANGE_64          cpu_to_le64(0xFFFFFFFFFFFFFFFFULL)
+#define NO_CHANGE_64          0xFFFFFFFFFFFFFFFFULL
 #define NO_CHANGE_32          0xFFFFFFFFUL
 /* IPC$ in ASCII */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f821..a729d083e6f4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
 extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
                        struct kstatfs *FSData);
-extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon,
+extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
                        const char *fileName, const FILE_BASIC_INFO *data,
                        const struct nls_table *nls_codepage,
                        int remap_special_chars);
-extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
+extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
-                        const FILE_BASIC_INFO *data, __u16 fid);
+                        const FILE_BASIC_INFO *data, __u16 fid,
+                        __u32 pid_of_opener);
 #if 0
 extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
                        char *fileName, __u16 dos_attributes,
@@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
 extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
                         __u64 size, __u16 fileHandle, __u32 opener_pid,
                        bool AllocSizeFlag);
-extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon,
-                        char *full_path, __u64 mode, __u64 uid,
+struct cifs_unix_set_info_args {
-                        __u64 gid, dev_t dev,
+        __u64   ctime;
+        __u64   atime;
+        __u64   mtime;
+        __u64   mode;
+        __u64   uid;
+        __u64   gid;
+        dev_t   device;
+};
+extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
+                        char *fileName,
+                        const struct cifs_unix_set_info_args *args,
                        const struct nls_table *nls_codepage,
                        int remap_special_chars);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c621ffa2ca90..994de7c90474 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
        write_lock(&GlobalSMBSeslock);
        list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
-                if (open_file)
+                open_file->invalidHandle = true;
-                        open_file->invalidHandle = true;
        }
        write_unlock(&GlobalSMBSeslock);
        /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -4816,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
   time and resort to the original setpathinfo level which takes the ancient
   DOS time format with 2 second granularity */
 int
-CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
-                    const FILE_BASIC_INFO *data, __u16 fid)
+                    const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
 {
        struct smb_com_transaction2_sfi_req *pSMB  = NULL;
        char *data_offset;
@@ -4830,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
        if (rc)
                return rc;
-        /* At this point there is no need to override the current pid
+        pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
-        with the pid of the opener, but that could change if we someday
+        pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
-        use an existing handle (rather than opening one on the fly) */
-        /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
-        pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
        params = 6;
        pSMB->MaxSetupCount = 0;
@@ -4882,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
 int
-CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName,
+CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
-                const FILE_BASIC_INFO *data,
+                   const char *fileName, const FILE_BASIC_INFO *data,
-                const struct nls_table *nls_codepage, int remap)
+                   const struct nls_table *nls_codepage, int remap)
 {
        TRANSACTION2_SPI_REQ *pSMB = NULL;
        TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5013,10 +5009,9 @@ SetAttrLgcyRetry:
 #endif /* temporarily unneeded SetAttr legacy function */
 int
-CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
+CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
-                    char *fileName, __u64 mode, __u64 uid, __u64 gid,
+                   const struct cifs_unix_set_info_args *args,
-                    dev_t device, const struct nls_table *nls_codepage,
+                   const struct nls_table *nls_codepage, int remap)
-                    int remap)
 {
        TRANSACTION2_SPI_REQ *pSMB = NULL;
        TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5025,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
        int bytes_returned = 0;
        FILE_UNIX_BASIC_INFO *data_offset;
        __u16 params, param_offset, offset, count, byte_count;
+        __u64 mode = args->mode;
        cFYI(1, ("In SetUID/GID/Mode"));
 setPermsRetry:
@@ -5080,16 +5076,16 @@ setPermsRetry:
        set file size and do not want to truncate file size to zero
        accidently as happened on one Samba server beta by putting
        zero instead of -1 here */
-        data_offset->EndOfFile = NO_CHANGE_64;
+        data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
-        data_offset->NumOfBytes = NO_CHANGE_64;
+        data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
-        data_offset->LastStatusChange = NO_CHANGE_64;
+        data_offset->LastStatusChange = cpu_to_le64(args->ctime);
-        data_offset->LastAccessTime = NO_CHANGE_64;
+        data_offset->LastAccessTime = cpu_to_le64(args->atime);
-        data_offset->LastModificationTime = NO_CHANGE_64;
+        data_offset->LastModificationTime = cpu_to_le64(args->mtime);
-        data_offset->Uid = cpu_to_le64(uid);
+        data_offset->Uid = cpu_to_le64(args->uid);
-        data_offset->Gid = cpu_to_le64(gid);
+        data_offset->Gid = cpu_to_le64(args->gid);
        /* better to leave device as zero when it is  */
-        data_offset->DevMajor = cpu_to_le64(MAJOR(device));
+        data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
-        data_offset->DevMinor = cpu_to_le64(MINOR(device));
+        data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
        data_offset->Permissions = cpu_to_le64(mode);
        if (S_ISREG(mode))
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b51d5777cde6..0711db65afe8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
        }
        list_for_each(tmp, &GlobalTreeConnectionList) {
                tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-                if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
+                if ((tcon->ses) && (tcon->ses->server == server))
                        tcon->tidStatus = CifsNeedReconnect;
        }
        read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
                mid_entry = list_entry(tmp, struct
                                        mid_q_entry,
                                        qhead);
-                if (mid_entry) {
+                if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
-                        if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
                                /* Mark other intransit requests as needing
                                   retry so we do not immediately mark the
                                   session bad again (ie after we reconnect
                                   below) as they timeout too */
-                                mid_entry->midState = MID_RETRY_NEEDED;
+                        mid_entry->midState = MID_RETRY_NEEDED;
-                        }
                }
        }
        spin_unlock(&GlobalMid_Lock);
@@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
        current->flags |= PF_MEMALLOC;
        cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
-        write_lock(&GlobalSMBSeslock);
-        atomic_inc(&tcpSesAllocCount);
+        length = atomic_inc_return(&tcpSesAllocCount);
-        length = tcpSesAllocCount.counter;
+        if (length > 1)
-        write_unlock(&GlobalSMBSeslock);
-        if (length  > 1)
                mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
                                GFP_KERNEL);
@@ -745,14 +741,11 @@ multi_t2_fnd:
                coming home not much else we can do but free the memory */
        }
-        write_lock(&GlobalSMBSeslock);
-        atomic_dec(&tcpSesAllocCount);
-        length = tcpSesAllocCount.counter;
        /* last chance to mark ses pointers invalid
        if there are any pointing to this (e.g
        if a crazy root user tried to kill cifsd
        kernel thread explicitly this might happen) */
+        write_lock(&GlobalSMBSeslock);
        list_for_each(tmp, &GlobalSMBSessionList) {
                ses = list_entry(tmp, struct cifsSesInfo,
                                cifsSessionList);
@@ -763,6 +756,8 @@ multi_t2_fnd:
        kfree(server->hostname);
        kfree(server);
+        length = atomic_dec_return(&tcpSesAllocCount);
        if (length  > 0)
                mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
                                GFP_KERNEL);
@@ -3623,97 +3618,91 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
                }
                first_time = 1;
        }
-        if (!rc) {
-                pSesInfo->flags = 0;
+        if (rc)
-                pSesInfo->capabilities = pSesInfo->server->capabilities;
+                goto ss_err_exit;
-                if (linuxExtEnabled == 0)
-                        pSesInfo->capabilities &= (~CAP_UNIX);
+        pSesInfo->flags = 0;
+        pSesInfo->capabilities = pSesInfo->server->capabilities;
+        if (linuxExtEnabled == 0)
+                pSesInfo->capabilities &= (~CAP_UNIX);
        /*      pSesInfo->sequence_number = 0;*/
-                cFYI(1,
+        cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
-                      ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
+                 pSesInfo->server->secMode,
-                        pSesInfo->server->secMode,
+                 pSesInfo->server->capabilities,
-                        pSesInfo->server->capabilities,
+                 pSesInfo->server->timeAdj));
-                        pSesInfo->server->timeAdj));
+        if (experimEnabled < 2)
-                if (experimEnabled < 2)
+                rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
-                        rc = CIFS_SessSetup(xid, pSesInfo,
+        else if (extended_security
-                                            first_time, nls_info);
+                        && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-                else if (extended_security
+                        && (pSesInfo->server->secType == NTLMSSP)) {
-                                && (pSesInfo->capabilities
+                rc = -EOPNOTSUPP;
-                                        & CAP_EXTENDED_SECURITY)
+        } else if (extended_security
-                                && (pSesInfo->server->secType == NTLMSSP)) {
+                        && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-                        rc = -EOPNOTSUPP;
+                        && (pSesInfo->server->secType == RawNTLMSSP)) {
-                } else if (extended_security
+                cFYI(1, ("NTLMSSP sesssetup"));
-                           && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
+                rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
-                           && (pSesInfo->server->secType == RawNTLMSSP)) {
+                                                   nls_info);
-                        cFYI(1, ("NTLMSSP sesssetup"));
+                if (!rc) {
-                        rc = CIFSNTLMSSPNegotiateSessSetup(xid,
+                        if (ntlmv2_flag) {
-                                                pSesInfo,
+                                char *v2_response;
-                                                &ntlmv2_flag,
+                                cFYI(1, ("more secure NTLM ver2 hash"));
-                                                nls_info);
+                                if (CalcNTLMv2_partial_mac_key(pSesInfo,
-                        if (!rc) {
+                                                                nls_info)) {
-                                if (ntlmv2_flag) {
+                                        rc = -ENOMEM;
-                                        char *v2_response;
+                                        goto ss_err_exit;
-                                        cFYI(1, ("more secure NTLM ver2 hash"));
+                                } else
-                                        if (CalcNTLMv2_partial_mac_key(pSesInfo,
+                                        v2_response = kmalloc(16 + 64 /* blob*/,
-                                                nls_info)) {
+                                                                GFP_KERNEL);
-                                                rc = -ENOMEM;
+                                if (v2_response) {
-                                                goto ss_err_exit;
+                                        CalcNTLMv2_response(pSesInfo,
-                                        } else
+                                                                v2_response);
-                                                v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL);
+                                /*      if (first_time)
-                                        if (v2_response) {
+                                                cifs_calculate_ntlmv2_mac_key */
-                                                CalcNTLMv2_response(pSesInfo,
+                                        kfree(v2_response);
-                                                                   v2_response);
-                                /*              if (first_time)
-                                                  cifs_calculate_ntlmv2_mac_key(
-                                                   pSesInfo->server->mac_signing_key,
-                                                   response, ntlm_session_key,*/
-                                                kfree(v2_response);
                                        /* BB Put dummy sig in SessSetup PDU? */
-                                        } else {
-                                                rc = -ENOMEM;
-                                                goto ss_err_exit;
-                                        }
                                } else {
-                                        SMBNTencrypt(pSesInfo->password,
+                                        rc = -ENOMEM;
-                                                pSesInfo->server->cryptKey,
+                                        goto ss_err_exit;
-                                                ntlm_session_key);
-                                        if (first_time)
-                                                cifs_calculate_mac_key(
-                                                        &pSesInfo->server->mac_signing_key,
-                                                        ntlm_session_key,
-                                                        pSesInfo->password);
                                }
+                        } else {
+                                SMBNTencrypt(pSesInfo->password,
+                                             pSesInfo->server->cryptKey,
+                                             ntlm_session_key);
+                                if (first_time)
+                                        cifs_calculate_mac_key(
+                                             &pSesInfo->server->mac_signing_key,
+                                             ntlm_session_key,
+                                             pSesInfo->password);
+                        }
                        /* for better security the weaker lanman hash not sent
                           in AuthSessSetup so we no longer calculate it */
-                                rc = CIFSNTLMSSPAuthSessSetup(xid,
+                        rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
-                                        pSesInfo,
+                                                      ntlm_session_key,
-                                        ntlm_session_key,
+                                                      ntlmv2_flag,
-                                        ntlmv2_flag,
+                                                      nls_info);
-                                        nls_info);
+                }
-                        }
+        } else { /* old style NTLM 0.12 session setup */
-                } else { /* old style NTLM 0.12 session setup */
+                SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
-                        SMBNTencrypt(pSesInfo->password,
+                             ntlm_session_key);
-                                pSesInfo->server->cryptKey,
-                                ntlm_session_key);
-                        if (first_time)
+                if (first_time)
-                                cifs_calculate_mac_key(
+                        cifs_calculate_mac_key(
                                        &pSesInfo->server->mac_signing_key,
                                        ntlm_session_key, pSesInfo->password);
-                        rc = CIFSSessSetup(xid, pSesInfo,
+                rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
-                                ntlm_session_key, nls_info);
+        }
-                }
+        if (rc) {
-                if (rc) {
+                cERROR(1, ("Send error in SessSetup = %d", rc));
-                        cERROR(1, ("Send error in SessSetup = %d", rc));
+        } else {
-                } else {
+                cFYI(1, ("CIFS Session Established successfully"));
-                        cFYI(1, ("CIFS Session Established successfully"));
                        pSesInfo->status = CifsGood;
-                }
        }
 ss_err_exit:
        return rc;
 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c9..e962e75e6f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                /* If Open reported that we actually created a file
                then we now have to set the mode if possible */
                if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+                        struct cifs_unix_set_info_args args = {
+                                .mode   = mode,
+                                .ctime  = NO_CHANGE_64,
+                                .atime  = NO_CHANGE_64,
+                                .mtime  = NO_CHANGE_64,
+                                .device = 0,
+                        };
                        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-                                CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
+                                args.uid = (__u64) current->fsuid;
-                                        (__u64)current->fsuid,
+                                if (inode->i_mode & S_ISGID)
-                                        (__u64)current->fsgid,
+                                        args.gid = (__u64) inode->i_gid;
-                                        0 /* dev */,
+                                else
-                                        cifs_sb->local_nls,
+                                        args.gid = (__u64) current->fsgid;
-                                        cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                        } else {
-                                CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
+                                args.uid = NO_CHANGE_64;
-                                        (__u64)-1,
+                                args.gid = NO_CHANGE_64;
-                                        (__u64)-1,
-                                        0 /* dev */,
-                                        cifs_sb->local_nls,
-                                        cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                        }
+                        CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+                                cifs_sb->local_nls,
+                                cifs_sb->mnt_cifs_flags &
+                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
                } else {
                        /* BB implement mode setting via Windows security
                           descriptors e.g. */
@@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                                    (cifs_sb->mnt_cifs_flags &
                                     CIFS_MOUNT_SET_UID)) {
                                        newinode->i_uid = current->fsuid;
-                                        newinode->i_gid = current->fsgid;
+                                        if (inode->i_mode & S_ISGID)
+                                                newinode->i_gid =
+                                                        inode->i_gid;
+                                        else
+                                                newinode->i_gid =
+                                                        current->fsgid;
                                }
                        }
                }
@@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
        if (full_path == NULL)
                rc = -ENOMEM;
        else if (pTcon->unix_ext) {
-                mode &= ~current->fs->umask;
+                struct cifs_unix_set_info_args args = {
+                        .mode   = mode & ~current->fs->umask,
+                        .ctime  = NO_CHANGE_64,
+                        .atime  = NO_CHANGE_64,
+                        .mtime  = NO_CHANGE_64,
+                        .device = device_number,
+                };
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-                        rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path,
+                        args.uid = (__u64) current->fsuid;
-                                mode, (__u64)current->fsuid,
+                        args.gid = (__u64) current->fsgid;
-                                (__u64)current->fsgid,
-                                device_number, cifs_sb->local_nls,
-                                cifs_sb->mnt_cifs_flags &
-                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
                } else {
-                        rc = CIFSSMBUnixSetPerms(xid, pTcon,
+                        args.uid = NO_CHANGE_64;
-                                full_path, mode, (__u64)-1, (__u64)-1,
+                        args.gid = NO_CHANGE_64;
-                                device_number, cifs_sb->local_nls,
-                                cifs_sb->mnt_cifs_flags &
-                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
                }
+                rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
+                        &args, cifs_sb->local_nls,
+                        cifs_sb->mnt_cifs_flags &
+                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                if (!rc) {
                        rc = cifs_get_inode_info_unix(&newinode, full_path,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a5..ff14d14903a0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
                /* time to set mode which we can not set earlier due to
                   problems creating new read-only files */
                if (pTcon->unix_ext) {
-                        CIFSSMBUnixSetPerms(xid, pTcon, full_path,
+                        struct cifs_unix_set_info_args args = {
-                                            inode->i_mode,
+                                .mode   = inode->i_mode,
-                                            (__u64)-1, (__u64)-1, 0 /* dev */,
+                                .uid    = NO_CHANGE_64,
+                                .gid    = NO_CHANGE_64,
+                                .ctime  = NO_CHANGE_64,
+                                .atime  = NO_CHANGE_64,
+                                .mtime  = NO_CHANGE_64,
+                                .device = 0,
+                        };
+                        CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
                                            cifs_sb->local_nls,
                                            cifs_sb->mnt_cifs_flags &
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-                } else {
-                        /* BB implement via Windows security descriptors eg
-                           CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
-                                              -1, -1, local_nls);
-                           in the meantime could set r/o dos attribute when
-                           perms are eg: mode & 0222 == 0 */
                }
        }
@@ -1280,7 +1281,7 @@ retry:
                        if (first < 0)
                                lock_page(page);
-                        else if (TestSetPageLocked(page))
+                        else if (!trylock_page(page))
                                break;
                        if (unlikely(page->mapping != mapping)) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 46e54d39461d..28a22092d450 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -737,7 +737,7 @@ psx_del_no_retry:
                        /* ATTRS set to normal clears r/o bit */
                        pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
                        if (!(pTcon->ses->flags & CIFS_SES_NT4))
-                                rc = CIFSSMBSetTimes(xid, pTcon, full_path,
+                                rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
                                                     pinfo_buf,
                                                     cifs_sb->local_nls,
                                                     cifs_sb->mnt_cifs_flags &
@@ -767,9 +767,10 @@ psx_del_no_retry:
                                                 cifs_sb->mnt_cifs_flags &
                                                    CIFS_MOUNT_MAP_SPECIAL_CHR);
                                if (rc == 0) {
-                                        rc = CIFSSMBSetFileTimes(xid, pTcon,
+                                        rc = CIFSSMBSetFileInfo(xid, pTcon,
-                                                                 pinfo_buf,
+                                                                pinfo_buf,
-                                                                 netfid);
+                                                                netfid,
+                                                                current->tgid);
                                        CIFSSMBClose(xid, pTcon, netfid);
                                }
                        }
@@ -984,32 +985,41 @@ mkdir_get_info:
                  * failed to get it from the server or was set bogus */
                if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
                                direntry->d_inode->i_nlink = 2;
                mode &= ~current->fs->umask;
+                /* must turn on setgid bit if parent dir has it */
+                if (inode->i_mode & S_ISGID)
+                        mode |= S_ISGID;
                if (pTcon->unix_ext) {
+                        struct cifs_unix_set_info_args args = {
+                                .mode   = mode,
+                                .ctime  = NO_CHANGE_64,
+                                .atime  = NO_CHANGE_64,
+                                .mtime  = NO_CHANGE_64,
+                                .device = 0,
+                        };
                        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-                                CIFSSMBUnixSetPerms(xid, pTcon, full_path,
+                                args.uid = (__u64)current->fsuid;
-                                                    mode,
+                                if (inode->i_mode & S_ISGID)
-                                                    (__u64)current->fsuid,
+                                        args.gid = (__u64)inode->i_gid;
-                                                    (__u64)current->fsgid,
+                                else
-                                                    0 /* dev_t */,
+                                        args.gid = (__u64)current->fsgid;
-                                                    cifs_sb->local_nls,
-                                                    cifs_sb->mnt_cifs_flags &
-                                                    CIFS_MOUNT_MAP_SPECIAL_CHR);
                        } else {
-                                CIFSSMBUnixSetPerms(xid, pTcon, full_path,
+                                args.uid = NO_CHANGE_64;
-                                                    mode, (__u64)-1,
+                                args.gid = NO_CHANGE_64;
-                                                    (__u64)-1, 0 /* dev_t */,
-                                                    cifs_sb->local_nls,
-                                                    cifs_sb->mnt_cifs_flags &
-                                                    CIFS_MOUNT_MAP_SPECIAL_CHR);
                        }
+                        CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+                                            cifs_sb->local_nls,
+                                            cifs_sb->mnt_cifs_flags &
+                                            CIFS_MOUNT_MAP_SPECIAL_CHR);
                } else {
                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
                            (mode & S_IWUGO) == 0) {
                                FILE_BASIC_INFO pInfo;
                                memset(&pInfo, 0, sizeof(pInfo));
                                pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
-                                CIFSSMBSetTimes(xid, pTcon, full_path,
+                                CIFSSMBSetPathInfo(xid, pTcon, full_path,
                                                &pInfo, cifs_sb->local_nls,
                                                cifs_sb->mnt_cifs_flags &
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1024,8 +1034,12 @@ mkdir_get_info:
                                     CIFS_MOUNT_SET_UID) {
                                        direntry->d_inode->i_uid =
                                                current->fsuid;
-                                        direntry->d_inode->i_gid =
+                                        if (inode->i_mode & S_ISGID)
-                                                current->fsgid;
+                                                direntry->d_inode->i_gid =
+                                                        inode->i_gid;
+                                        else
+                                                direntry->d_inode->i_gid =
+                                                        current->fsgid;
                                }
                        }
                }
@@ -1310,10 +1324,11 @@ int cifs_revalidate(struct dentry *direntry)
 /*              if (S_ISDIR(direntry->d_inode->i_mode))
                        shrink_dcache_parent(direntry); */
                if (S_ISREG(direntry->d_inode->i_mode)) {
-                        if (direntry->d_inode->i_mapping)
+                        if (direntry->d_inode->i_mapping) {
                                wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
                                if (wbrc)
                                        CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
+                        }
                        /* may eventually have to do this for open files too */
                        if (list_empty(&(cifsInode->openFileList))) {
                                /* changed on server - flush read ahead pages */
@@ -1489,30 +1504,228 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
        return rc;
 }
-int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+static int      /*
+ * cifs_set_file_info - send timestamps and DOS attributes to the server.
+ *
+ * Fills a FILE_BASIC_INFO from @attrs (atime, mtime, ctime) and @dosattr and
+ * sends it using the first of these that applies:
+ *   1. an already-open writable handle on @inode, via CIFSSMBSetFileInfo();
+ *   2. the path-based CIFSSMBSetPathInfo() on @full_path, skipped when the
+ *      session has CIFS_SES_NT4 set;
+ *   3. a temporary open + CIFSSMBSetFileInfo() + close, used when step 2 was
+ *      skipped or returned -EOPNOTSUPP or -EINVAL.
+ *
+ * Returns the result of the set call that was attempted last, or the error of
+ * the fallback open (with -EIO reported as -EINVAL).
+ */
+cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
+                    char *full_path, __u32 dosattr)
+{
+        int rc;
+        int oplock = 0; /* handed to CIFSSMBOpen() by address; not read here */
+        __u16 netfid;
+        __u32 netpid;
+        bool set_time = false;  /* true once atime or mtime is requested */
+        struct cifsFileInfo *open_file;
+        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+        struct cifsTconInfo *pTcon = cifs_sb->tcon;
+        FILE_BASIC_INFO info_buf;
+        if (attrs->ia_valid & ATTR_ATIME) {
+                set_time = true;
+                info_buf.LastAccessTime =
+                        cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
+        } else
+                info_buf.LastAccessTime = 0;    /* 0: presumably "no change" - confirm */
+        if (attrs->ia_valid & ATTR_MTIME) {
+                set_time = true;
+                info_buf.LastWriteTime =
+                    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
+        } else
+                info_buf.LastWriteTime = 0;
+        /*
+         * Samba throws this field away, but windows may actually use it.
+         * Do not set ctime unless other time stamps are changed explicitly
+         * (i.e. by utimes()) since we would then have a mix of client and
+         * server times.
+         */
+        if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
+                cFYI(1, ("CIFS - CTIME changed"));
+                info_buf.ChangeTime =
+                    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
+        } else
+                info_buf.ChangeTime = 0;
+        info_buf.CreationTime = 0;      /* don't change */
+        info_buf.Attributes = cpu_to_le32(dosattr);
+        /*
+         * If the file is already open for write, just use that fileid
+         */
+        open_file = find_writable_file(cifsInode);
+        if (open_file) {
+                netfid = open_file->netfid;
+                netpid = open_file->pid;
+                goto set_via_filehandle;        /* skip the path-based attempt */
+        }
+        /*
+         * NT4 apparently returns success on this call, but it doesn't
+         * really work.
+         */
+        if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
+                rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
+                                     &info_buf, cifs_sb->local_nls,
+                                     cifs_sb->mnt_cifs_flags &
+                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
+                if (rc != -EOPNOTSUPP && rc != -EINVAL)
+                        goto out;       /* success or any other error: done */
+        }
+        cFYI(1, ("calling SetFileInfo since SetPathInfo for "
+                 "times not supported by this server"));
+        rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
+                         SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
+                         CREATE_NOT_DIR, &netfid, &oplock,
+                         NULL, cifs_sb->local_nls,
+                         cifs_sb->mnt_cifs_flags &
+                                CIFS_MOUNT_MAP_SPECIAL_CHR);
+        if (rc != 0) {
+                if (rc == -EIO)
+                        rc = -EINVAL;   /* report -EIO from the open as -EINVAL */
+                goto out;
+        }
+        netpid = current->tgid; /* the handle was opened by us just above */
+set_via_filehandle:
+        rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
+        if (open_file == NULL)
+                CIFSSMBClose(xid, pTcon, netfid);       /* close the temporary handle */
+        else    /* borrowed handle: presumably this drops the reference
+                 * taken by find_writable_file() - confirm */
+                atomic_dec(&open_file->wrtPending);
+out:
+        return rc;
+}
+static int      /*
+ * cifs_setattr_unix - setattr path used when pTcon->unix_ext is set (see the
+ * dispatch in cifs_setattr()).
+ *
+ * Steps, in order: permission check with inode_change_ok() unless the mount
+ * has CIFS_MOUNT_NO_PERM; flush of dirty pages before an mtime or size
+ * change; size change through cifs_set_file_size(); then a single
+ * CIFSSMBUnixSetInfo() call carrying mode, uid, gid, atime, mtime and ctime.
+ * When that call succeeds the local inode is updated with inode_setattr().
+ *
+ * Returns 0 on success, otherwise the error of the step that failed
+ * (-ENOMEM when the path or the args structure cannot be allocated).
+ */
+cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 {
+        int rc;
        int xid;
-        struct cifs_sb_info *cifs_sb;
-        struct cifsTconInfo *pTcon;
        char *full_path = NULL;
-        int rc = -EACCES;
-        FILE_BASIC_INFO time_buf;
-        bool set_time = false;
-        bool set_dosattr = false;
-        __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
-        __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
-        __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
-        struct cifsInodeInfo *cifsInode;
        struct inode *inode = direntry->d_inode;
+        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+        struct cifsTconInfo *pTcon = cifs_sb->tcon;
+        struct cifs_unix_set_info_args *args = NULL;
+        cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
+                 direntry->d_name.name, attrs->ia_valid));
+        xid = GetXid();
+        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
+                /* check if we have permission to change attrs */
+                rc = inode_change_ok(inode, attrs);
+                if (rc < 0)
+                        goto out;
+                else
+                        rc = 0; /* collapse any non-negative result to 0 */
+        }
+        full_path = build_path_from_dentry(direntry);
+        if (full_path == NULL) {
+                rc = -ENOMEM;
+                goto out;
+        }
+        if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
+                /*
+                   Flush data before changing file size or changing the last
+                   write time of the file on the server. If the
+                   flush returns error, store it to report later and continue.
+                   BB: This should be smarter. Why bother flushing pages that
+                   will be truncated anyway? Also, should we error out here if
+                   the flush returns error?
+                 */
+                rc = filemap_write_and_wait(inode->i_mapping);
+                if (rc != 0) {
+                        cifsInode->write_behind_rc = rc;
+                        rc = 0;
+                }
+        }
+        if (attrs->ia_valid & ATTR_SIZE) {
+                rc = cifs_set_file_size(inode, attrs, xid, full_path);
+                if (rc != 0)
+                        goto out;
+        }
+        /* skip mode change if it's just for clearing setuid/setgid */
+        if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+                attrs->ia_valid &= ~ATTR_MODE;
+        args = kmalloc(sizeof(*args), GFP_KERNEL);
+        if (args == NULL) {
+                rc = -ENOMEM;
+                goto out;
+        }
+        /* set up the struct: fields not requested in ia_valid get NO_CHANGE_64 */
+        if (attrs->ia_valid & ATTR_MODE)
+                args->mode = attrs->ia_mode;
+        else
+                args->mode = NO_CHANGE_64;
+        if (attrs->ia_valid & ATTR_UID)
+                args->uid = attrs->ia_uid;
+        else
+                args->uid = NO_CHANGE_64;
+        if (attrs->ia_valid & ATTR_GID)
+                args->gid = attrs->ia_gid;
+        else
+                args->gid = NO_CHANGE_64;
+        if (attrs->ia_valid & ATTR_ATIME)
+                args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
+        else
+                args->atime = NO_CHANGE_64;
+        if (attrs->ia_valid & ATTR_MTIME)
+                args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
+        else
+                args->mtime = NO_CHANGE_64;
+        if (attrs->ia_valid & ATTR_CTIME)
+                args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
+        else
+                args->ctime = NO_CHANGE_64;
+        args->device = 0;
+        rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
+                                cifs_sb->local_nls,
+                                cifs_sb->mnt_cifs_flags &
+                                CIFS_MOUNT_MAP_SPECIAL_CHR);
+        if (!rc)
+                rc = inode_setattr(inode, attrs);       /* mirror the change locally */
+out:
+        kfree(args);    /* args is still NULL if we left before the kmalloc */
+        kfree(full_path);
+        FreeXid(xid);
+        return rc;
+}
+static int
+cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
+{
+        int xid;
+        struct inode *inode = direntry->d_inode;
+        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+        char *full_path = NULL;
+        int rc = -EACCES;
+        __u32 dosattr = 0;
+        __u64 mode = NO_CHANGE_64;
        xid = GetXid();
        cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
                 direntry->d_name.name, attrs->ia_valid));
-        cifs_sb = CIFS_SB(inode->i_sb);
-        pTcon = cifs_sb->tcon;
        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
                /* check if we have permission to change attrs */
                rc = inode_change_ok(inode, attrs);
@@ -1528,7 +1741,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                FreeXid(xid);
                return -ENOMEM;
        }
-        cifsInode = CIFS_I(inode);
        if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
                /*
@@ -1559,21 +1771,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
         * CIFSACL support + proper Windows to Unix idmapping, we may be
         * able to support this in the future.
         */
-        if (!pTcon->unix_ext &&
+        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
-            !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
                attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
-        } else {
-                if (attrs->ia_valid & ATTR_UID) {
-                        cFYI(1, ("UID changed to %d", attrs->ia_uid));
-                        uid = attrs->ia_uid;
-                }
-                if (attrs->ia_valid & ATTR_GID) {
-                        cFYI(1, ("GID changed to %d", attrs->ia_gid));
-                        gid = attrs->ia_gid;
-                }
-        }
-        time_buf.Attributes = 0;
        /* skip mode change if it's just for clearing setuid/setgid */
        if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -1584,13 +1783,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                mode = attrs->ia_mode;
        }
-        if ((pTcon->unix_ext)
+        if (attrs->ia_valid & ATTR_MODE) {
-            && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
-                rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
-                                         0 /* dev_t */, cifs_sb->local_nls,
-                                         cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-        else if (attrs->ia_valid & ATTR_MODE) {
                rc = 0;
 #ifdef CONFIG_CIFS_EXPERIMENTAL
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
@@ -1599,24 +1792,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 #endif
                if (((mode & S_IWUGO) == 0) &&
                    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
-                        set_dosattr = true;
-                        time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs |
+                        dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
-                                                          ATTR_READONLY);
                        /* fix up mode if we're not using dynperm */
                        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
                                attrs->ia_mode = inode->i_mode & ~S_IWUGO;
                } else if ((mode & S_IWUGO) &&
                           (cifsInode->cifsAttrs & ATTR_READONLY)) {
-                        /* If file is readonly on server, we would
-                        not be able to write to it - so if any write
+                        dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
-                        bit is enabled for user or group or other we
+                        /* Attributes of 0 are ignored */
-                        need to at least try to remove r/o dos attr */
+                        if (dosattr == 0)
-                        set_dosattr = true;
+                                dosattr |= ATTR_NORMAL;
-                        time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
-                                            (~ATTR_READONLY));
-                        /* Windows ignores set to zero */
-                        if (time_buf.Attributes == 0)
-                                time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
                        /* reset local inode permissions to normal */
                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1634,82 +1822,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                }
        }
-        if (attrs->ia_valid & ATTR_ATIME) {
+        if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
-                set_time = true;
+            ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
-                time_buf.LastAccessTime =
+                rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
-                    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
+                /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
-        } else
-                time_buf.LastAccessTime = 0;
-        if (attrs->ia_valid & ATTR_MTIME) {
-                set_time = true;
-                time_buf.LastWriteTime =
-                    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
-        } else
-                time_buf.LastWriteTime = 0;
-        /* Do not set ctime explicitly unless other time
-           stamps are changed explicitly (i.e. by utime()
-           since we would then have a mix of client and
-           server times */
-        if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
-                set_time = true;
-                /* Although Samba throws this field away
-                it may be useful to Windows - but we do
-                not want to set ctime unless some other
-                timestamp is changing */
-                cFYI(1, ("CIFS - CTIME changed"));
-                time_buf.ChangeTime =
-                    cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
-        } else
-                time_buf.ChangeTime = 0;
-        if (set_time || set_dosattr) {
-                time_buf.CreationTime = 0;      /* do not change */
-                /* In the future we should experiment - try setting timestamps
-                   via Handle (SetFileInfo) instead of by path */
-                if (!(pTcon->ses->flags & CIFS_SES_NT4))
-                        rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
-                                             cifs_sb->local_nls,
-                                             cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-                else
-                        rc = -EOPNOTSUPP;
-                if (rc == -EOPNOTSUPP) {
-                        int oplock = 0;
-                        __u16 netfid;
-                        cFYI(1, ("calling SetFileInfo since SetPathInfo for "
-                                 "times not supported by this server"));
-                        /* BB we could scan to see if we already have it open
-                           and pass in pid of opener to function */
-                        rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
-                                         SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
-                                         CREATE_NOT_DIR, &netfid, &oplock,
-                                         NULL, cifs_sb->local_nls,
-                                         cifs_sb->mnt_cifs_flags &
-                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-                        if (rc == 0) {
-                                rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
-                                                         netfid);
-                                CIFSSMBClose(xid, pTcon, netfid);
-                        } else {
-                        /* BB For even older servers we could convert time_buf
-                           into old DOS style which uses two second
-                           granularity */
-                        /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
-                                        &time_buf, cifs_sb->local_nls); */
-                        }
-                }
                /* Even if error on time set, no sense failing the call if
                the server would set the time to a reasonable value anyway,
                and this check ensures that we are not being called from
                sys_utimes in which case we ought to fail the call back to
                the user when the server rejects the call */
                if ((rc) && (attrs->ia_valid &
-                         (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
+                                (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
                        rc = 0;
        }
@@ -1723,6 +1847,21 @@ cifs_setattr_exit:
        return rc;
 }
+int     /*
+ * cifs_setattr - pick the setattr implementation for this mount.
+ *
+ * Looks up the tcon through the inode's superblock info and calls
+ * cifs_setattr_unix() when its unix_ext flag is set, cifs_setattr_nounix()
+ * otherwise.  The chosen function's return value is passed straight back.
+ */
+cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+        struct inode *inode = direntry->d_inode;
+        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+        struct cifsTconInfo *pTcon = cifs_sb->tcon;
+        if (pTcon->unix_ext)
+                return cifs_setattr_unix(direntry, attrs);
+        return cifs_setattr_nounix(direntry, attrs);
+        /* BB: add cifs_setattr_legacy for really old servers */
+}
 #if 0
 void cifs_delete_inode(struct inode *inode)
 {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98a..e286db9f5ee2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
        cFYI(1, ("Sending smb:  total_len %d", total_len));
        dump_smb(smb_buffer, len);
+        i = 0;
        while (total_len) {
                rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
                                    n_vec - first_vec, total_len);
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3ea..762d287123ca 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
 #define CONFIGFS_USET_DEFAULT   0x0080
 #define CONFIGFS_USET_DROPPING  0x0100
 #define CONFIGFS_USET_IN_MKDIR  0x0200
+#define CONFIGFS_USET_CREATING  0x0400
 #define CONFIGFS_NOT_PINNED     (CONFIGFS_ITEM_ATTR)
+extern struct mutex configfs_symlink_mutex;
 extern spinlock_t configfs_dirent_lock;
 extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
 extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
 extern int configfs_make_dirent(struct configfs_dirent *,
                                struct dentry *, void *, umode_t, int);
+extern int configfs_dirent_is_ready(struct configfs_dirent *);
 extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
 extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..7a8db78a91d2 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
        error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
        if (!error)
                error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-                                             CONFIGFS_DIR);
+                                             CONFIGFS_DIR | CONFIGFS_USET_CREATING);
        if (!error) {
                error = configfs_create(d, mode, init_dir);
                if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
 *      configfs_create_dir - create a directory for an config_item.
 *      @item:          config_itemwe're creating directory for.
 *      @dentry:        config_item's dentry.
+ *
+ *      Note: user-created entries won't be allowed under this new directory
+ *      until it is validated by configfs_dir_set_ready()
 */
 static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
        return error;
 }
+/*
+ * Allow userspace to create new entries under a new directory created with
+ * configfs_create_dir(), and under all of its children directories recursively.
+ * @sd          configfs_dirent of the new directory to validate
+ *
+ * Clears CONFIGFS_USET_CREATING in @sd->s_type, then recurses into every entry
+ * on @sd->s_children that still has the flag set.  Children without the flag
+ * are not descended into.
+ *
+ * Caller must hold configfs_dirent_lock.
+ */
+static void configfs_dir_set_ready(struct configfs_dirent *sd)
+{
+        struct configfs_dirent *child_sd;
+        sd->s_type &= ~CONFIGFS_USET_CREATING;
+        list_for_each_entry(child_sd, &sd->s_children, s_sibling)
+                if (child_sd->s_type & CONFIGFS_USET_CREATING)
+                        configfs_dir_set_ready(child_sd);
+}
+/*
+ * Check that a directory does not belong to a directory hierarchy being
+ * attached and not validated yet.
+ * @sd          configfs_dirent of the directory to check
+ *
+ * @return      non-zero iff the directory was validated
+ *
+ * A dirent counts as validated when CONFIGFS_USET_CREATING is clear in its
+ * s_type; the flag is sampled under configfs_dirent_lock.
+ *
+ * Note: takes configfs_dirent_lock, so the result may change from false to true
+ * in two consecutive calls, but never from true to false.
+ */
+int configfs_dirent_is_ready(struct configfs_dirent *sd)
+{
+        int ret;        /* 1 when the flag is clear, 0 while it is set */
+        spin_lock(&configfs_dirent_lock);
+        ret = !(sd->s_type & CONFIGFS_USET_CREATING);
+        spin_unlock(&configfs_dirent_lock);
+        return ret;
+}
 int configfs_create_link(struct configfs_symlink *sl,
                         struct dentry *parent,
                         struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
 * The only thing special about this is that we remove any files in
 * the directory before we remove the directory, and we've inlined
 * what used to be configfs_rmdir() below, instead of calling separately.
+ *
+ * Caller holds the mutex of the item's inode
 */
 static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
        struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
        struct configfs_dirent * sd;
        int found = 0;
-        int err = 0;
+        int err;
+        /*
+         * Fake invisibility if dir belongs to a group/default groups hierarchy
+         * being attached
+         *
+         * This forbids userspace to read/write attributes of items which may
+         * not complete their initialization, since the dentries of the
+         * attributes won't be instantiated.
+         */
+        err = -ENOENT;
+        if (!configfs_dirent_is_ready(parent_sd))
+                goto out;
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
                return simple_lookup(dir, dentry, nd);
        }
+out:
        return ERR_PTR(err);
 }
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
        struct configfs_dirent *sd;
        int ret;
+        /* Mark that we're trying to drop the group */
+        parent_sd->s_type |= CONFIGFS_USET_DROPPING;
        ret = -EBUSY;
        if (!list_empty(&parent_sd->s_links))
                goto out;
        ret = 0;
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-                if (sd->s_type & CONFIGFS_NOT_PINNED)
+                if (!sd->s_element ||
+                    (sd->s_type & CONFIGFS_NOT_PINNED))
                        continue;
                if (sd->s_type & CONFIGFS_USET_DEFAULT) {
                        /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
                                        *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
                                return -EAGAIN;
                        }
-                        /* Mark that we're trying to drop the group */
-                        sd->s_type |= CONFIGFS_USET_DROPPING;
                        /*
                         * Yup, recursive.  If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
        struct configfs_dirent *parent_sd = dentry->d_fsdata;
        struct configfs_dirent *sd;
-        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+        parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
-                if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+        list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
+                if (sd->s_type & CONFIGFS_USET_DEFAULT)
                        configfs_detach_rollback(sd->s_dentry);
-                        sd->s_type &= ~CONFIGFS_USET_DROPPING;
-                }
-        }
 }
 static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
 static int populate_groups(struct config_group *group)
 {
        struct config_group *new_group;
-        struct dentry *dentry = group->cg_item.ci_dentry;
        int ret = 0;
        int i;
        if (group->default_groups) {
-                /*
-                 * FYI, we're faking mkdir here
-                 * I'm not sure we need this semaphore, as we're called
-                 * from our parent's mkdir.  That holds our parent's
-                 * i_mutex, so afaik lookup cannot continue through our
-                 * parent to find us, let alone mess with our tree.
-                 * That said, taking our i_mutex is closer to mkdir
-                 * emulation, and shouldn't hurt.
-                 */
-                mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
                for (i = 0; group->default_groups[i]; i++) {
                        new_group = group->default_groups[i];
                        ret = create_default_group(group, new_group);
-                        if (ret)
+                        if (ret) {      /* first failure ends the loop;
+                                         * detach_groups() presumably undoes
+                                         * the groups created so far - confirm */
+                                detach_groups(group);
                                break;
+                        }
                }
-                mutex_unlock(&dentry->d_inode->i_mutex);
        }
-        if (ret)
-                detach_groups(group);
        return ret;
 }
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
        if (!ret) {
                ret = populate_attrs(item);
                if (ret) {
+                        /*
+                         * We are going to remove an inode and its dentry but
+                         * the VFS may already have hit and used them. Thus,
+                         * we must lock them as rmdir() would.
+                         */
+                        mutex_lock(&dentry->d_inode->i_mutex);
                        configfs_remove_dir(item);
+                        dentry->d_inode->i_flags |= S_DEAD;
+                        mutex_unlock(&dentry->d_inode->i_mutex);
                        d_delete(dentry);
                }
        }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
        return ret;
 }
+/* Caller holds the mutex of the item's inode */
 static void configfs_detach_item(struct config_item *item)
 {
        detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
                sd = dentry->d_fsdata;
                sd->s_type |= CONFIGFS_USET_DIR;
+                /*
+                 * FYI, we're faking mkdir in populate_groups()
+                 * We must lock the group's inode to avoid races with the VFS
+                 * which can already hit the inode and try to add/remove entries
+                 * under it.
+                 *
+                 * We must also lock the inode to remove it safely in case of
+                 * error, as rmdir() would.
+                 */
+                mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
                ret = populate_groups(to_config_group(item));
                if (ret) {
                        configfs_detach_item(item);
-                        d_delete(dentry);
+                        dentry->d_inode->i_flags |= S_DEAD;
                }
+                mutex_unlock(&dentry->d_inode->i_mutex);
+                if (ret)
+                        d_delete(dentry);
        }
        return ret;
 }
+/* Caller holds the mutex of the group's inode */
 static void configfs_detach_group(struct config_item *item)
 {
        detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        struct configfs_subsystem *subsys;
        struct configfs_dirent *sd;
        struct config_item_type *type;
-        struct module *owner = NULL;
+        struct module *subsys_owner = NULL, *new_item_owner = NULL;
        char *name;
        if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        }
        sd = dentry->d_parent->d_fsdata;
+        /*
+         * Fake invisibility if dir belongs to a group/default groups hierarchy
+         * being attached
+         */
+        if (!configfs_dirent_is_ready(sd)) {
+                ret = -ENOENT;
+                goto out;
+        }
        if (!(sd->s_type & CONFIGFS_USET_DIR)) {
                ret = -EPERM;
                goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                goto out_put;
        }
+        /*
+         * The subsystem may belong to a different module than the item
+         * being created.  We don't want to safely pin the new item but
+         * fail to pin the subsystem it sits under.
+         */
+        if (!subsys->su_group.cg_item.ci_type) {
+                ret = -EINVAL;
+                goto out_put;
+        }
+        subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+        if (!try_module_get(subsys_owner)) {
+                ret = -EINVAL;
+                goto out_put;
+        }
        name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
        if (!name) {
                ret = -ENOMEM;
-                goto out_put;
+                goto out_subsys_put;
        }
        snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        kfree(name);
        if (ret) {
                /*
-                 * If item == NULL, then link_obj() was never called.
+                 * If ret != 0, then link_obj() was never called.
                 * There are no extra references to clean up.
                 */
-                goto out_put;
+                goto out_subsys_put;
        }
        /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                goto out_unlink;
        }
-        owner = type->ct_owner;
+        new_item_owner = type->ct_owner;
-        if (!try_module_get(owner)) {
+        if (!try_module_get(new_item_owner)) {
                ret = -EINVAL;
                goto out_unlink;
        }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        spin_lock(&configfs_dirent_lock);
        sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
+        if (!ret)
+                configfs_dir_set_ready(dentry->d_fsdata);
        spin_unlock(&configfs_dirent_lock);
 out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
                mutex_unlock(&subsys->su_mutex);
                if (module_got)
-                        module_put(owner);
+                        module_put(new_item_owner);
        }
+out_subsys_put:
+        if (ret)
+                module_put(subsys_owner);
 out_put:
        /*
         * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
        struct config_item *item;
        struct configfs_subsystem *subsys;
        struct configfs_dirent *sd;
-        struct module *owner = NULL;
+        struct module *subsys_owner = NULL, *dead_item_owner = NULL;
        int ret;
        if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
                return -EINVAL;
        }
+        /* configfs_mkdir() shouldn't have allowed this */
+        BUG_ON(!subsys->su_group.cg_item.ci_type);
+        subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
+        /*
+         * Ensure that no racing symlink() will make detach_prep() fail while
+         * the new link is temporarily attached
+         */
+        mutex_lock(&configfs_symlink_mutex);
        spin_lock(&configfs_dirent_lock);
        do {
                struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
                if (ret) {
                        configfs_detach_rollback(dentry);
                        spin_unlock(&configfs_dirent_lock);
+                        mutex_unlock(&configfs_symlink_mutex);
                        if (ret != -EAGAIN) {
                                config_item_put(parent_item);
                                return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
                        mutex_lock(wait_mutex);
                        mutex_unlock(wait_mutex);
+                        mutex_lock(&configfs_symlink_mutex);
                        spin_lock(&configfs_dirent_lock);
                }
        } while (ret == -EAGAIN);
        spin_unlock(&configfs_dirent_lock);
+        mutex_unlock(&configfs_symlink_mutex);
        /* Get a working ref for the duration of this function */
        item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
        config_item_put(parent_item);
        if (item->ci_type)
-                owner = item->ci_type->ct_owner;
+                dead_item_owner = item->ci_type->ct_owner;
        if (sd->s_type & CONFIGFS_USET_DIR) {
                configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
        /* Drop our reference from above */
        config_item_put(item);
-        module_put(owner);
+        module_put(dead_item_owner);
+        module_put(subsys_owner);
        return 0;
 }
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
 {
        struct dentry * dentry = file->f_path.dentry;
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
+        int err;
        mutex_lock(&dentry->d_inode->i_mutex);
-        file->private_data = configfs_new_dirent(parent_sd, NULL);
+        /*
+         * Fake invisibility if dir belongs to a group/default groups hierarchy
+         * being attached
+         */
+        err = -ENOENT;
+        if (configfs_dirent_is_ready(parent_sd)) {
+                file->private_data = configfs_new_dirent(parent_sd, NULL);
+                if (IS_ERR(file->private_data))
+                        err = PTR_ERR(file->private_data);
+                else
+                        err = 0;
+        }
        mutex_unlock(&dentry->d_inode->i_mutex);
-        return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0;
+        return err;
 }
 static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                if (err) {
                        d_delete(dentry);
                        dput(dentry);
+                } else {
+                        spin_lock(&configfs_dirent_lock);
+                        configfs_dir_set_ready(dentry->d_fsdata);
+                        spin_unlock(&configfs_dirent_lock);
                }
        }
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
        mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
                          I_MUTEX_PARENT);
        mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+        mutex_lock(&configfs_symlink_mutex);
        spin_lock(&configfs_dirent_lock);
        if (configfs_detach_prep(dentry, NULL)) {
                printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
        }
        spin_unlock(&configfs_dirent_lock);
+        mutex_unlock(&configfs_symlink_mutex);
        configfs_detach_group(&group->cg_item);
        dentry->d_inode->i_flags |= S_DEAD;
        mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40ac..bf74973b0492 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
 #include <linux/configfs.h>
 #include "configfs_internal.h"
+/* Protects attachments of new symlinks */
+DEFINE_MUTEX(configfs_symlink_mutex);
 static int item_depth(struct config_item * item)
 {
        struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
        struct configfs_symlink *sl;
        int ret;
+        ret = -ENOENT;
+        if (!configfs_dirent_is_ready(target_sd))
+                goto out;
        ret = -ENOMEM;
        sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
        if (sl) {
                sl->sl_target = config_item_get(item);
                spin_lock(&configfs_dirent_lock);
+                if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
+                        spin_unlock(&configfs_dirent_lock);
+                        config_item_put(item);
+                        kfree(sl);
+                        return -ENOENT;
+                }
                list_add(&sl->sl_list, &target_sd->s_links);
                spin_unlock(&configfs_dirent_lock);
                ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
                }
        }
+out:
        return ret;
 }
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 {
        int ret;
        struct nameidata nd;
+        struct configfs_dirent *sd;
        struct config_item *parent_item;
        struct config_item *target_item;
        struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
        if (dentry->d_parent == configfs_sb->s_root)
                goto out;
+        sd = dentry->d_parent->d_fsdata;
+        /*
+         * Fake invisibility if dir belongs to a group/default groups hierarchy
+         * being attached
+         */
+        ret = -ENOENT;
+        if (!configfs_dirent_is_ready(sd))
+                goto out;
        parent_item = configfs_get_config_item(dentry->d_parent);
        type = parent_item->ci_type;
+        ret = -EPERM;
        if (!type || !type->ct_item_ops ||
            !type->ct_item_ops->allow_link)
                goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
        ret = type->ct_item_ops->allow_link(parent_item, target_item);
        if (!ret) {
+                mutex_lock(&configfs_symlink_mutex);
                ret = create_link(parent_item, target_item, dentry);
+                mutex_unlock(&configfs_symlink_mutex);
                if (ret && type->ct_item_ops->drop_link)
                        type->ct_item_ops->drop_link(parent_item,
                                                     target_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index f2584d22cb45..101663d15e9f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
        return new;
 }
+/**
+ * d_add_ci - lookup or allocate new dentry with case-exact name
+ * @inode:  the inode case-insensitive lookup has found
+ * @dentry: the negative dentry that was passed to the parent's lookup func
+ * @name:   the case-exact name to be associated with the returned dentry
+ *
+ * This is to avoid filling the dcache with case-insensitive names to the
+ * same inode, only the actual correct case is stored in the dcache for
+ * case-insensitive filesystems.
+ *
+ * For a case-insensitive lookup match and if the case-exact dentry
+ * already exists in the dcache, use it and return it.
+ *
+ * If no entry exists with the exact case name, allocate new dentry with
+ * the exact case, and return the spliced entry.
+ */
+struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+                        struct qstr *name)
+{
+        int error;
+        struct dentry *found;
+        struct dentry *new;
+        /* Does a dentry matching the name exist already? */
+        found = d_hash_and_lookup(dentry->d_parent, name);
+        /* If not, create it now and return */
+        if (!found) {
+                new = d_alloc(dentry->d_parent, name);
+                if (!new) {
+                        error = -ENOMEM;
+                        goto err_out;
+                }
+                found = d_splice_alias(inode, new);
+                if (found) {
+                        dput(new);
+                        return found;
+                }
+                return new;
+        }
+        /* Matching dentry exists, check if it is negative. */
+        if (found->d_inode) {
+                if (unlikely(found->d_inode != inode)) {
+                        /* This can't happen because bad inodes are unhashed. */
+                        BUG_ON(!is_bad_inode(inode));
+                        BUG_ON(!is_bad_inode(found->d_inode));
+                }
+                /*
+                 * Already have the inode and the dentry attached, decrement
+                 * the reference count to balance the iget() done earlier
+                 * on.  We found the dentry using d_hash_and_lookup() so it
+                 * cannot be disconnected and thus we do not need to worry
+                 * about any NFS/disconnectedness issues here.
+                 */
+                iput(inode);
+                return found;
+        }
+        /*
+         * Negative dentry: instantiate it unless the inode is a directory and
+         * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
+         * in which case d_move() that in place of the found dentry.
+         */
+        if (!S_ISDIR(inode->i_mode)) {
+                /* Not a directory; everything is easy. */
+                d_instantiate(found, inode);
+                return found;
+        }
+        spin_lock(&dcache_lock);
+        if (list_empty(&inode->i_dentry)) {
+                /*
+                 * Directory without a 'disconnected' dentry; we need to do
+                 * d_instantiate() by hand because it takes dcache_lock which
+                 * we already hold.
+                 */
+                list_add(&found->d_alias, &inode->i_dentry);
+                found->d_inode = inode;
+                spin_unlock(&dcache_lock);
+                security_d_instantiate(found, inode);
+                return found;
+        }
+        /*
+         * Directory with a 'disconnected' dentry; get a reference to the
+         * 'disconnected' dentry.
+         */
+        new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+        dget_locked(new);
+        spin_unlock(&dcache_lock);
+        /* Do security voodoo. */
+        security_d_instantiate(found, inode);
+        /* Move new in place of found. */
+        d_move(new, found);
+        /* Balance the iget() done earlier on. */
+        iput(inode);
+        /* Throw away found. */
+        dput(found);
+        /* Use new as the actual dentry. */
+        return new;
+err_out:
+        iput(inode);
+        return ERR_PTR(error);
+}
 /**
 * d_lookup - search for a dentry
@@ -2254,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
 EXPORT_SYMBOL(d_splice_alias);
+EXPORT_SYMBOL(d_add_ci);
 EXPORT_SYMBOL(d_validate);
 EXPORT_SYMBOL(dget_locked);
 EXPORT_SYMBOL(dput);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06e..488eb424f662 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
 #define DEVPTS_DEFAULT_MODE 0600
 extern int pty_limit;                   /* Config limit on Unix98 ptys */
-static DEFINE_IDR(allocated_ptys);
+static DEFINE_IDA(allocated_ptys);
 static DEFINE_MUTEX(allocated_ptys_lock);
 static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
 int devpts_new_index(void)
 {
        int index;
-        int idr_ret;
+        int ida_ret;
 retry:
-        if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+        if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
                return -ENOMEM;
        }
        mutex_lock(&allocated_ptys_lock);
-        idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
+        ida_ret = ida_get_new(&allocated_ptys, &index);
-        if (idr_ret < 0) {
+        if (ida_ret < 0) {
                mutex_unlock(&allocated_ptys_lock);
-                if (idr_ret == -EAGAIN)
+                if (ida_ret == -EAGAIN)
                        goto retry;
                return -EIO;
        }
        if (index >= pty_limit) {
-                idr_remove(&allocated_ptys, index);
+                ida_remove(&allocated_ptys, index);
                mutex_unlock(&allocated_ptys_lock);
                return -EIO;
        }
@@ -208,7 +208,7 @@ retry:
 void devpts_kill_index(int idx)
 {
        mutex_lock(&allocated_ptys_lock);
-        idr_remove(&allocated_ptys, idx);
+        ida_remove(&allocated_ptys, idx);
        mutex_unlock(&allocated_ptys_lock);
 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 1346eebe74ce..8ec4d6cc7633 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
        return ret;
 }
+int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+                      struct path *path)
+{
+        int error = security_quota_on(path->dentry);
+        if (error)
+                return error;
+        /* The quota file must reside on sb itself; otherwise fail with -EXDEV */
+        if (path->mnt->mnt_sb != sb)
+                error = -EXDEV;
+        else
+                error = vfs_quota_on_inode(path->dentry->d_inode, type,
+                                           format_id);
+        return error;
+}
 /* Actual function called from quotactl() */
 int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
                 int remount)
@@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
                return vfs_quota_on_remount(sb, type);
        error = path_lookup(path, LOOKUP_FOLLOW, &nd);
-        if (error < 0)
+        if (!error) {
-                return error;
+                error = vfs_quota_on_path(sb, type, format_id, &nd.path);
-        error = security_quota_on(nd.path.dentry);
+                path_put(&nd.path);
-        if (error)
+        }
-                goto out_path;
-        /* Quota file not on the same filesystem? */
-        if (nd.path.mnt->mnt_sb != sb)
-                error = -EXDEV;
-        else
-                error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
-                                           format_id);
-out_path:
-        path_put(&nd.path);
        return error;
 }
@@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
 EXPORT_SYMBOL(dq_data_lock);
 EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
 EXPORT_SYMBOL(vfs_quota_off);
 EXPORT_SYMBOL(vfs_quota_sync);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0c87474f7917..7cc0eb756b55 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1041,10 +1041,7 @@ retry:
 }
 /*
- * It opens an eventpoll file descriptor. The "size" parameter is there
+ * Open an eventpoll file descriptor.
- * for historical reasons, when epoll was using an hash instead of an
- * RB tree. With the current implementation, the "size" parameter is ignored
- * (besides sanity checks).
 */
 asmlinkage long sys_epoll_create1(int flags)
 {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ddced384674..f38a5afc39a1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
                journal_unlock_updates(EXT3_SB(sb)->s_journal);
        }
+        err = vfs_quota_on_path(sb, type, format_id, &nd.path);
        path_put(&nd.path);
-        return vfs_quota_on(sb, type, format_id, path, remount);
+        return err;
 }
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index c7d04e165446..694ed6fadcc8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
        acl = posix_acl_alloc(count, GFP_NOFS);
        if (!acl)
                return ERR_PTR(-ENOMEM);
-        for (n=0; n < count; n++) {
+        for (n = 0; n < count; n++) {
                ext4_acl_entry *entry =
                        (ext4_acl_entry *)value;
                if ((char *)value + sizeof(ext4_acl_entry_short) > end)
                        goto fail;
                acl->a_entries[n].e_tag  = le16_to_cpu(entry->e_tag);
                acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
-                switch(acl->a_entries[n].e_tag) {
-                        case ACL_USER_OBJ:
+                switch (acl->a_entries[n].e_tag) {
-                        case ACL_GROUP_OBJ:
+                case ACL_USER_OBJ:
-                        case ACL_MASK:
+                case ACL_GROUP_OBJ:
-                        case ACL_OTHER:
+                case ACL_MASK:
-                                value = (char *)value +
+                case ACL_OTHER:
-                                        sizeof(ext4_acl_entry_short);
+                        value = (char *)value +
-                                acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+                                sizeof(ext4_acl_entry_short);
-                                break;
+                        acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+                        break;
-                        case ACL_USER:
-                        case ACL_GROUP:
+                case ACL_USER:
-                                value = (char *)value + sizeof(ext4_acl_entry);
+                case ACL_GROUP:
-                                if ((char *)value > end)
+                        value = (char *)value + sizeof(ext4_acl_entry);
-                                        goto fail;
+                        if ((char *)value > end)
-                                acl->a_entries[n].e_id =
-                                        le32_to_cpu(entry->e_id);
-                                break;
-                        default:
                                goto fail;
+                        acl->a_entries[n].e_id =
+                                le32_to_cpu(entry->e_id);
+                        break;
+                default:
+                        goto fail;
                }
        }
        if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
                return ERR_PTR(-ENOMEM);
        ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
        e = (char *)ext_acl + sizeof(ext4_acl_header);
-        for (n=0; n < acl->a_count; n++) {
+        for (n = 0; n < acl->a_count; n++) {
                ext4_acl_entry *entry = (ext4_acl_entry *)e;
                entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
                entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
-                switch(acl->a_entries[n].e_tag) {
+                switch (acl->a_entries[n].e_tag) {
-                        case ACL_USER:
+                case ACL_USER:
-                        case ACL_GROUP:
+                case ACL_GROUP:
-                                entry->e_id =
+                        entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
-                                        cpu_to_le32(acl->a_entries[n].e_id);
+                        e += sizeof(ext4_acl_entry);
-                                e += sizeof(ext4_acl_entry);
+                        break;
-                                break;
+                case ACL_USER_OBJ:
-                        case ACL_USER_OBJ:
+                case ACL_GROUP_OBJ:
-                        case ACL_GROUP_OBJ:
+                case ACL_MASK:
-                        case ACL_MASK:
+                case ACL_OTHER:
-                        case ACL_OTHER:
+                        e += sizeof(ext4_acl_entry_short);
-                                e += sizeof(ext4_acl_entry_short);
+                        break;
-                                break;
+                default:
-                        default:
+                        goto fail;
-                                goto fail;
                }
        }
        return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return NULL;
-        switch(type) {
+        switch (type) {
-                case ACL_TYPE_ACCESS:
+        case ACL_TYPE_ACCESS:
-                        acl = ext4_iget_acl(inode, &ei->i_acl);
+                acl = ext4_iget_acl(inode, &ei->i_acl);
-                        if (acl != EXT4_ACL_NOT_CACHED)
+                if (acl != EXT4_ACL_NOT_CACHED)
-                                return acl;
+                        return acl;
-                        name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+                name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
-                        break;
+                break;
-                case ACL_TYPE_DEFAULT:
+        case ACL_TYPE_DEFAULT:
-                        acl = ext4_iget_acl(inode, &ei->i_default_acl);
+                acl = ext4_iget_acl(inode, &ei->i_default_acl);
-                        if (acl != EXT4_ACL_NOT_CACHED)
+                if (acl != EXT4_ACL_NOT_CACHED)
-                                return acl;
+                        return acl;
-                        name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+                name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
-                        break;
+                break;
-                default:
+        default:
-                        return ERR_PTR(-EINVAL);
+                return ERR_PTR(-EINVAL);
        }
        retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
        if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
        kfree(value);
        if (!IS_ERR(acl)) {
-                switch(type) {
+                switch (type) {
-                        case ACL_TYPE_ACCESS:
+                case ACL_TYPE_ACCESS:
-                                ext4_iset_acl(inode, &ei->i_acl, acl);
+                        ext4_iset_acl(inode, &ei->i_acl, acl);
-                                break;
+                        break;
-                        case ACL_TYPE_DEFAULT:
+                case ACL_TYPE_DEFAULT:
-                                ext4_iset_acl(inode, &ei->i_default_acl, acl);
+                        ext4_iset_acl(inode, &ei->i_default_acl, acl);
-                                break;
+                        break;
                }
        }
        return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
        if (S_ISLNK(inode->i_mode))
                return -EOPNOTSUPP;
-        switch(type) {
+        switch (type) {
-                case ACL_TYPE_ACCESS:
+        case ACL_TYPE_ACCESS:
-                        name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
+                name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
-                        if (acl) {
+                if (acl) {
-                                mode_t mode = inode->i_mode;
+                        mode_t mode = inode->i_mode;
-                                error = posix_acl_equiv_mode(acl, &mode);
+                        error = posix_acl_equiv_mode(acl, &mode);
-                                if (error < 0)
+                        if (error < 0)
-                                        return error;
+                                return error;
-                                else {
+                        else {
-                                        inode->i_mode = mode;
+                                inode->i_mode = mode;
-                                        ext4_mark_inode_dirty(handle, inode);
+                                ext4_mark_inode_dirty(handle, inode);
-                                        if (error == 0)
+                                if (error == 0)
-                                                acl = NULL;
+                                        acl = NULL;
-                                }
                        }
-                        break;
+                }
+                break;
-                case ACL_TYPE_DEFAULT:
+        case ACL_TYPE_DEFAULT:
-                        name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
+                name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
-                        if (!S_ISDIR(inode->i_mode))
+                if (!S_ISDIR(inode->i_mode))
-                                return acl ? -EACCES : 0;
+                        return acl ? -EACCES : 0;
-                        break;
+                break;
-                default:
+        default:
-                        return -EINVAL;
+                return -EINVAL;
        }
        if (acl) {
                value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
        kfree(value);
        if (!error) {
-                switch(type) {
+                switch (type) {
-                        case ACL_TYPE_ACCESS:
+                case ACL_TYPE_ACCESS:
-                                ext4_iset_acl(inode, &ei->i_acl, acl);
+                        ext4_iset_acl(inode, &ei->i_acl, acl);
-                                break;
+                        break;
-                        case ACL_TYPE_DEFAULT:
+                case ACL_TYPE_DEFAULT:
-                                ext4_iset_acl(inode, &ei->i_default_acl, acl);
+                        ext4_iset_acl(inode, &ei->i_default_acl, acl);
-                                break;
+                        break;
                }
        }
        return error;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b9832..1ae5004e93fc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
        if (unlikely(!bh)) {
                ext4_error(sb, __func__,
                            "Cannot read block bitmap - "
-                            "block_group = %d, block_bitmap = %llu",
+                            "block_group = %lu, block_bitmap = %llu",
-                            (int)block_group, (unsigned long long)bitmap_blk);
+                            block_group, bitmap_blk);
                return NULL;
        }
        if (bh_uptodate_or_lock(bh))
                return bh;
+        spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                ext4_init_block_bitmap(sb, bh, block_group, desc);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
+                spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
                return bh;
        }
+        spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
        if (bh_submit_read(bh) < 0) {
                put_bh(bh);
                ext4_error(sb, __func__,
                            "Cannot read block bitmap - "
-                            "block_group = %d, block_bitmap = %llu",
+                            "block_group = %lu, block_bitmap = %llu",
-                            (int)block_group, (unsigned long long)bitmap_blk);
+                            block_group, bitmap_blk);
                return NULL;
        }
        ext4_valid_block_bitmap(sb, desc, block_group, bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b14..6c7924d9e358 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
 /* inode.c */
-void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892ed..612c3d2c3824 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
        if (handle->h_buffer_credits > needed)
                return 0;
        err = ext4_journal_extend(handle, needed);
-        if (err)
+        if (err <= 0)
                return err;
        return ext4_journal_restart(handle, needed);
 }
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
        /*
         * get the next allocated block if the extent in the path
-         * is before the requested block(s) 
+         * is before the requested block(s)
         */
        if (b2 < b1) {
                b2 = ext4_ext_next_allocated_block(path);
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                        BUG_ON(b != ex_ee_block + ex_ee_len - 1);
                }
-                /* at present, extent can't cross block group: */
+                /*
-                /* leaf + bitmap + group desc + sb + inode */
+                 * 3 for leaf, sb, and inode plus 2 (bmap and group
-                credits = 5;
+                 * descriptor) for each block group; assume two block
+                 * groups plus ex_ee_len/blocks_per_block_group for
+                 * the worst case
+                 */
+                credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
                if (ex == EXT_FIRST_EXTENT(eh)) {
                        correct_index = 1;
                        credits += (ext_depth(inode)) + 1;
@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                unsigned int newdepth;
                /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
                if (allocated <= EXT4_EXT_ZERO_LEN) {
-                        /* Mark first half uninitialized.
+                        /*
+                         * iblock == ee_block is handled by the zeroout
+                         * at the beginning.
+                         * Mark first half uninitialized.
                         * Mark second half initialized and zero out the
                         * initialized extent
                         */
@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                ex->ee_len   = orig_ex.ee_len;
                                ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                                ext4_ext_dirty(handle, inode, path + depth);
-                                /* zeroed the full extent */
+                                /* blocks available from iblock */
                                return allocated;
                        } else if (err)
@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                        err = PTR_ERR(path);
                                        return err;
                                }
+                                /* get the second half extent details */
                                ex = path[depth].p_ext;
                                err = ext4_ext_get_access(handle, inode,
                                                                path + depth);
@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zeroed the full extent */
+                        /* blocks available from iblock */
                        return allocated;
                } else if (err)
@@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                 */
                orig_ex.ee_len = cpu_to_le16(ee_len -
                                                ext4_ext_get_actual_len(ex3));
-                if (newdepth != depth) {
+                depth = newdepth;
-                        depth = newdepth;
+                ext4_ext_drop_refs(path);
-                        ext4_ext_drop_refs(path);
+                path = ext4_ext_find_extent(inode, iblock, path);
-                        path = ext4_ext_find_extent(inode, iblock, path);
+                if (IS_ERR(path)) {
-                        if (IS_ERR(path)) {
+                        err = PTR_ERR(path);
-                                err = PTR_ERR(path);
+                        goto out;
-                                goto out;
-                        }
-                        eh = path[depth].p_hdr;
-                        ex = path[depth].p_ext;
-                        if (ex2 != &newex)
-                                ex2 = ex;
-                        err = ext4_ext_get_access(handle, inode, path + depth);
-                        if (err)
-                                goto out;
                }
+                eh = path[depth].p_hdr;
+                ex = path[depth].p_ext;
+                if (ex2 != &newex)
+                        ex2 = ex;
+                err = ext4_ext_get_access(handle, inode, path + depth);
+                if (err)
+                        goto out;
                allocated = max_blocks;
                /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                        ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
                        ext4_ext_dirty(handle, inode, path + depth);
                        /* zero out the first half */
+                        /* blocks available from iblock */
                        return allocated;
                }
        }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344f..655e760212b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
 * Return buffer_head of bitmap on success or NULL.
 */
 static struct buffer_head *
-read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc *desc;
        struct buffer_head *bh = NULL;
+        ext4_fsblk_t bitmap_blk;
        desc = ext4_get_group_desc(sb, block_group, NULL);
        if (!desc)
-                goto error_out;
+                return NULL;
-        if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+        bitmap_blk = ext4_inode_bitmap(sb, desc);
-                bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc));
+        bh = sb_getblk(sb, bitmap_blk);
-                if (!buffer_uptodate(bh)) {
+        if (unlikely(!bh)) {
-                        lock_buffer(bh);
+                ext4_error(sb, __func__,
-                        if (!buffer_uptodate(bh)) {
+                            "Cannot read inode bitmap - "
-                                ext4_init_inode_bitmap(sb, bh, block_group,
+                            "block_group = %lu, inode_bitmap = %llu",
-                                                       desc);
+                            block_group, bitmap_blk);
-                                set_buffer_uptodate(bh);
+                return NULL;
-                        }
-                        unlock_buffer(bh);
-                }
-        } else {
-                bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
        }
-        if (!bh)
+        if (bh_uptodate_or_lock(bh))
-                ext4_error(sb, "read_inode_bitmap",
+                return bh;
+        spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+        if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+                ext4_init_inode_bitmap(sb, bh, block_group, desc);
+                set_buffer_uptodate(bh);
+                unlock_buffer(bh);
+                spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+                return bh;
+        }
+        spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
+        if (bh_submit_read(bh) < 0) {
+                put_bh(bh);
+                ext4_error(sb, __func__,
                            "Cannot read inode bitmap - "
                            "block_group = %lu, inode_bitmap = %llu",
-                            block_group, ext4_inode_bitmap(sb, desc));
+                            block_group, bitmap_blk);
-error_out:
+                return NULL;
+        }
        return bh;
 }
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
        }
        block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
        bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
-        bitmap_bh = read_inode_bitmap(sb, block_group);
+        bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;
@@ -623,7 +633,7 @@ got_group:
                        goto fail;
                brelse(bitmap_bh);
-                bitmap_bh = read_inode_bitmap(sb, group);
+                bitmap_bh = ext4_read_inode_bitmap(sb, group);
                if (!bitmap_bh)
                        goto fail;
@@ -728,7 +738,7 @@ got:
                        /* When marking the block group with
                         * ~EXT4_BG_INODE_UNINIT we don't want to depend
-                         * on the value of bg_itable_unsed even though
+                         * on the value of bg_itable_unused even though
                         * mke2fs could have initialized the same for us.
                         * Instead we calculated the value below
                         */
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
        block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
        bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
-        bitmap_bh = read_inode_bitmap(sb, block_group);
+        bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
        if (!bitmap_bh) {
                ext4_warning(sb, __func__,
                             "inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
                        continue;
                desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
                brelse(bitmap_bh);
-                bitmap_bh = read_inode_bitmap(sb, i);
+                bitmap_bh = ext4_read_inode_bitmap(sb, i);
                if (!bitmap_bh)
                        continue;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9843b046c235..59fbbe899acc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
 void ext4_delete_inode (struct inode * inode)
 {
        handle_t *handle;
+        int err;
        if (ext4_should_order_data(inode))
                ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
        if (is_bad_inode(inode))
                goto no_delete;
-        handle = start_transaction(inode);
+        handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
        if (IS_ERR(handle)) {
+                ext4_std_error(inode->i_sb, PTR_ERR(handle));
                /*
                 * If we're going to skip the normal cleanup, we still need to
                 * make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
        if (IS_SYNC(inode))
                handle->h_sync = 1;
        inode->i_size = 0;
+        err = ext4_mark_inode_dirty(handle, inode);
+        if (err) {
+                ext4_warning(inode->i_sb, __func__,
+                             "couldn't mark inode dirty (err %d)", err);
+                goto stop_handle;
+        }
        if (inode->i_blocks)
                ext4_truncate(inode);
+        /*
+         * ext4_ext_truncate() doesn't reserve any slop when it
+         * restarts journal transactions; therefore there may not be
+         * enough credits left in the handle to remove the inode from
+         * the orphan list and set the dtime field.
+         */
+        if (handle->h_buffer_credits < 3) {
+                err = ext4_journal_extend(handle, 3);
+                if (err > 0)
+                        err = ext4_journal_restart(handle, 3);
+                if (err != 0) {
+                        ext4_warning(inode->i_sb, __func__,
+                                     "couldn't extend journal (err %d)", err);
+                stop_handle:
+                        ext4_journal_stop(handle);
+                        goto no_delete;
+                }
+        }
        /*
         * Kill off the orphan record which ext4_truncate created.
         * AKPM: I think this can be inside the above `if'.
@@ -952,6 +980,67 @@ out:
        return err;
 }
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * in order to allocate @blocks data blocks for a non-extent
+ * (indirect-mapped) file.
+ *
+ * Returns the sum of the single-, double- and triple-indirect block
+ * estimates computed below.
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+        /* presumably the number of block addresses one indirect block holds */
+        int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+        int ind_blks, dind_blks, tind_blks;
+        /* number of new indirect blocks needed */
+        /* ceil(blocks / icap) single-indirect blocks */
+        ind_blks = (blocks + icap - 1) / icap;
+        /* ceil(ind_blks / icap) double-indirect blocks to address them */
+        dind_blks = (ind_blks + icap - 1) / icap;
+        /* a single triple-indirect block is always counted */
+        tind_blks = 1;
+        return ind_blks + dind_blks + tind_blks;
+}
+/*
+ * Calculate the number of metadata blocks that need to be reserved
+ * in order to allocate the given number of data blocks.
+ *
+ * Dispatches on the inode's EXT4_EXTENTS_FL flag: inodes with the flag
+ * set use ext4_ext_calc_metadata_amount(), all others use the
+ * indirect-block estimate.
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+                return ext4_ext_calc_metadata_amount(inode, blocks);
+        return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+/*
+ * Update the inode's block reservation after @used of its reserved data
+ * blocks have been consumed.
+ *
+ * Recomputes the metadata reservation needed for the data blocks that
+ * remain reserved, gives the surplus metadata reservation (less the
+ * metadata blocks already allocated) back to the filesystem free blocks
+ * counter, and resets i_allocated_meta_blocks.  All of this is done
+ * under i_block_reservation_lock.
+ */
+static void ext4_da_update_reserve_space(struct inode *inode, int used)
+{
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+        int total, mdb, mdb_free;
+        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+        /* recalculate the number of metablocks that still need to be reserved */
+        total = EXT4_I(inode)->i_reserved_data_blocks - used;
+        mdb = ext4_calc_metadata_amount(inode, total);
+        /* figure out how many metablocks to release */
+        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+        mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+        /* Account for allocated meta_blocks */
+        mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+        /* return the surplus metadata reservation to the fs free blocks counter */
+        percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
+        /* update per-inode reservations */
+        BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
+        EXT4_I(inode)->i_reserved_data_blocks -= used;
+        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+        EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+        /* the allocated metadata blocks have been accounted for above */
+        EXT4_I(inode)->i_allocated_meta_blocks = 0;
+        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 /*
@@ -965,10 +1054,9 @@ out:
 /*
+ * The ext4_get_blocks_wrap() function tries to look up the requested blocks,
+ * and returns if the blocks are already mapped.
 *
- *
- * ext4_ext4 get_block() wrapper function
- * It will do a look up first, and returns if the blocks already mapped.
 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
 * and store the allocated blocks in the result buffer head and mark it
 * mapped.
@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
                 * which were deferred till now
                 */
                if ((retval > 0) && buffer_delay(bh))
-                        ext4_da_release_space(inode, retval, 0);
+                        ext4_da_update_reserve_space(inode, retval);
        }
        up_write((&EXT4_I(inode)->i_data_sem));
@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
 {
        handle_t *handle = ext4_journal_current_handle();
        struct inode *inode = mapping->host;
-        unsigned from, to;
        int ret = 0, ret2;
-        from = pos & (PAGE_CACHE_SIZE - 1);
-        to = from + len;
        ret = ext4_jbd2_file_inode(handle, inode);
        if (ret == 0) {
@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
        return ret ? ret : copied;
 }
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
-{
-        int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-        int ind_blks, dind_blks, tind_blks;
-        /* number of new indirect blocks needed */
-        ind_blks = (blocks + icap - 1) / icap;
-        dind_blks = (ind_blks + icap - 1) / icap;
-        tind_blks = 1;
-        return ind_blks + dind_blks + tind_blks;
-}
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
- */
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
-{
-        if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
-                return ext4_ext_calc_metadata_amount(inode, blocks);
-        return ext4_indirect_calc_metadata_amount(inode, blocks);
-}
 static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 {
@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
                spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
                return -ENOSPC;
        }
        /* reduce fs free blocks counter */
        percpu_counter_sub(&sbi->s_freeblocks_counter, total);
@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
        return 0;       /* success */
 }
-void ext4_da_release_space(struct inode *inode, int used, int to_free)
+static void ext4_da_release_space(struct inode *inode, int to_free)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        int total, mdb, mdb_free, release;
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        /* recalculate the number of metablocks still need to be reserved */
-        total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+        total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
        mdb = ext4_calc_metadata_amount(inode, total);
        /* figure out how many metablocks to release */
        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
        mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-        /* Account for allocated meta_blocks */
-        mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
        release = to_free + mdb_free;
        /* update fs free blocks counter for truncate case */
        percpu_counter_add(&sbi->s_freeblocks_counter, release);
        /* update per-inode reservations */
-        BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+        BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
-        EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+        EXT4_I(inode)->i_reserved_data_blocks -= to_free;
        BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
        EXT4_I(inode)->i_reserved_meta_blocks = mdb;
-        EXT4_I(inode)->i_allocated_meta_blocks = 0;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }
@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
                }
                curr_off = next_off;
        } while ((bh = bh->b_this_page) != head);
-        ext4_da_release_space(page->mapping->host, 0, to_release);
+        ext4_da_release_space(page->mapping->host, to_release);
 }
 /*
@@ -2280,8 +2329,11 @@ retry:
        }
        page = __grab_cache_page(mapping, index);
-        if (!page)
+        if (!page) {
-                return -ENOMEM;
+                ext4_journal_stop(handle);
+                ret = -ENOMEM;
+                goto out;
+        }
        *pagep = page;
        ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -3590,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
        }
        if (!buffer_uptodate(bh)) {
                lock_buffer(bh);
+                /*
+                 * If the buffer has the write error flag, we have failed
+                 * to write out another inode in the same block.  In this
+                 * case, we don't have to read the block because we may
+                 * read the old inode data successfully.
+                 */
+                if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+                        set_buffer_uptodate(bh);
                if (buffer_uptodate(bh)) {
                        /* someone brought it uptodate while we waited */
                        unlock_buffer(bh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbee..865e9ddb44d4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                if (bh_uptodate_or_lock(bh[i]))
                        continue;
+                spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
                if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                        ext4_init_block_bitmap(sb, bh[i],
                                                first_group + i, desc);
                        set_buffer_uptodate(bh[i]);
                        unlock_buffer(bh[i]);
+                        spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
                        continue;
                }
+                spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
                get_bh(bh[i]);
                bh[i]->b_end_io = end_buffer_read_sync;
                submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
 int ext4_mb_init(struct super_block *sb, int needs_recovery)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-        unsigned i;
+        unsigned i, j;
        unsigned offset;
        unsigned max;
        int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
        sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
-        i = sizeof(struct ext4_locality_group) * NR_CPUS;
+        i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
        sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
        if (sbi->s_locality_groups == NULL) {
                clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
                kfree(sbi->s_mb_maxs);
                return -ENOMEM;
        }
-        for (i = 0; i < NR_CPUS; i++) {
+        for (i = 0; i < nr_cpu_ids; i++) {
                struct ext4_locality_group *lg;
                lg = &sbi->s_locality_groups[i];
                mutex_init(&lg->lg_mutex);
-                INIT_LIST_HEAD(&lg->lg_prealloc_list);
+                for (j = 0; j < PREALLOC_TB_SIZE; j++)
+                        INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
                spin_lock_init(&lg->lg_prealloc_lock);
        }
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
                                struct ext4_prealloc_space *pa)
 {
        unsigned int len = ac->ac_o_ex.fe_len;
        ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
                                        &ac->ac_b_ex.fe_group,
                                        &ac->ac_b_ex.fe_start);
@@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 static noinline_for_stack int
 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
+        int order, i;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
        struct ext4_prealloc_space *pa;
@@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        lg = ac->ac_lg;
        if (lg == NULL)
                return 0;
+        order  = fls(ac->ac_o_ex.fe_len) - 1;
-        rcu_read_lock();
+        if (order > PREALLOC_TB_SIZE - 1)
-        list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
+                /* The max size of hash table is PREALLOC_TB_SIZE */
-                spin_lock(&pa->pa_lock);
+                order = PREALLOC_TB_SIZE - 1;
-                if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
-                        atomic_inc(&pa->pa_count);
+        for (i = order; i < PREALLOC_TB_SIZE; i++) {
-                        ext4_mb_use_group_pa(ac, pa);
+                rcu_read_lock();
+                list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
+                                        pa_inode_list) {
+                        spin_lock(&pa->pa_lock);
+                        if (pa->pa_deleted == 0 &&
+                                        pa->pa_free >= ac->ac_o_ex.fe_len) {
+                                atomic_inc(&pa->pa_count);
+                                ext4_mb_use_group_pa(ac, pa);
+                                spin_unlock(&pa->pa_lock);
+                                ac->ac_criteria = 20;
+                                rcu_read_unlock();
+                                return 1;
+                        }
                        spin_unlock(&pa->pa_lock);
-                        ac->ac_criteria = 20;
-                        rcu_read_unlock();
-                        return 1;
                }
-                spin_unlock(&pa->pa_lock);
+                rcu_read_unlock();
        }
-        rcu_read_unlock();
        return 0;
 }
@@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        pa->pa_free = pa->pa_len;
        atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
+        INIT_LIST_HEAD(&pa->pa_inode_list);
        pa->pa_deleted = 0;
        pa->pa_linear = 1;
@@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
        ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-        spin_lock(pa->pa_obj_lock);
+        /*
-        list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list);
+         * We will later add the new pa to the right bucket
-        spin_unlock(pa->pa_obj_lock);
+         * after updating the pa_free in ext4_mb_release_context
+         */
        return 0;
 }
@@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
        bitmap_bh = ext4_read_block_bitmap(sb, group);
        if (bitmap_bh == NULL) {
-                /* error handling here */
+                ext4_error(sb, __func__, "Error in reading block "
-                ext4_mb_release_desc(&e4b);
+                                "bitmap for %lu\n", group);
-                BUG_ON(bitmap_bh == NULL);
+                return 0;
        }
        err = ext4_mb_load_buddy(sb, group, &e4b);
-        BUG_ON(err != 0); /* error handling here */
+        if (err) {
+                ext4_error(sb, __func__, "Error in loading buddy "
+                                "information for %lu\n", group);
+                put_bh(bitmap_bh);
+                return 0;
+        }
        if (needed == 0)
                needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
-        grp = ext4_get_group_info(sb, group);
        INIT_LIST_HEAD(&list);
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        ext4_lock_group(sb, group);
@@ -3903,13 +3920,18 @@ repeat:
                ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
                err = ext4_mb_load_buddy(sb, group, &e4b);
-                BUG_ON(err != 0); /* error handling here */
+                if (err) {
+                        ext4_error(sb, __func__, "Error in loading buddy "
+                                        "information for %lu\n", group);
+                        continue;
+                }
                bitmap_bh = ext4_read_block_bitmap(sb, group);
                if (bitmap_bh == NULL) {
-                        /* error handling here */
+                        ext4_error(sb, __func__, "Error in reading block "
+                                        "bitmap for %lu\n", group);
                        ext4_mb_release_desc(&e4b);
-                        BUG_ON(bitmap_bh == NULL);
+                        continue;
                }
                ext4_lock_group(sb, group);
@@ -4112,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 }
+/*
+ * Trim one bucket of a locality group's preallocation hash.
+ *
+ * @sb:            super block the preallocations belong to
+ * @lg:            locality group whose bucket is trimmed
+ * @order:         index of the lg_prealloc_list[] bucket to walk
+ * @total_entries: caller's count of entries in that bucket
+ *
+ * Phase 1 (under lg_prealloc_lock): mark unused, not-yet-deleted
+ * preallocations as deleted and move them to a private discard list,
+ * stopping once @total_entries has been brought down to 5.
+ * Phase 2 (lock dropped): release each collected preallocation under
+ * its group lock and free it via call_rcu().
+ */
+static noinline_for_stack void
+ext4_mb_discard_lg_preallocations(struct super_block *sb,
+                                        struct ext4_locality_group *lg,
+                                        int order, int total_entries)
+{
+        ext4_group_t group = 0;
+        struct ext4_buddy e4b;
+        struct list_head discard_list;
+        struct ext4_prealloc_space *pa, *tmp;
+        struct ext4_allocation_context *ac;
+        mb_debug("discard locality group preallocation\n");
+        INIT_LIST_HEAD(&discard_list);
+        /*
+         * The allocation result is not checked here; it is only
+         * NULL-tested before being freed at the end.
+         * NOTE(review): confirm ext4_mb_release_group_pa() tolerates
+         * a NULL ac.
+         */
+        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
+        spin_lock(&lg->lg_prealloc_lock);
+        list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
+                                                pa_inode_list) {
+                spin_lock(&pa->pa_lock);
+                if (atomic_read(&pa->pa_count)) {
+                        /*
+                         * This is the pa that we just used
+                         * for block allocation. So don't
+                         * free that
+                         */
+                        spin_unlock(&pa->pa_lock);
+                        continue;
+                }
+                if (pa->pa_deleted) {
+                        spin_unlock(&pa->pa_lock);
+                        continue;
+                }
+                /* only lg prealloc space */
+                BUG_ON(!pa->pa_linear);
+                /* seems this one can be freed ... */
+                pa->pa_deleted = 1;
+                spin_unlock(&pa->pa_lock);
+                /* unlink from the bucket and queue on the private list */
+                list_del_rcu(&pa->pa_inode_list);
+                list_add(&pa->u.pa_tmp_list, &discard_list);
+                total_entries--;
+                if (total_entries <= 5) {
+                        /*
+                         * we want to keep only 5 entries
+                         * allowing it to grow to 8. This
+                         * makes sure we don't call discard
+                         * soon for this list.
+                         */
+                        break;
+                }
+        }
+        spin_unlock(&lg->lg_prealloc_lock);
+        list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+                ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+                if (ext4_mb_load_buddy(sb, group, &e4b)) {
+                        ext4_error(sb, __func__, "Error in loading buddy "
+                                        "information for %lu\n", group);
+                        /*
+                         * NOTE(review): this pa is skipped entirely; it is
+                         * neither released nor removed from discard_list
+                         * or its group list, and is never handed to
+                         * call_rcu().
+                         */
+                        continue;
+                }
+                ext4_lock_group(sb, group);
+                list_del(&pa->pa_group_list);
+                ext4_mb_release_group_pa(&e4b, pa, ac);
+                ext4_unlock_group(sb, group);
+                ext4_mb_release_desc(&e4b);
+                list_del(&pa->u.pa_tmp_list);
+                /* defer the actual free until after an RCU grace period */
+                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+        }
+        if (ac)
+                kmem_cache_free(ext4_ac_cachep, ac);
+}
+/*
+ * Insert the preallocation in @ac->ac_pa into the bucket of
+ * @ac->ac_lg's lg_prealloc_list[] selected by fls(pa_free) - 1
+ * (clamped to the last bucket), keeping the bucket ordered by
+ * ascending pa_free, then trim the bucket if it has grown past
+ * 8 entries.
+ *
+ * We have incremented pa_count. So it cannot be freed at this
+ * point. Also we hold lg_mutex. So no parallel allocation is
+ * possible from this lg. That means pa_free cannot be updated.
+ *
+ * A parallel ext4_mb_discard_group_preallocations is possible.
+ * which can cause the lg_prealloc_list to be updated.
+ */
+static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
+{
+        /* the count starts at 1 to include the pa being added */
+        int order, added = 0, lg_prealloc_count = 1;
+        struct super_block *sb = ac->ac_sb;
+        struct ext4_locality_group *lg = ac->ac_lg;
+        struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
+        order = fls(pa->pa_free) - 1;
+        if (order > PREALLOC_TB_SIZE - 1)
+                /* The max size of hash table is PREALLOC_TB_SIZE */
+                order = PREALLOC_TB_SIZE - 1;
+        /* Add the prealloc space to lg */
+        rcu_read_lock();
+        list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+                                                pa_inode_list) {
+                spin_lock(&tmp_pa->pa_lock);
+                if (tmp_pa->pa_deleted) {
+                        /*
+                         * Release the lock we actually took: it is
+                         * tmp_pa's, not the lock of the pa being added.
+                         */
+                        spin_unlock(&tmp_pa->pa_lock);
+                        continue;
+                }
+                if (!added && pa->pa_free < tmp_pa->pa_free) {
+                        /* Add to the tail of the previous entry */
+                        list_add_tail_rcu(&pa->pa_inode_list,
+                                                &tmp_pa->pa_inode_list);
+                        added = 1;
+                        /*
+                         * No break here: we keep walking because
+                         * we want to count the total
+                         * number of entries in the list
+                         */
+                }
+                spin_unlock(&tmp_pa->pa_lock);
+                lg_prealloc_count++;
+        }
+        /* no larger entry found: the new pa goes at the end of the bucket */
+        if (!added)
+                list_add_tail_rcu(&pa->pa_inode_list,
+                                        &lg->lg_prealloc_list[order]);
+        rcu_read_unlock();
+        /* Now trim the list to be not more than 8 elements */
+        if (lg_prealloc_count > 8)
+                ext4_mb_discard_lg_preallocations(sb, lg,
+                                                order, lg_prealloc_count);
+}
 /*
 * release all resource we used in allocation
 */
 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
-        if (ac->ac_pa) {
+        struct ext4_prealloc_space *pa = ac->ac_pa;
-                if (ac->ac_pa->pa_linear) {
+        if (pa) {
+                if (pa->pa_linear) {
                        /* see comment in ext4_mb_use_group_pa() */
-                        spin_lock(&ac->ac_pa->pa_lock);
+                        spin_lock(&pa->pa_lock);
-                        ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len;
+                        pa->pa_pstart += ac->ac_b_ex.fe_len;
-                        ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len;
+                        pa->pa_lstart += ac->ac_b_ex.fe_len;
-                        ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len;
+                        pa->pa_free -= ac->ac_b_ex.fe_len;
-                        ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len;
+                        pa->pa_len -= ac->ac_b_ex.fe_len;
-                        spin_unlock(&ac->ac_pa->pa_lock);
+                        spin_unlock(&pa->pa_lock);
+                        /*
+                         * We want to add the pa to the right bucket.
+                         * Remove it from the list and while adding
+                         * make sure the list to which we are adding
+                         * doesn't grow big.
+                         */
+                        if (likely(pa->pa_free)) {
+                                spin_lock(pa->pa_obj_lock);
+                                list_del_rcu(&pa->pa_inode_list);
+                                spin_unlock(pa->pa_obj_lock);
+                                ext4_mb_add_n_trim(ac);
+                        }
                }
-                ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa);
+                ext4_mb_put_pa(ac, ac->ac_sb, pa);
        }
        if (ac->ac_bitmap_page)
                page_cache_release(ac->ac_bitmap_page);
@@ -4420,11 +4588,15 @@ do_more:
                count -= overflow;
        }
        bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-        if (!bitmap_bh)
+        if (!bitmap_bh) {
+                err = -EIO;
                goto error_return;
+        }
        gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
-        if (!gdp)
+        if (!gdp) {
+                err = -EIO;
                goto error_return;
+        }
        if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
            in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bcf..c7c9906c2a75 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
 * Locality group:
 *   we try to group all related changes together
 *   so that writeback can flush/allocate them together as well
+ *   Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
+ *   (512). We store prealloc space into the hash based on the pa_free blocks
+ *   order value, i.e. fls(pa_free) - 1.
 */
+#define PREALLOC_TB_SIZE 10
 struct ext4_locality_group {
        /* for allocator */
-        struct mutex            lg_mutex;       /* to serialize allocates */
+        /* to serialize allocates */
-        struct list_head        lg_prealloc_list;/* list of preallocations */
+        struct mutex            lg_mutex;
+        /* list of preallocations */
+        struct list_head        lg_prealloc_list[PREALLOC_TB_SIZE];
        spinlock_t              lg_prealloc_lock;
 };
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd93..0a9265164265 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
                             "Inode bitmap not in group (block %llu)",
                             (unsigned long long)input->inode_bitmap);
        else if (outside(input->inode_table, start, end) ||
-                 outside(itend - 1, start, end))
+                 outside(itend - 1, start, end))
                ext4_warning(sb, __func__,
                             "Inode table not in group (blocks %llu-%llu)",
                             (unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
                             (unsigned long long)input->inode_bitmap,
                             start, metaend - 1);
        else if (inside(input->inode_table, start, metaend) ||
-                 inside(itend - 1, start, metaend))
+                 inside(itend - 1, start, metaend))
                ext4_warning(sb, __func__,
                             "Inode table (%llu-%llu) overlaps"
                             "GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
        if (err) {
                if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
                        return err;
-                if ((err = ext4_journal_get_write_access(handle, bh)))
+                if ((err = ext4_journal_get_write_access(handle, bh)))
                        return err;
-        }
+        }
        return 0;
 }
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
                       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
                       gdb_num);
-        /*
+        /*
-         * If we are not using the primary superblock/GDT copy don't resize,
+         * If we are not using the primary superblock/GDT copy don't resize,
-         * because the user tools have no way of handling this.  Probably a
+         * because the user tools have no way of handling this.  Probably a
-         * bad time to do it anyways.
+         * bad time to do it anyways.
-         */
+         */
        if (EXT4_SB(sb)->s_sbh->b_blocknr !=
            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
                ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        return 0;
 exit_inode:
-        //ext4_journal_release_buffer(handle, iloc.bh);
+        /* ext4_journal_release_buffer(handle, iloc.bh); */
        brelse(iloc.bh);
 exit_dindj:
-        //ext4_journal_release_buffer(handle, dind);
+        /* ext4_journal_release_buffer(handle, dind); */
 exit_primary:
-        //ext4_journal_release_buffer(handle, *primary);
+        /* ext4_journal_release_buffer(handle, *primary); */
 exit_sbh:
-        //ext4_journal_release_buffer(handle, *primary);
+        /* ext4_journal_release_buffer(handle, *primary); */
 exit_dind:
        brelse(dind);
 exit_bh:
@@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
                goto exit_journal;
-        /*
+        /*
-         * We will only either add reserved group blocks to a backup group
+         * We will only either add reserved group blocks to a backup group
-         * or remove reserved blocks for the first group in a new group block.
+         * or remove reserved blocks for the first group in a new group block.
-         * Doing both would be mean more complex code, and sane people don't
+         * Doing both would mean more complex code, and sane people don't
-         * use non-sparse filesystems anymore.  This is already checked above.
+         * use non-sparse filesystems anymore.  This is already checked above.
-         */
+         */
        if (gdb_off) {
                primary = sbi->s_group_desc[gdb_num];
                if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        } else if ((err = add_new_gdb(handle, inode, input, &primary)))
                goto exit_journal;
-        /*
+        /*
-         * OK, now we've set up the new group.  Time to make it active.
+         * OK, now we've set up the new group.  Time to make it active.
-         *
+         *
-         * Current kernels don't lock all allocations via lock_super(),
+         * Current kernels don't lock all allocations via lock_super(),
-         * so we have to be safe wrt. concurrent accesses the group
+         * so we have to be safe wrt. concurrent accesses to the group
-         * data.  So we need to be careful to set all of the relevant
+         * data.  So we need to be careful to set all of the relevant
-         * group descriptor data etc. *before* we enable the group.
+         * group descriptor data etc. *before* we enable the group.
-         *
+         *
-         * The key field here is sbi->s_groups_count: as long as
+         * The key field here is sbi->s_groups_count: as long as
-         * that retains its old value, nobody is going to access the new
+         * that retains its old value, nobody is going to access the new
-         * group.
+         * group.
-         *
+         *
-         * So first we update all the descriptor metadata for the new
+         * So first we update all the descriptor metadata for the new
-         * group; then we update the total disk blocks count; then we
+         * group; then we update the total disk blocks count; then we
-         * update the groups count to enable the group; then finally we
+         * update the groups count to enable the group; then finally we
-         * update the free space counts so that the system can start
+         * update the free space counts so that the system can start
-         * using the new disk blocks.
+         * using the new disk blocks.
-         */
+         */
        /* Update group descriptor block for new group */
        gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +946,8 @@ exit_put:
        return err;
 } /* ext4_group_add */
-/* Extend the filesystem to the new number of blocks specified.  This entry
+/*
+ * Extend the filesystem to the new number of blocks specified.  This entry
 * point is only used to extend the current filesystem to the end of the last
 * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
 * for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
                             o_blocks_count + add, add);
        /* See if the device is actually as big as what was requested */
-        bh = sb_bread(sb, o_blocks_count + add -1);
+        bh = sb_bread(sb, o_blocks_count + add - 1);
        if (!bh) {
                ext4_warning(sb, __func__,
                             "can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b5479b1dff14..d5d77958b861 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
 static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
                               unsigned int);
-static void ext4_commit_super (struct super_block * sb,
+static void ext4_commit_super(struct super_block *sb,
-                               struct ext4_super_block * es,
+                              struct ext4_super_block *es, int sync);
-                               int sync);
+static void ext4_mark_recovery_complete(struct super_block *sb,
-static void ext4_mark_recovery_complete(struct super_block * sb,
+                                        struct ext4_super_block *es);
-                                        struct ext4_super_block * es);
+static void ext4_clear_journal_err(struct super_block *sb,
-static void ext4_clear_journal_err(struct super_block * sb,
+                                   struct ext4_super_block *es);
-                                   struct ext4_super_block * es);
 static int ext4_sync_fs(struct super_block *sb, int wait);
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
                                     char nbuf[16]);
-static int ext4_remount (struct super_block * sb, int * flags, char * data);
+static int ext4_remount(struct super_block *sb, int *flags, char *data);
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static void ext4_unlockfs(struct super_block *sb);
-static void ext4_write_super (struct super_block * sb);
+static void ext4_write_super(struct super_block *sb);
 static void ext4_write_super_lockfs(struct super_block *sb);
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
        if (sb->s_flags & MS_RDONLY)
                return;
-        if (!test_opt (sb, ERRORS_CONT)) {
+        if (!test_opt(sb, ERRORS_CONT)) {
                journal_t *journal = EXT4_SB(sb)->s_journal;
                EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
                if (journal)
                        jbd2_journal_abort(journal, -EIO);
        }
-        if (test_opt (sb, ERRORS_RO)) {
+        if (test_opt(sb, ERRORS_RO)) {
-                printk (KERN_CRIT "Remounting filesystem read-only\n");
+                printk(KERN_CRIT "Remounting filesystem read-only\n");
                sb->s_flags |= MS_RDONLY;
        }
        ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
                        sb->s_id);
 }
-void ext4_error (struct super_block * sb, const char * function,
+void ext4_error(struct super_block *sb, const char *function,
-                 const char * fmt, ...)
+                const char *fmt, ...)
 {
        va_list args;
        va_start(args, fmt);
-        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
        vprintk(fmt, args);
        printk("\n");
        va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
        ext4_handle_error(sb);
 }
-static const char *ext4_decode_error(struct super_block * sb, int errno,
+static const char *ext4_decode_error(struct super_block *sb, int errno,
                                     char nbuf[16])
 {
        char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
 /* __ext4_std_error decodes expected errors from journaling functions
 * automatically and invokes the appropriate error response.  */
-void __ext4_std_error (struct super_block * sb, const char * function,
+void __ext4_std_error(struct super_block *sb, const char *function, int errno)
-                       int errno)
 {
        char nbuf[16];
        const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
                return;
        errstr = ext4_decode_error(sb, errno, nbuf);
-        printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
+        printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
-                sb->s_id, function, errstr);
+               sb->s_id, function, errstr);
        ext4_handle_error(sb);
 }
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
 * case we take the easy way out and panic immediately.
 */
-void ext4_abort (struct super_block * sb, const char * function,
+void ext4_abort(struct super_block *sb, const char *function,
-                 const char * fmt, ...)
+                const char *fmt, ...)
 {
        va_list args;
-        printk (KERN_CRIT "ext4_abort called.\n");
+        printk(KERN_CRIT "ext4_abort called.\n");
        va_start(args, fmt);
-        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function);
+        printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
        vprintk(fmt, args);
        printk("\n");
        va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
-void ext4_warning (struct super_block * sb, const char * function,
+void ext4_warning(struct super_block *sb, const char *function,
-                   const char * fmt, ...)
+                  const char *fmt, ...)
 {
        va_list args;
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
        }
 }
-static void ext4_put_super (struct super_block * sb)
+static void ext4_put_super(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
@@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode)
                                       &EXT4_I(inode)->jinode);
 }
-static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
+static inline void ext4_show_quota_options(struct seq_file *seq,
+                                           struct super_block *sb)
 {
 #if defined(CONFIG_QUOTA)
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 }
 #ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
+#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
-#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 static int ext4_dquot_initialize(struct inode *inode, int type);
 static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data)
        return sb_block;
 }
-static int parse_options (char *options, struct super_block *sb,
+static int parse_options(char *options, struct super_block *sb,
-                          unsigned int *inum, unsigned long *journal_devnum,
+                         unsigned int *inum, unsigned long *journal_devnum,
-                          ext4_fsblk_t *n_blocks_count, int is_remount)
+                         ext4_fsblk_t *n_blocks_count, int is_remount)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-        char * p;
+        char *p;
        substring_t args[MAX_OPT_ARGS];
        int data_opt = 0;
        int option;
@@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb,
        if (!options)
                return 1;
-        while ((p = strsep (&options, ",")) != NULL) {
+        while ((p = strsep(&options, ",")) != NULL) {
                int token;
                if (!*p)
                        continue;
@@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb,
                token = match_token(p, tokens, args);
                switch (token) {
                case Opt_bsd_df:
-                        clear_opt (sbi->s_mount_opt, MINIX_DF);
+                        clear_opt(sbi->s_mount_opt, MINIX_DF);
                        break;
                case Opt_minix_df:
-                        set_opt (sbi->s_mount_opt, MINIX_DF);
+                        set_opt(sbi->s_mount_opt, MINIX_DF);
                        break;
                case Opt_grpid:
-                        set_opt (sbi->s_mount_opt, GRPID);
+                        set_opt(sbi->s_mount_opt, GRPID);
                        break;
                case Opt_nogrpid:
-                        clear_opt (sbi->s_mount_opt, GRPID);
+                        clear_opt(sbi->s_mount_opt, GRPID);
                        break;
                case Opt_resuid:
                        if (match_int(&args[0], &option))
@@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb,
                        /* *sb_block = match_int(&args[0]); */
                        break;
                case Opt_err_panic:
-                        clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+                        clear_opt(sbi->s_mount_opt, ERRORS_CONT);
-                        clear_opt (sbi->s_mount_opt, ERRORS_RO);
+                        clear_opt(sbi->s_mount_opt, ERRORS_RO);
-                        set_opt (sbi->s_mount_opt, ERRORS_PANIC);
+                        set_opt(sbi->s_mount_opt, ERRORS_PANIC);
                        break;
                case Opt_err_ro:
-                        clear_opt (sbi->s_mount_opt, ERRORS_CONT);
+                        clear_opt(sbi->s_mount_opt, ERRORS_CONT);
-                        clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+                        clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
-                        set_opt (sbi->s_mount_opt, ERRORS_RO);
+                        set_opt(sbi->s_mount_opt, ERRORS_RO);
                        break;
                case Opt_err_cont:
-                        clear_opt (sbi->s_mount_opt, ERRORS_RO);
+                        clear_opt(sbi->s_mount_opt, ERRORS_RO);
-                        clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
+                        clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
-                        set_opt (sbi->s_mount_opt, ERRORS_CONT);
+                        set_opt(sbi->s_mount_opt, ERRORS_CONT);
                        break;
                case Opt_nouid32:
-                        set_opt (sbi->s_mount_opt, NO_UID32);
+                        set_opt(sbi->s_mount_opt, NO_UID32);
                        break;
                case Opt_nocheck:
-                        clear_opt (sbi->s_mount_opt, CHECK);
+                        clear_opt(sbi->s_mount_opt, CHECK);
                        break;
                case Opt_debug:
-                        set_opt (sbi->s_mount_opt, DEBUG);
+                        set_opt(sbi->s_mount_opt, DEBUG);
                        break;
                case Opt_oldalloc:
-                        set_opt (sbi->s_mount_opt, OLDALLOC);
+                        set_opt(sbi->s_mount_opt, OLDALLOC);
                        break;
                case Opt_orlov:
-                        clear_opt (sbi->s_mount_opt, OLDALLOC);
+                        clear_opt(sbi->s_mount_opt, OLDALLOC);
                        break;
 #ifdef CONFIG_EXT4DEV_FS_XATTR
                case Opt_user_xattr:
-                        set_opt (sbi->s_mount_opt, XATTR_USER);
+                        set_opt(sbi->s_mount_opt, XATTR_USER);
                        break;
                case Opt_nouser_xattr:
-                        clear_opt (sbi->s_mount_opt, XATTR_USER);
+                        clear_opt(sbi->s_mount_opt, XATTR_USER);
                        break;
 #else
                case Opt_user_xattr:
@@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
                                       "journal on remount\n");
                                return 0;
                        }
-                        set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
+                        set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
                        break;
                case Opt_journal_inum:
                        if (is_remount) {
@@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb,
                        set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
                        break;
                case Opt_noload:
-                        set_opt (sbi->s_mount_opt, NOLOAD);
+                        set_opt(sbi->s_mount_opt, NOLOAD);
                        break;
                case Opt_commit:
                        if (match_int(&args[0], &option))
@@ -1331,7 +1330,7 @@ set_qf_format:
                                        "on this filesystem, use tune2fs\n");
                                return 0;
                        }
-                        set_opt (sbi->s_mount_opt, EXTENTS);
+                        set_opt(sbi->s_mount_opt, EXTENTS);
                        break;
                case Opt_noextents:
                        /*
@@ -1348,7 +1347,7 @@ set_qf_format:
                                                "-o noextents options\n");
                                return 0;
                        }
-                        clear_opt (sbi->s_mount_opt, EXTENTS);
+                        clear_opt(sbi->s_mount_opt, EXTENTS);
                        break;
                case Opt_i_version:
                        set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1373,9 @@ set_qf_format:
                        set_opt(sbi->s_mount_opt, DELALLOC);
                        break;
                default:
-                        printk (KERN_ERR
+                        printk(KERN_ERR
-                                "EXT4-fs: Unrecognized mount option \"%s\" "
+                               "EXT4-fs: Unrecognized mount option \"%s\" "
-                                "or missing value\n", p);
+                               "or missing value\n", p);
                        return 0;
                }
        }
@@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
        int res = 0;
        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
-                printk (KERN_ERR "EXT4-fs warning: revision level too high, "
+                printk(KERN_ERR "EXT4-fs warning: revision level too high, "
-                        "forcing read-only mode\n");
+                       "forcing read-only mode\n");
                res = MS_RDONLY;
        }
        if (read_only)
                return res;
        if (!(sbi->s_mount_state & EXT4_VALID_FS))
-                printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
+                printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
-                        "running e2fsck is recommended\n");
+                       "running e2fsck is recommended\n");
        else if ((sbi->s_mount_state & EXT4_ERROR_FS))
-                printk (KERN_WARNING
+                printk(KERN_WARNING
-                        "EXT4-fs warning: mounting fs with errors, "
+                       "EXT4-fs warning: mounting fs with errors, "
-                        "running e2fsck is recommended\n");
+                       "running e2fsck is recommended\n");
        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
                 le16_to_cpu(es->s_mnt_count) >=
                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
-                printk (KERN_WARNING
+                printk(KERN_WARNING
-                        "EXT4-fs warning: maximal mount count reached, "
+                       "EXT4-fs warning: maximal mount count reached, "
-                        "running e2fsck is recommended\n");
+                       "running e2fsck is recommended\n");
        else if (le32_to_cpu(es->s_checkinterval) &&
                (le32_to_cpu(es->s_lastcheck) +
                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
-                printk (KERN_WARNING
+                printk(KERN_WARNING
-                        "EXT4-fs warning: checktime reached, "
+                       "EXT4-fs warning: checktime reached, "
-                        "running e2fsck is recommended\n");
+                       "running e2fsck is recommended\n");
 #if 0
                /* @@@ We _will_ want to clear the valid bit if we find
                 * inconsistencies, to force a fsck at reboot.  But for
@@ -1506,14 +1505,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
        flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
                groups_per_flex;
-        sbi->s_flex_groups = kmalloc(flex_group_count *
+        sbi->s_flex_groups = kzalloc(flex_group_count *
                                     sizeof(struct flex_groups), GFP_KERNEL);
        if (sbi->s_flex_groups == NULL) {
-                printk(KERN_ERR "EXT4-fs: not enough memory\n");
+                printk(KERN_ERR "EXT4-fs: not enough memory for "
+                                "%lu flex groups\n", flex_group_count);
                goto failed;
        }
-        memset(sbi->s_flex_groups, 0, flex_group_count *
-               sizeof(struct flex_groups));
        gdp = ext4_get_group_desc(sb, 1, &bh);
        block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
@@ -1597,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb)
                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
                block_bitmap = ext4_block_bitmap(sb, gdp);
-                if (block_bitmap < first_block || block_bitmap > last_block)
+                if (block_bitmap < first_block || block_bitmap > last_block) {
-                {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Block bitmap for group %lu not in group "
                               "(block %llu)!", i, block_bitmap);
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
-                if (inode_bitmap < first_block || inode_bitmap > last_block)
+                if (inode_bitmap < first_block || inode_bitmap > last_block) {
-                {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Inode bitmap for group %lu not in group "
                               "(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1610,28 @@ static int ext4_check_descriptors(struct super_block *sb)
                }
                inode_table = ext4_inode_table(sb, gdp);
                if (inode_table < first_block ||
-                    inode_table + sbi->s_itb_per_group - 1 > last_block)
+                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
-                {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Inode table for group %lu not in group "
                               "(block %llu)!", i, inode_table);
                        return 0;
                }
+                spin_lock(sb_bgl_lock(sbi, i));
                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                               "Checksum for group %lu failed (%u!=%u)\n",
                               i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
                               gdp)), le16_to_cpu(gdp->bg_checksum));
-                        return 0;
+                        if (!(sb->s_flags & MS_RDONLY))
+                                return 0;
                }
+                spin_unlock(sb_bgl_lock(sbi, i));
                if (!flexbg_flag)
                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
        }
        ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
-        sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb));
+        sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
        return 1;
 }
@@ -1654,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb)
 * e2fsck was run on this filesystem, and it must have already done the orphan
 * inode cleanup for us, so we can safely abort without any further action.
 */
-static void ext4_orphan_cleanup (struct super_block * sb,
+static void ext4_orphan_cleanup(struct super_block *sb,
-                                 struct ext4_super_block * es)
+                                struct ext4_super_block *es)
 {
        unsigned int s_flags = sb->s_flags;
        int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
                iput(inode);  /* The delete magic happens here! */
        }
-#define PLURAL(x) (x), ((x)==1) ? "" : "s"
+#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
        if (nr_orphans)
                printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
        return 0;
 }
-static int ext4_fill_super (struct super_block *sb, void *data, int silent)
+static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                __releases(kernel_lock)
                                __acquires(kernel_lock)
 {
-        struct buffer_head * bh;
+        struct buffer_head *bh;
        struct ext4_super_block *es = NULL;
        struct ext4_sb_info *sbi;
        ext4_fsblk_t block;
@@ -1953,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        }
        if (!(bh = sb_bread(sb, logical_sb_block))) {
-                printk (KERN_ERR "EXT4-fs: unable to read superblock\n");
+                printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
                goto out_fail;
        }
        /*
@@ -2026,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        set_opt(sbi->s_mount_opt, DELALLOC);
-        if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
+        if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
-                            NULL, 0))
+                           NULL, 0))
                goto failed_mount;
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                        goto failed_mount;
                }
-                brelse (bh);
+                brelse(bh);
                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
                offset = do_div(logical_sb_block, blocksize);
                bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
                sbi->s_es = es;
                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
-                        printk (KERN_ERR
+                        printk(KERN_ERR
-                                "EXT4-fs: Magic mismatch, very weird !\n");
+                               "EXT4-fs: Magic mismatch, very weird !\n");
                        goto failed_mount;
                }
        }
@@ -2132,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
                    (!is_power_of_2(sbi->s_inode_size)) ||
                    (sbi->s_inode_size > blocksize)) {
-                        printk (KERN_ERR
+                        printk(KERN_ERR
-                                "EXT4-fs: unsupported inode size: %d\n",
+                               "EXT4-fs: unsupported inode size: %d\n",
-                                sbi->s_inode_size);
+                               sbi->s_inode_size);
                        goto failed_mount;
                }
                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        sbi->s_mount_state = le16_to_cpu(es->s_state);
        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
-        for (i=0; i < 4; i++)
+        for (i = 0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
        if (sbi->s_blocks_per_group > blocksize * 8) {
-                printk (KERN_ERR
+                printk(KERN_ERR
-                        "EXT4-fs: #blocks per group too big: %lu\n",
+                       "EXT4-fs: #blocks per group too big: %lu\n",
-                        sbi->s_blocks_per_group);
+                       sbi->s_blocks_per_group);
                goto failed_mount;
        }
        if (sbi->s_inodes_per_group > blocksize * 8) {
-                printk (KERN_ERR
+                printk(KERN_ERR
-                        "EXT4-fs: #inodes per group too big: %lu\n",
+                       "EXT4-fs: #inodes per group too big: %lu\n",
-                        sbi->s_inodes_per_group);
+                       sbi->s_inodes_per_group);
                goto failed_mount;
        }
@@ -2213,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        sbi->s_groups_count = blocks_count;
        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
                   EXT4_DESC_PER_BLOCK(sb);
-        sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
+        sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
                                    GFP_KERNEL);
        if (sbi->s_group_desc == NULL) {
-                printk (KERN_ERR "EXT4-fs: not enough memory\n");
+                printk(KERN_ERR "EXT4-fs: not enough memory\n");
                goto failed_mount;
        }
@@ -2226,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                block = descriptor_loc(sb, logical_sb_block, i);
                sbi->s_group_desc[i] = sb_bread(sb, block);
                if (!sbi->s_group_desc[i]) {
-                        printk (KERN_ERR "EXT4-fs: "
+                        printk(KERN_ERR "EXT4-fs: "
-                                "can't read group descriptor %d\n", i);
+                               "can't read group descriptor %d\n", i);
                        db_count = i;
                        goto failed_mount2;
                }
        }
-        if (!ext4_check_descriptors (sb)) {
+        if (!ext4_check_descriptors(sb)) {
                printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
                goto failed_mount2;
        }
@@ -2308,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                    EXT4_SB(sb)->s_journal->j_failed_commit) {
                        printk(KERN_CRIT "EXT4-fs error (device %s): "
                               "ext4_fill_super: Journal transaction "
-                               "%u is corrupt\n", sb->s_id, 
+                               "%u is corrupt\n", sb->s_id,
                               EXT4_SB(sb)->s_journal->j_failed_commit);
-                        if (test_opt (sb, ERRORS_RO)) {
+                        if (test_opt(sb, ERRORS_RO)) {
-                                printk (KERN_CRIT
+                                printk(KERN_CRIT
-                                        "Mounting filesystem read-only\n");
+                                       "Mounting filesystem read-only\n");
                                sb->s_flags |= MS_RDONLY;
                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                        goto failed_mount3;
        } else {
                if (!silent)
-                        printk (KERN_ERR
+                        printk(KERN_ERR
-                                "ext4: No journal on filesystem on %s\n",
+                               "ext4: No journal on filesystem on %s\n",
-                                sb->s_id);
+                               sb->s_id);
                goto failed_mount3;
        }
@@ -2418,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
                goto failed_mount4;
        }
-        ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+        ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
        /* determine the minimum size of new large inodes, if present */
        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
        if (needs_recovery)
-                printk (KERN_INFO "EXT4-fs: recovery complete.\n");
+                printk(KERN_INFO "EXT4-fs: recovery complete.\n");
        ext4_mark_recovery_complete(sb, es);
-        printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+        printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
-                test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+               test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
-                test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+               test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
-                "writeback");
+               "writeback");
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
                printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
 static journal_t *ext4_get_dev_journal(struct super_block *sb,
                                       dev_t j_dev)
 {
-        struct buffer_head * bh;
+        struct buffer_head *bh;
        journal_t *journal;
        ext4_fsblk_t start;
        ext4_fsblk_t len;
        int hblock, blocksize;
        ext4_fsblk_t sb_block;
        unsigned long offset;
-        struct ext4_super_block * es;
+        struct ext4_super_block *es;
        struct block_device *bdev;
        bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb,
                                        "unavailable, cannot proceed.\n");
                                return -EROFS;
                        }
-                        printk (KERN_INFO "EXT4-fs: write access will "
+                        printk(KERN_INFO "EXT4-fs: write access will "
-                                        "be enabled during recovery.\n");
+                               "be enabled during recovery.\n");
                }
        }
@@ -2751,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb,
        return 0;
 }
-static int ext4_create_journal(struct super_block * sb,
+static int ext4_create_journal(struct super_block *sb,
-                               struct ext4_super_block * es,
+                               struct ext4_super_block *es,
                               unsigned int journal_inum)
 {
        journal_t *journal;
@@ -2793,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb,
        return 0;
 }
-static void ext4_commit_super (struct super_block * sb,
+static void ext4_commit_super(struct super_block *sb,
-                               struct ext4_super_block * es,
+                              struct ext4_super_block *es, int sync)
-                               int sync)
 {
        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
@@ -2816,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb,
 * remounting) the filesystem readonly, then we will end up with a
 * consistent fs on disk.  Record that fact.
 */
-static void ext4_mark_recovery_complete(struct super_block * sb,
+static void ext4_mark_recovery_complete(struct super_block *sb,
-                                        struct ext4_super_block * es)
+                                        struct ext4_super_block *es)
 {
        journal_t *journal = EXT4_SB(sb)->s_journal;
@@ -2839,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
 * has recorded an error from a previous lifetime, move that error to the
 * main filesystem now.
 */
-static void ext4_clear_journal_err(struct super_block * sb,
+static void ext4_clear_journal_err(struct super_block *sb,
-                                   struct ext4_super_block * es)
+                                   struct ext4_super_block *es)
 {
        journal_t *journal;
        int j_errno;
@@ -2865,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-                ext4_commit_super (sb, es, 1);
+                ext4_commit_super(sb, es, 1);
                jbd2_journal_clear_err(journal);
        }
@@ -2898,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb)
 * This implicitly triggers the writebehind on sync().
 */
-static void ext4_write_super (struct super_block * sb)
+static void ext4_write_super(struct super_block *sb)
 {
        if (mutex_trylock(&sb->s_lock) != 0)
                BUG();
@@ -2954,13 +2951,14 @@ static void ext4_unlockfs(struct super_block *sb)
        }
 }
-static int ext4_remount (struct super_block * sb, int * flags, char * data)
+static int ext4_remount(struct super_block *sb, int *flags, char *data)
 {
-        struct ext4_super_block * es;
+        struct ext4_super_block *es;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        ext4_fsblk_t n_blocks_count = 0;
        unsigned long old_sb_flags;
        struct ext4_mount_options old_opts;
+        ext4_group_t g;
        int err;
 #ifdef CONFIG_QUOTA
        int i;
@@ -3039,6 +3037,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
                        }
                        /*
+                         * Make sure the group descriptor checksums
+                         * are sane.  If they aren't, refuse to
+                         * remount r/w.
+                         */
+                        for (g = 0; g < sbi->s_groups_count; g++) {
+                                struct ext4_group_desc *gdp =
+                                        ext4_get_group_desc(sb, g, NULL);
+                                if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
+                                        printk(KERN_ERR
+               "EXT4-fs: ext4_remount: "
+                "Checksum for group %lu failed (%u!=%u)\n",
+                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
+                                               le16_to_cpu(gdp->bg_checksum));
+                                        err = -EINVAL;
+                                        goto restore_opts;
+                                }
+                        }
+                        /*
                         * If we have an unprocessed orphan list hanging
                         * around from a previously readonly bdev mount,
                         * require a full umount/remount for now.
@@ -3063,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
                        sbi->s_mount_state = le16_to_cpu(es->s_state);
                        if ((err = ext4_group_extend(sb, es, n_blocks_count)))
                                goto restore_opts;
-                        if (!ext4_setup_super (sb, es, 0))
+                        if (!ext4_setup_super(sb, es, 0))
                                sb->s_flags &= ~MS_RDONLY;
                }
        }
@@ -3093,7 +3111,7 @@ restore_opts:
        return err;
 }
-static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
+static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
        }
        /* Journaling quota? */
        if (EXT4_SB(sb)->s_qf_names[type]) {
-                /* Quotafile not of fs root? */
+                /* Quotafile not in fs root? */
                if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
                        printk(KERN_WARNING
                                "EXT4-fs: Quota file not on filesystem root. "
                                "Journaled quota will not work.\n");
-        }
+        }
        /*
         * When we journal data on quota file, we have to flush journal to see
@@ -3352,8 +3370,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        }
+        err = vfs_quota_on_path(sb, type, format_id, &nd.path);
        path_put(&nd.path);
-        return vfs_quota_on(sb, type, format_id, path, remount);
+        return err;
 }
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2e..8954208b4893 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
        char *name = entry->e_name;
        int n;
-        for (n=0; n < entry->e_name_len; n++) {
+        for (n = 0; n < entry->e_name_len; n++) {
                hash = (hash << NAME_HASH_SHIFT) ^
                       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
                       *name++;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8707a8cfa02c..ddde37025ca6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -313,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
        return 0;
 }
+#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
 int fat_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -336,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
        /* Check for setting the inode time. */
        ia_valid = attr->ia_valid;
-        if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
+        if (ia_valid & TIMES_SET_FLAGS) {
                if (fat_allow_set_time(sbi, inode))
-                        attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+                        attr->ia_valid &= ~TIMES_SET_FLAGS;
        }
        error = inode_change_ok(inode, attr);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 61d625136813..ac4f7db9f134 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd)
        return res;
 }
-/*
- * locate_fd finds a free file descriptor in the open_fds fdset,
- * expanding the fd arrays if necessary.  Must be called with the
- * file_lock held for write.
- */
-static int locate_fd(unsigned int orig_start, int cloexec)
-{
-        struct files_struct *files = current->files;
-        unsigned int newfd;
-        unsigned int start;
-        int error;
-        struct fdtable *fdt;
-        spin_lock(&files->file_lock);
-repeat:
-        fdt = files_fdtable(files);
-        /*
-         * Someone might have closed fd's in the range
-         * orig_start..fdt->next_fd
-         */
-        start = orig_start;
-        if (start < files->next_fd)
-                start = files->next_fd;
-        newfd = start;
-        if (start < fdt->max_fds)
-                newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
-                                           fdt->max_fds, start);
-        error = expand_files(files, newfd);
-        if (error < 0)
-                goto out;
-        /*
-         * If we needed to expand the fs array we
-         * might have blocked - try again.
-         */
-        if (error)
-                goto repeat;
-        if (start <= files->next_fd)
-                files->next_fd = newfd + 1;
-        FD_SET(newfd, fdt->open_fds);
-        if (cloexec)
-                FD_SET(newfd, fdt->close_on_exec);
-        else
-                FD_CLR(newfd, fdt->close_on_exec);
-        error = newfd;
-out:
-        spin_unlock(&files->file_lock);
-        return error;
-}
-static int dupfd(struct file *file, unsigned int start, int cloexec)
-{
-        int fd = locate_fd(start, cloexec);
-        if (fd >= 0)
-                fd_install(fd, file);
-        else
-                fput(file);
-        return fd;
-}
 asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
 {
        int err = -EBADF;
@@ -130,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
                return -EINVAL;
        spin_lock(&files->file_lock);
-        if (!(file = fcheck(oldfd)))
-                goto out_unlock;
-        get_file(file);                 /* We are now finished with oldfd */
        err = expand_files(files, newfd);
+        file = fcheck(oldfd);
+        if (unlikely(!file))
+                goto Ebadf;
        if (unlikely(err < 0)) {
                if (err == -EMFILE)
-                        err = -EBADF;
+                        goto Ebadf;
-                goto out_fput;
+                goto out_unlock;
        }
+        /*
-        /* To avoid races with open() and dup(), we will mark the fd as
+         * We need to detect attempts to do dup2() over allocated but still
-         * in-use in the open-file bitmap throughout the entire dup2()
+         * not finished descriptor.  NB: OpenBSD avoids that at the price of
-         * process.  This is quite safe: do_close() uses the fd array
+         * extra work in their equivalent of fget() - they insert struct
-         * entry, not the bitmap, to decide what work needs to be
+         * file immediately after grabbing descriptor, mark it larval if
-         * done.  --sct */
+         * more work (e.g. actual opening) is needed and make sure that
-        /* Doesn't work. open() might be there first. --AV */
+         * fget() treats larval files as absent.  Potentially interesting,
+         * but while extra work in fget() is trivial, locking implications
-        /* Yes. It's a race. In user space. Nothing sane to do */
+         * and amount of surgery on open()-related paths in VFS are not.
+         * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+         * deadlocks in rather amusing ways, AFAICS.  All of that is out of
+         * scope of POSIX or SUS, since neither considers shared descriptor
+         * tables and this condition does not arise without those.
+         */
        err = -EBUSY;
        fdt = files_fdtable(files);
        tofree = fdt->fd[newfd];
        if (!tofree && FD_ISSET(newfd, fdt->open_fds))
-                goto out_fput;
+                goto out_unlock;
+        get_file(file);
        rcu_assign_pointer(fdt->fd[newfd], file);
        FD_SET(newfd, fdt->open_fds);
        if (flags & O_CLOEXEC)
@@ -165,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
        if (tofree)
                filp_close(tofree, files);
-        err = newfd;
-out:
-        return err;
-out_unlock:
-        spin_unlock(&files->file_lock);
-        goto out;
-out_fput:
+        return newfd;
+Ebadf:
+        err = -EBADF;
+out_unlock:
        spin_unlock(&files->file_lock);
-        fput(file);
+        return err;
-        goto out;
 }
 asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
@@ -194,10 +128,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
 asmlinkage long sys_dup(unsigned int fildes)
 {
        int ret = -EBADF;
-        struct file * file = fget(fildes);
+        struct file *file = fget(fildes);
-        if (file)
+        if (file) {
-                ret = dupfd(file, 0, 0);
+                ret = get_unused_fd();
+                if (ret >= 0)
+                        fd_install(ret, file);
+                else
+                        fput(file);
+        }
        return ret;
 }
@@ -322,8 +261,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        case F_DUPFD_CLOEXEC:
                if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
                        break;
-                get_file(filp);
+                err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
-                err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
+                if (err >= 0) {
+                        get_file(filp);
+                        fd_install(err, filp);
+                }
                break;
        case F_GETFD:
                err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/file.c b/fs/file.c
index d8773b19fe47..f313314f996f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
 *  Manage the dynamic fd arrays in the process files_struct.
 */
+#include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/time.h>
@@ -432,3 +433,63 @@ struct files_struct init_files = {
        },
        .file_lock      = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
 };
+/*
+ * allocate a file descriptor, mark it busy.
+ */
+int alloc_fd(unsigned start, unsigned flags)
+{
+        struct files_struct *files = current->files;
+        unsigned int fd;
+        int error;
+        struct fdtable *fdt;
+        spin_lock(&files->file_lock);
+repeat:
+        fdt = files_fdtable(files);
+        fd = start;
+        if (fd < files->next_fd)
+                fd = files->next_fd;
+        if (fd < fdt->max_fds)
+                fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+                                           fdt->max_fds, fd);
+        error = expand_files(files, fd);
+        if (error < 0)
+                goto out;
+        /*
+         * If we needed to expand the fs array we
+         * might have blocked - try again.
+         */
+        if (error)
+                goto repeat;
+        if (start <= files->next_fd)
+                files->next_fd = fd + 1;
+        FD_SET(fd, fdt->open_fds);
+        if (flags & O_CLOEXEC)
+                FD_SET(fd, fdt->close_on_exec);
+        else
+                FD_CLR(fd, fdt->close_on_exec);
+        error = fd;
+#if 1
+        /* Sanity check */
+        if (rcu_dereference(fdt->fd[fd]) != NULL) {
+                printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
+                rcu_assign_pointer(fdt->fd[fd], NULL);
+        }
+#endif
+out:
+        spin_unlock(&files->file_lock);
+        return error;
+}
+int get_unused_fd(void)
+{
+        return alloc_fd(0, 0);
+}
+EXPORT_SYMBOL(get_unused_fd);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 2eccbfaa1d48..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
                goto nope;
        /* OK, it's a truncated page */
-        if (TestSetPageLocked(page))
+        if (!trylock_page(page))
                goto nope;
        page_cache_get(page);
@@ -221,7 +221,7 @@ write_out_data:
                 * blocking lock_buffer().
                 */
                if (buffer_dirty(bh)) {
-                        if (test_set_buffer_locked(bh)) {
+                        if (!trylock_buffer(bh)) {
                                BUFFER_TRACE(bh, "needs blocking lock");
                                spin_unlock(&journal->j_list_lock);
                                /* Write out all data to prevent deadlocks */
@@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal)
                        spin_lock(&journal->j_list_lock);
                }
                if (unlikely(!buffer_uptodate(bh))) {
-                        if (TestSetPageLocked(bh->b_page)) {
+                        if (!trylock_page(bh->b_page)) {
                                spin_unlock(&journal->j_list_lock);
                                lock_page(bh->b_page);
                                spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 8dee32007500..0540ca27a446 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
                goto out;
        }
-        lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+        lock_map_acquire(&handle->h_lockdep_map);
 out:
        return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
                spin_unlock(&journal->j_state_lock);
        }
-        lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+        lock_map_release(&handle->h_lockdep_map);
        jbd_free_handle(handle);
        return err;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be873226..f2ad061e95ec 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
                goto nope;
        /* OK, it's a truncated page */
-        if (TestSetPageLocked(page))
+        if (!trylock_page(page))
                goto nope;
        page_cache_get(page);
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                jinode->i_flags |= JI_COMMIT_RUNNING;
                spin_unlock(&journal->j_list_lock);
                err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
-                if (!ret)
+                if (err) {
-                        ret = err;
+                        /*
+                         * Because AS_EIO is cleared by
+                         * wait_on_page_writeback_range(), set it again so
+                         * that user process can get -EIO from fsync().
+                         */
+                        set_bit(AS_EIO,
+                                &jinode->i_vfs_inode->i_mapping->flags);
+                        if (!ret)
+                                ret = err;
+                }
                spin_lock(&journal->j_list_lock);
                jinode->i_flags &= ~JI_COMMIT_RUNNING;
                wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
         * commit block, which happens below in such setting.
         */
        err = journal_finish_inode_data_buffers(journal, commit_transaction);
-        if (err)
+        if (err) {
-                jbd2_journal_abort(journal, err);
+                char b[BDEVNAME_SIZE];
+                printk(KERN_WARNING
+                        "JBD2: Detected IO errors while flushing file data "
+                        "on %s\n", bdevname(journal->j_fs_dev, b));
+                err = 0;
+        }
        /* Lo and behold: we have just managed to send a transaction to
           the log.  Before we can commit it, wait for the IO so far to
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6ae..8207a01c4edb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
 EXPORT_SYMBOL(jbd2_journal_create);
 EXPORT_SYMBOL(jbd2_journal_load);
 EXPORT_SYMBOL(jbd2_journal_destroy);
-EXPORT_SYMBOL(jbd2_journal_update_superblock);
 EXPORT_SYMBOL(jbd2_journal_abort);
 EXPORT_SYMBOL(jbd2_journal_errno);
 EXPORT_SYMBOL(jbd2_journal_ack_err);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f7cadbb19fa..e5d540588fa9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
                goto out;
        }
-        lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+        lock_map_acquire(&handle->h_lockdep_map);
 out:
        return handle;
 }
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle)
                spin_unlock(&journal->j_state_lock);
        }
-        lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+        lock_map_release(&handle->h_lockdep_map);
        jbd2_free_handle(handle);
        return err;
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ade..6caf1e1ee26d 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
 int jffs2_sum_init(struct jffs2_sb_info *c)
 {
+        uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
        c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
        if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
                return -ENOMEM;
        }
-        c->summary->sum_buf = vmalloc(c->sector_size);
+        c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
        if (!c->summary->sum_buf) {
                JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
        jffs2_sum_disable_collecting(c->summary);
-        vfree(c->summary->sum_buf);
+        kfree(c->summary->sum_buf);
        c->summary->sum_buf = NULL;
        kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
 /* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
 static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
-                                        uint32_t infosize, uint32_t datasize, int padsize)
+                                uint32_t infosize, uint32_t datasize, int padsize)
 {
        struct jffs2_raw_summary isum;
        union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
        int ret;
        size_t retlen;
+        if (padsize + datasize > MAX_SUMMARY_SIZE) {
+                /* It won't fit in the buffer. Abort summary for this jeb */
+                jffs2_sum_disable_collecting(c->summary);
+                JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
+                              datasize, padsize, jeb->offset);
+                /* Non-fatal */
+                return 0;
+        }
+        /* Is there enough space for summary? */
+        if (padsize < 0) {
+                /* don't try to write out summary for this jeb */
+                jffs2_sum_disable_collecting(c->summary);
+                JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
+                              padsize);
+                /* Non-fatal */
+                return 0;
+        }
        memset(c->summary->sum_buf, 0xff, datasize);
        memset(&isum, 0, sizeof(isum));
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
 {
        int datasize, infosize, padsize;
        struct jffs2_eraseblock *jeb;
-        int ret;
+        int ret = 0;
        dbg_summary("called\n");
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
        infosize += padsize;
        datasize += padsize;
-        /* Is there enough space for summary? */
-        if (padsize < 0) {
-                /* don't try to write out summary for this jeb */
-                jffs2_sum_disable_collecting(c->summary);
-                JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
-                spin_lock(&c->erase_completion_lock);
-                return 0;
-        }
        ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
        spin_lock(&c->erase_completion_lock);
        return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5ce..60207a2ae952 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
 #ifndef JFFS2_SUMMARY_H
 #define JFFS2_SUMMARY_H
+/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
+   is larger than that, we have to just ditch it and avoid using summary
+   for the eraseblock in question... and it probably doesn't hurt us much
+   anyway. */
+#define MAX_SUMMARY_SIZE 65536
 #include <linux/uio.h>
 #include <linux/jffs2.h>
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cde..1add676a19df 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
        s->s_flags = MS_NOUSER;
        s->s_maxbytes = ~0ULL;
-        s->s_blocksize = 1024;
+        s->s_blocksize = PAGE_SIZE;
-        s->s_blocksize_bits = 10;
+        s->s_blocksize_bits = PAGE_SHIFT;
        s->s_magic = magic;
        s->s_op = ops ? ops : &simple_super_operations;
        s->s_time_gran = 1;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 399444639337..4a714f64515b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 {
        struct nlm_host *host;
        struct nlm_file *file;
-        int rc = rpc_success;
+        __be32 rc = rpc_success;
        dprintk("lockd: TEST4        called\n");
        resp->cookie = argp->cookie;
@@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 {
        struct nlm_host *host;
        struct nlm_file *file;
-        int rc = rpc_success;
+        __be32 rc = rpc_success;
        dprintk("lockd: LOCK          called\n");
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76019d2ff72d..76262c1986f2 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 {
        struct nlm_host *host;
        struct nlm_file *file;
-        int rc = rpc_success;
+        __be32 rc = rpc_success;
        dprintk("lockd: TEST          called\n");
        resp->cookie = argp->cookie;
@@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 {
        struct nlm_host *host;
        struct nlm_file *file;
-        int rc = rpc_success;
+        __be32 rc = rpc_success;
        dprintk("lockd: LOCK          called\n");
diff --git a/fs/namei.c b/fs/namei.c
index a7b0a0b80128..4ea63ed5e791 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -274,7 +274,7 @@ int inode_permission(struct inode *inode, int mask)
                return retval;
        return security_inode_permission(inode,
-                        mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
+                        mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
 }
 /**
@@ -1431,8 +1431,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 *  3. We should have write and exec permissions on dir
 *  4. We can't do it if dir is immutable (done in permission())
 */
-static inline int may_create(struct inode *dir, struct dentry *child,
+static inline int may_create(struct inode *dir, struct dentry *child)
-                             struct nameidata *nd)
 {
        if (child->d_inode)
                return -EEXIST;
@@ -1504,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
                struct nameidata *nd)
 {
-        int error = may_create(dir, dentry, nd);
+        int error = may_create(dir, dentry);
        if (error)
                return error;
@@ -1948,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 {
-        int error = may_create(dir, dentry, NULL);
+        int error = may_create(dir, dentry);
        if (error)
                return error;
@@ -2049,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-        int error = may_create(dir, dentry, NULL);
+        int error = may_create(dir, dentry);
        if (error)
                return error;
@@ -2316,7 +2315,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
 {
-        int error = may_create(dir, dentry, NULL);
+        int error = may_create(dir, dentry);
        if (error)
                return error;
@@ -2386,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        if (!inode)
                return -ENOENT;
-        error = may_create(dir, new_dentry, NULL);
+        error = may_create(dir, new_dentry);
        if (error)
                return error;
@@ -2595,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                return error;
        if (!new_dentry->d_inode)
-                error = may_create(new_dir, new_dentry, NULL);
+                error = may_create(new_dir, new_dentry);
        else
                error = may_delete(new_dir, new_dentry, is_dir);
        if (error)
diff --git a/fs/namespace.c b/fs/namespace.c
index 411728c0c8bb..6e283c93b50d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1667,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);
-        return do_add_mount(mnt, nd, mnt_flags, NULL);
+        return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
 }
 /*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
-int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
+int do_add_mount(struct vfsmount *newmnt, struct path *path,
                 int mnt_flags, struct list_head *fslist)
 {
        int err;
        down_write(&namespace_sem);
        /* Something was mounted here while we slept */
-        while (d_mountpoint(nd->path.dentry) &&
+        while (d_mountpoint(path->dentry) &&
-               follow_down(&nd->path.mnt, &nd->path.dentry))
+               follow_down(&path->mnt, &path->dentry))
                ;
        err = -EINVAL;
-        if (!check_mnt(nd->path.mnt))
+        if (!check_mnt(path->mnt))
                goto unlock;
        /* Refuse the same filesystem on the same mount point */
        err = -EBUSY;
-        if (nd->path.mnt->mnt_sb == newmnt->mnt_sb &&
+        if (path->mnt->mnt_sb == newmnt->mnt_sb &&
-            nd->path.mnt->mnt_root == nd->path.dentry)
+            path->mnt->mnt_root == path->dentry)
                goto unlock;
        err = -EINVAL;
@@ -1699,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
                goto unlock;
        newmnt->mnt_flags = mnt_flags;
-        if ((err = graft_tree(newmnt, &nd->path)))
+        if ((err = graft_tree(newmnt, path)))
                goto unlock;
        if (fslist) /* add to the specified expiration list */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef76399..66df08dd1caf 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
                goto out_err;
        mntget(mnt);
-        err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
+        err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
                           &nfs_automount_list);
        if (err < 0) {
                mntput(mnt);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8478fc25daee..46763d1cd397 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
        Opt_err
 };
-static match_table_t __initconst tokens = {
+static match_table_t __initdata tokens = {
        {Opt_port, "port=%u"},
        {Opt_rsize, "rsize=%u"},
        {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 33bfcf09db46..9dc036f18356 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp)
        /* Look up the dentry */
        err = path_lookup(nxp->ex_path, 0, &nd);
        if (err)
-                goto out_unlock;
+                goto out_put_clp;
        err = -EINVAL;
        exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
@@ -1090,9 +1090,9 @@ finish:
                exp_put(exp);
        if (fsid_key && !IS_ERR(fsid_key))
                cache_put(&fsid_key->h, &svc_expkey_cache);
-        if (clp)
-                auth_domain_put(clp);
        path_put(&nd.path);
+out_put_clp:
+        auth_domain_put(clp);
 out_unlock:
        exp_writeunlock();
 out:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f5..2e51adac65de 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
 static struct nfsd4_operation nfsd4_ops[];
-static inline char *nfsd4_op_name(unsigned opnum);
+static const char *nfsd4_op_name(unsigned opnum);
 /*
 * COMPOUND call.
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
        },
 };
-static inline char *
+static const char *nfsd4_op_name(unsigned opnum)
-nfsd4_op_name(unsigned opnum)
 {
        if (opnum < ARRAY_SIZE(nfsd4_ops))
                return nfsd4_ops[opnum].op_name;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e42..b38f944f0667 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
                tbh = bhs[i];
                if (!tbh)
                        continue;
-                if (unlikely(test_set_buffer_locked(tbh)))
+                if (!trylock_buffer(tbh))
                        BUG();
                /* The buffer dirty state is now irrelevant, just clean it. */
                clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc507..9669541d0119 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
        for (i = 0; i < nr_bhs; i++) {
                struct buffer_head *tbh = bhs[i];
-                if (unlikely(test_set_buffer_locked(tbh)))
+                if (!trylock_buffer(tbh))
                        continue;
                if (unlikely(buffer_uptodate(tbh))) {
                        unlock_buffer(tbh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e7..17d32ca6bc35 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
                for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
                        struct buffer_head *tbh = bhs[i_bhs];
-                        if (unlikely(test_set_buffer_locked(tbh)))
+                        if (!trylock_buffer(tbh))
                                BUG();
                        BUG_ON(!buffer_uptodate(tbh));
                        clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
        for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
                struct buffer_head *tbh = bhs[i_bhs];
-                if (unlikely(test_set_buffer_locked(tbh)))
+                if (!trylock_buffer(tbh))
                        BUG();
                BUG_ON(!buffer_uptodate(tbh));
                clear_buffer_dirty(tbh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6d..506c24fb5078 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
        for(i = 0; i < wc->w_num_pages; i++) {
                tmppage = wc->w_pages[i];
-                if (ocfs2_should_order_data(inode))
+                if (page_has_buffers(tmppage)) {
-                        walk_page_buffers(wc->w_handle, page_buffers(tmppage),
+                        if (ocfs2_should_order_data(inode))
-                                          from, to, NULL,
+                                walk_page_buffers(wc->w_handle,
-                                          ocfs2_journal_dirty_data);
+                                                  page_buffers(tmppage),
+                                                  from, to, NULL,
-                block_commit_write(tmppage, from, to);
+                                                  ocfs2_journal_dirty_data);
+                        block_commit_write(tmppage, from, to);
+                }
        }
 }
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
                        to = PAGE_CACHE_SIZE;
                }
-                if (ocfs2_should_order_data(inode))
+                if (page_has_buffers(tmppage)) {
-                        walk_page_buffers(wc->w_handle, page_buffers(tmppage),
+                        if (ocfs2_should_order_data(inode))
-                                          from, to, NULL,
+                                walk_page_buffers(wc->w_handle,
-                                          ocfs2_journal_dirty_data);
+                                                  page_buffers(tmppage),
+                                                  from, to, NULL,
-                block_commit_write(tmppage, from, to);
+                                                  ocfs2_journal_dirty_data);
+                        block_commit_write(tmppage, from, to);
+                }
        }
 out_write_size:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index be2dd95d3a1d..ec2ed15c3daa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1766,8 +1766,8 @@ out_inode_unlock:
 out_rw_unlock:
        ocfs2_rw_unlock(inode, 1);
-        mutex_unlock(&inode->i_mutex);
 out:
+        mutex_unlock(&inode->i_mutex);
        return ret;
 }
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfdd..7a37240f7a31 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
 static int ocfs2_commit_cache(struct ocfs2_super *osb);
 static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
-                                      int dirty);
+                                      int dirty, int replayed);
 static int ocfs2_trylock_journal(struct ocfs2_super *osb,
                                 int slot_num);
 static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
        return status;
 }
+static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
+{
+        le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
+}
+static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
+{
+        return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
+}
 static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
-                                      int dirty)
+                                      int dirty, int replayed)
 {
        int status;
        unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
                flags &= ~OCFS2_JOURNAL_DIRTY_FL;
        fe->id1.journal1.ij_flags = cpu_to_le32(flags);
+        if (replayed)
+                ocfs2_bump_recovery_generation(fe);
        status = ocfs2_write_block(osb, bh, journal->j_inode);
        if (status < 0)
                mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
                 * Do not toggle if flush was unsuccessful otherwise
                 * will leave dirty metadata in a "clean" journal
                 */
-                status = ocfs2_journal_toggle_dirty(osb, 0);
+                status = ocfs2_journal_toggle_dirty(osb, 0, 0);
                if (status < 0)
                        mlog_errno(status);
        }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
        }
 }
-int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
+int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
 {
        int status = 0;
        struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
        ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
-        status = ocfs2_journal_toggle_dirty(osb, 1);
+        status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
        if (status < 0) {
                mlog_errno(status);
                goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
                goto bail;
        }
-        status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);
+        status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
        if (status < 0)
                mlog_errno(status);
@@ -1034,6 +1047,12 @@ restart:
        spin_unlock(&osb->osb_lock);
        mlog(0, "All nodes recovered\n");
+        /* Refresh all journal recovery generations from disk */
+        status = ocfs2_check_journals_nolocks(osb);
+        status = (status == -EROFS) ? 0 : status;
+        if (status < 0)
+                mlog_errno(status);
        ocfs2_super_unlock(osb, 1);
        /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
        mlog_exit_void();
 }
+static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
+                                    int slot_num,
+                                    struct buffer_head **bh,
+                                    struct inode **ret_inode)
+{
+        int status = -EACCES;
+        struct inode *inode = NULL;
+        BUG_ON(slot_num >= osb->max_slots);
+        inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
+                                            slot_num);
+        if (!inode || is_bad_inode(inode)) {
+                mlog_errno(status);
+                goto bail;
+        }
+        SET_INODE_JOURNAL(inode);
+        status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
+        if (status < 0) {
+                mlog_errno(status);
+                goto bail;
+        }
+        status = 0;
+bail:
+        if (inode) {
+                if (status || !ret_inode)
+                        iput(inode);
+                else
+                        *ret_inode = inode;
+        }
+        return status;
+}
 /* Does the actual journal replay and marks the journal inode as
 * clean. Will only replay if the journal inode is marked dirty. */
 static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
        struct ocfs2_dinode *fe;
        journal_t *journal = NULL;
        struct buffer_head *bh = NULL;
+        u32 slot_reco_gen;
-        inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
+        status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
-                                            slot_num);
+        if (status) {
-        if (inode == NULL) {
-                status = -EACCES;
                mlog_errno(status);
                goto done;
        }
-        if (is_bad_inode(inode)) {
-                status = -EACCES;
+        fe = (struct ocfs2_dinode *)bh->b_data;
-                iput(inode);
+        slot_reco_gen = ocfs2_get_recovery_generation(fe);
-                inode = NULL;
+        brelse(bh);
-                mlog_errno(status);
+        bh = NULL;
+        /*
+         * As the fs recovery is asynchronous, there is a small chance that
+         * another node mounted (and recovered) the slot before the recovery
+         * thread could get the lock. To handle that, we dirty read the journal
+         * inode for that slot to get the recovery generation. If it is
+         * different than what we expected, the slot has been recovered.
+         * If not, it needs recovery.
+         */
+        if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
+                mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
+                     osb->slot_recovery_generations[slot_num], slot_reco_gen);
+                osb->slot_recovery_generations[slot_num] = slot_reco_gen;
+                status = -EBUSY;
                goto done;
        }
-        SET_INODE_JOURNAL(inode);
+        /* Continue with recovery as the journal has not yet been recovered */
        status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
        if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
        fe = (struct ocfs2_dinode *) bh->b_data;
        flags = le32_to_cpu(fe->id1.journal1.ij_flags);
+        slot_reco_gen = ocfs2_get_recovery_generation(fe);
        if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
                mlog(0, "No recovery required for node %d\n", node_num);
+                /* Refresh recovery generation for the slot */
+                osb->slot_recovery_generations[slot_num] = slot_reco_gen;
                goto done;
        }
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
        flags &= ~OCFS2_JOURNAL_DIRTY_FL;
        fe->id1.journal1.ij_flags = cpu_to_le32(flags);
+        /* Increment recovery generation to indicate successful recovery */
+        ocfs2_bump_recovery_generation(fe);
+        osb->slot_recovery_generations[slot_num] =
+                                        ocfs2_get_recovery_generation(fe);
        status = ocfs2_write_block(osb, bh, inode);
        if (status < 0)
                mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
        status = ocfs2_replay_journal(osb, node_num, slot_num);
        if (status < 0) {
+                if (status == -EBUSY) {
+                        mlog(0, "Skipping recovery for slot %u (node %u) "
+                             "as another node has recovered it\n", slot_num,
+                             node_num);
+                        status = 0;
+                        goto done;
+                }
                mlog_errno(status);
                goto done;
        }
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 {
        unsigned int node_num;
        int status, i;
+        struct buffer_head *bh = NULL;
+        struct ocfs2_dinode *di;
        /* This is called with the super block cluster lock, so we
         * know that the slot map can't change underneath us. */
        spin_lock(&osb->osb_lock);
        for (i = 0; i < osb->max_slots; i++) {
+                /* Read journal inode to get the recovery generation */
+                status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
+                if (status) {
+                        mlog_errno(status);
+                        goto bail;
+                }
+                di = (struct ocfs2_dinode *)bh->b_data;
+                osb->slot_recovery_generations[i] =
+                                        ocfs2_get_recovery_generation(di);
+                brelse(bh);
+                bh = NULL;
+                mlog(0, "Slot %u recovery generation is %u\n", i,
+                     osb->slot_recovery_generations[i]);
                if (i == osb->slot_num)
                        continue;
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
        return 0;
 }
-/* Look for a dirty journal without taking any cluster locks. Used for
+/* Reads all the journal inodes without taking any cluster locks. Used
- * hard readonly access to determine whether the file system journals
+ * for hard readonly access to determine whether any journal requires
- * require recovery. */
+ * recovery. Also used to refresh the recovery generation numbers after
+ * a journal has been recovered by another node.
+ */
 int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
 {
        int ret = 0;
        unsigned int slot;
-        struct buffer_head *di_bh;
+        struct buffer_head *di_bh = NULL;
        struct ocfs2_dinode *di;
-        struct inode *journal = NULL;
+        int journal_dirty = 0;
        for(slot = 0; slot < osb->max_slots; slot++) {
-                journal = ocfs2_get_system_file_inode(osb,
+                ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
-                                                      JOURNAL_SYSTEM_INODE,
+                if (ret) {
-                                                      slot);
-                if (!journal || is_bad_inode(journal)) {
-                        ret = -EACCES;
-                        mlog_errno(ret);
-                        goto out;
-                }
-                di_bh = NULL;
-                ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
-                                       0, journal);
-                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }
                di = (struct ocfs2_dinode *) di_bh->b_data;
+                osb->slot_recovery_generations[slot] =
+                                        ocfs2_get_recovery_generation(di);
                if (le32_to_cpu(di->id1.journal1.ij_flags) &
                    OCFS2_JOURNAL_DIRTY_FL)
-                        ret = -EROFS;
+                        journal_dirty = 1;
                brelse(di_bh);
-                if (ret)
+                di_bh = NULL;
-                        break;
        }
 out:
-        if (journal)
+        if (journal_dirty)
-                iput(journal);
+                ret = -EROFS;
        return ret;
 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532ed..2178ebffa05f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int    ocfs2_journal_init(struct ocfs2_journal *journal,
 void   ocfs2_journal_shutdown(struct ocfs2_super *osb);
 int    ocfs2_journal_wipe(struct ocfs2_journal *journal,
                          int full);
-int    ocfs2_journal_load(struct ocfs2_journal *journal, int local);
+int    ocfs2_journal_load(struct ocfs2_journal *journal, int local,
+                          int replayed);
 int    ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
 void   ocfs2_recovery_thread(struct ocfs2_super *osb,
                             int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef1..7f625f2b1117 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
        struct ocfs2_slot_info *slot_info;
+        u32 *slot_recovery_generations;
        spinlock_t node_map_lock;
        u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f1945177629..4f619850ccf7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
                struct {                /* Info for journal system
                                           inodes */
                        __le32 ij_flags;        /* Mounted, version, etc. */
-                        __le32 ij_pad;
+                        __le32 ij_recovery_generation; /* Incremented when the
+                                                          journal is recovered
+                                                          after an unclean
+                                                          shutdown */
                } journal1;
        } id1;                          /* Inode type dependant 1 */
 /*C0*/  union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2560b33889aa..88255d3f52b4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
        }
        mlog(0, "max_slots for this device: %u\n", osb->max_slots);
+        osb->slot_recovery_generations =
+                kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
+                        GFP_KERNEL);
+        if (!osb->slot_recovery_generations) {
+                status = -ENOMEM;
+                mlog_errno(status);
+                goto bail;
+        }
        init_waitqueue_head(&osb->osb_wipe_event);
        osb->osb_orphan_wipes = kcalloc(osb->max_slots,
                                        sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
        local = ocfs2_mount_local(osb);
        /* will play back anything left in the journal. */
-        status = ocfs2_journal_load(osb->journal, local);
+        status = ocfs2_journal_load(osb->journal, local, dirty);
        if (status < 0) {
                mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
                goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
        ocfs2_free_slot_info(osb);
        kfree(osb->osb_orphan_wipes);
+        kfree(osb->slot_recovery_generations);
        /* FIXME
         * This belongs in journal shutdown, but because we have to
         * allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index dc75f22be3f2..697663b01bae 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -71,10 +71,10 @@ static int set_run(struct super_block *sb, int map,
                }
                if (set) {
                        set_bit(bit, sbi->s_imap[map]);
-                        set_bit(bit, (long *) bh->b_data);
+                        set_bit(bit, (unsigned long *)bh->b_data);
                } else {
                        clear_bit(bit, sbi->s_imap[map]);
-                        clear_bit(bit, (long *) bh->b_data);
+                        clear_bit(bit, (unsigned long *)bh->b_data);
                }
        }
        mark_buffer_dirty(bh);
@@ -109,7 +109,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block)
                if (!bh)
                        goto out;
-                set_bit(bit, (long *) bh->b_data);
+                set_bit(bit, (unsigned long *)bh->b_data);
                mark_buffer_dirty(bh);
                brelse(bh);
        }
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 05a5bc31e4bd..c0757e998876 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -104,7 +104,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
        oi = (struct omfs_inode *) bh->b_data;
        oi->i_head.h_self = cpu_to_be64(inode->i_ino);
-        oi->i_sibling = ~0ULL;
+        oi->i_sibling = ~cpu_to_be64(0ULL);
        mark_buffer_dirty(bh);
        brelse(bh);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 66e01fae4384..7e2499053e4d 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -30,11 +30,11 @@ void omfs_make_empty_table(struct buffer_head *bh, int offset)
 {
        struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
-        oe->e_next = ~0ULL;
+        oe->e_next = ~cpu_to_be64(0ULL);
        oe->e_extent_count = cpu_to_be32(1),
        oe->e_fill = cpu_to_be32(0x22),
-        oe->e_entry.e_cluster = ~0ULL;
+        oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
-        oe->e_entry.e_blocks = ~0ULL;
+        oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
 }
 int omfs_shrink_inode(struct inode *inode)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d865f5535436..a95fe5984f4b 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -492,7 +492,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
        if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
                printk(KERN_ERR "omfs: block count discrepancy between "
                        "super and root blocks (%llx, %llx)\n",
-                        sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks));
+                        (unsigned long long)sbi->s_num_blocks,
+                        (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
                goto out_brelse_bh2;
        }
diff --git a/fs/open.c b/fs/open.c
index 52647be277a2..07da9359481c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 }
 EXPORT_SYMBOL(dentry_open);
-/*
- * Find an empty file descriptor entry, and mark it busy.
- */
-int get_unused_fd_flags(int flags)
-{
-        struct files_struct * files = current->files;
-        int fd, error;
-        struct fdtable *fdt;
-        spin_lock(&files->file_lock);
-repeat:
-        fdt = files_fdtable(files);
-        fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
-                                files->next_fd);
-        /* Do we need to expand the fd array or fd set?  */
-        error = expand_files(files, fd);
-        if (error < 0)
-                goto out;
-        if (error) {
-                /*
-                 * If we needed to expand the fs array we
-                 * might have blocked - try again.
-                 */
-                goto repeat;
-        }
-        FD_SET(fd, fdt->open_fds);
-        if (flags & O_CLOEXEC)
-                FD_SET(fd, fdt->close_on_exec);
-        else
-                FD_CLR(fd, fdt->close_on_exec);
-        files->next_fd = fd + 1;
-#if 1
-        /* Sanity check */
-        if (fdt->fd[fd] != NULL) {
-                printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
-                fdt->fd[fd] = NULL;
-        }
-#endif
-        error = fd;
-out:
-        spin_unlock(&files->file_lock);
-        return error;
-}
-int get_unused_fd(void)
-{
-        return get_unused_fd_flags(0);
-}
-EXPORT_SYMBOL(get_unused_fd);
 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
        struct fdtable *fdt = files_fdtable(files);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01ed610f9b87..a28840b11b89 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2423,10 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
                        "read_bytes: %llu\n"
                        "write_bytes: %llu\n"
                        "cancelled_write_bytes: %llu\n",
-                        acct.rchar, acct.wchar,
+                        (unsigned long long)acct.rchar,
-                        acct.syscr, acct.syscw,
+                        (unsigned long long)acct.wchar,
-                        acct.read_bytes, acct.write_bytes,
+                        (unsigned long long)acct.syscr,
-                        acct.cancelled_write_bytes);
+                        (unsigned long long)acct.syscw,
+                        (unsigned long long)acct.read_bytes,
+                        (unsigned long long)acct.write_bytes,
+                        (unsigned long long)acct.cancelled_write_bytes);
 }
 static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cb4096cc3fb7..4fb81e9c94e3 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
        return rtn;
 }
-static DEFINE_IDR(proc_inum_idr);
+static DEFINE_IDA(proc_inum_ida);
 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
-#define PROC_DYNAMIC_FIRST 0xF0000000UL
+#define PROC_DYNAMIC_FIRST 0xF0000000U
 /*
 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,33 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 */
 static unsigned int get_inode_number(void)
 {
-        int i, inum = 0;
+        unsigned int i;
        int error;
 retry:
-        if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
+        if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
                return 0;
        spin_lock(&proc_inum_lock);
-        error = idr_get_new(&proc_inum_idr, NULL, &i);
+        error = ida_get_new(&proc_inum_ida, &i);
        spin_unlock(&proc_inum_lock);
        if (error == -EAGAIN)
                goto retry;
        else if (error)
                return 0;
-        inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
+        if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
+                /*
+                 * PROC_DYNAMIC_FIRST + i would wrap around.  Hand the id
+                 * back and report failure (0) like the other error paths,
+                 * rather than returning a wrapped number whose id has
+                 * just been released.
+                 */
+                spin_lock(&proc_inum_lock);
-        /* inum will never be more than 0xf0ffffff, so no check
+                ida_remove(&proc_inum_ida, i);
-         * for overflow.
+                spin_unlock(&proc_inum_lock);
-         */
+                return 0;
+        }
+        return PROC_DYNAMIC_FIRST + i;
-        return inum;
 }
 static void release_inode_number(unsigned int inum)
 {
-        int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
+        /*
+         * Undo the PROC_DYNAMIC_FIRST offset applied by get_inode_number()
+         * and drop the resulting id from proc_inum_ida under proc_inum_lock.
+         */
        spin_lock(&proc_inum_lock);
-        idr_remove(&proc_inum_idr, id);
+        ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
        spin_unlock(&proc_inum_lock);
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8a..5699171212ae 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
                if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
                        lock_buffer(bh);
                } else {
-                        if (test_set_buffer_locked(bh)) {
+                        if (!trylock_buffer(bh)) {
                                redirty_page_for_writepage(wbc, page);
                                continue;
                        }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c8f60ee183b5..c21df71943a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
 static void release_buffer_page(struct buffer_head *bh)
 {
        struct page *page = bh->b_page;
-        if (!page->mapping && !TestSetPageLocked(page)) {
+        if (!page->mapping && trylock_page(page)) {
                page_cache_get(page);
                put_bh(bh);
                if (!page->mapping)
@@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
                jh = JH_ENTRY(list->next);
                bh = jh->bh;
                get_bh(bh);
-                if (test_set_buffer_locked(bh)) {
+                if (!trylock_buffer(bh)) {
                        if (!buffer_dirty(bh)) {
                                list_move(&jh->list, &tmp);
                                goto loop_next;
@@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
 {
        PROC_INFO_INC(p_s_sb, journal.prepare);
-        if (test_set_buffer_locked(bh)) {
+        if (!trylock_buffer(bh)) {
                if (!wait)
                        return 0;
                lock_buffer(bh);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 879e54d35c2d..d318c7e663fa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -27,7 +27,6 @@
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/quotaops.h>
 struct file_system_type reiserfs_fs_type;
@@ -2076,8 +2075,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                return err;
        /* Quotafile not on the same filesystem? */
        if (nd.path.mnt->mnt_sb != sb) {
-                path_put(&nd.path);
+                err = -EXDEV;
-                return -EXDEV;
+                goto out;
        }
        inode = nd.path.dentry->d_inode;
        /* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2087,8 +2086,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                        reiserfs_warning(sb,
                                "reiserfs: Unpacking tail of quota file failed"
                                " (%d). Cannot turn on quotas.", err);
-                        path_put(&nd.path);
+                        err = -EINVAL;
-                        return -EINVAL;
+                        goto out;
                }
                mark_inode_dirty(inode);
        }
@@ -2109,13 +2108,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                /* Just start temporary transaction and finish it */
                err = journal_begin(&th, sb, 1);
                if (err)
-                        return err;
+                        goto out;
                err = journal_end_sync(&th, sb, 1);
                if (err)
-                        return err;
+                        goto out;
        }
+        err = vfs_quota_on_path(sb, type, format_id, &nd.path);
+out:
        path_put(&nd.path);
-        return vfs_quota_on(sb, type, format_id, path, 0);
+        return err;
 }
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 8e51a2aaa977..60d2f822e87b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
 romfs_readpage(struct file *file, struct page * page)
 {
        struct inode *inode = page->mapping->host;
-        loff_t offset, avail, readlen;
+        loff_t offset, size;
+        unsigned long filled;
        void *buf;
        int result = -EIO;
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
        /* 32 bit warning -- but not for us :) */
        offset = page_offset(page);
-        if (offset < i_size_read(inode)) {
+        size = i_size_read(inode);
-                avail = inode->i_size-offset;
+        filled = 0;
-                readlen = min_t(unsigned long, avail, PAGE_SIZE);
+        result = 0;
-                if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
+        if (offset < size) {
-                        if (readlen < PAGE_SIZE) {
+                unsigned long readlen;
-                                memset(buf + readlen,0,PAGE_SIZE-readlen);
-                        }
+                size -= offset;
-                        SetPageUptodate(page);
+                readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
-                        result = 0;
+                filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
+                if (filled != readlen) {
+                        SetPageError(page);
+                        filled = 0;
+                        result = -EIO;
                }
        }
-        if (result) {
-                memset(buf, 0, PAGE_SIZE);
+        if (filled < PAGE_SIZE)
-                SetPageError(page);
+                memset(buf + filled, 0, PAGE_SIZE-filled);
-        }
+        if (!result)
+                SetPageUptodate(page);
        flush_dcache_page(page);
        unlock_page(page);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3f54dbd6c49b..5d54205e486b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -443,6 +443,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
        return -1;
 }
+/*
+ * Format the bitmap @bits (@nr_bits bits) into the seq_file buffer of @m.
+ *
+ * If m->count plus the length reported by bitmap_scnprintf_len() is below
+ * m->size, the bitmap is written at the current position with
+ * bitmap_scnprintf(), m->count is advanced by that length and 0 is
+ * returned.  Otherwise m->count is set to m->size and -1 is returned.
+ */
+int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
+{
+        size_t len = bitmap_scnprintf_len(nr_bits);
+        if (m->count + len < m->size) {
+                bitmap_scnprintf(m->buf + m->count, m->size - m->count,
+                                 bits, nr_bits);
+                m->count += len;
+                return 0;
+        }
+        m->count = m->size;
+        return -1;
+}
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
        return NULL + (*pos == 0);
diff --git a/fs/splice.c b/fs/splice.c
index b30311ba8af6..1bbc6f4bb09c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
                         * for an in-flight io page
                         */
                        if (flags & SPLICE_F_NONBLOCK) {
-                                if (TestSetPageLocked(page)) {
+                                if (!trylock_page(page)) {
                                        error = -EAGAIN;
                                        break;
                                }
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3e30e40aa24d..3141969b456d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
        struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
        unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-        const struct match_token *tp = tokens;
+        struct match_token *tp = tokens;
        while (tp->token != Opt_onerror_panic && tp->token != mval)
                ++tp;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699a..737c9a425361 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y				+= $(addprefix $(XFS_LINUX)/, \
                                   xfs_iops.o \
                                   xfs_lrw.o \
                                   xfs_super.o \
-                                   xfs_vnode.o)
+                                   xfs_vnode.o \
+                                   xfs_xattr.o)
 # Objects in support/
 xfs-y                           += $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a0501..1cd3b55ee3d2 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
 }
 void
-kmem_free(void *ptr, size_t size)
+kmem_free(const void *ptr)
 {
        if (!is_vmalloc_addr(ptr)) {
                kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size)
 }
 void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
             unsigned int __nocast flags)
 {
        void    *new;
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
                if (new)
                        memcpy(new, ptr,
                                ((oldsize < newsize) ? oldsize : newsize));
-                kmem_free(ptr, oldsize);
+                kmem_free(ptr);
        }
        return new;
 }
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index a20683cf74dd..af6843c7ee4b 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
 extern void *kmem_alloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc(size_t, unsigned int __nocast);
 extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
-extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(void *, size_t);
+extern void  kmem_free(const void *);
 /*
 * Zone interfaces
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d840..fa47e43b8b41 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -409,7 +409,6 @@ xfs_start_buffer_writeback(
 STATIC void
 xfs_start_page_writeback(
        struct page             *page,
-        struct writeback_control *wbc,
        int                     clear_dirty,
        int                     buffers)
 {
@@ -676,7 +675,7 @@ xfs_probe_cluster(
                        } else
                                pg_offset = PAGE_CACHE_SIZE;
-                        if (page->index == tindex && !TestSetPageLocked(page)) {
+                        if (page->index == tindex && trylock_page(page)) {
                                pg_len = xfs_probe_page(page, pg_offset, mapped);
                                unlock_page(page);
                        }
@@ -760,7 +759,7 @@ xfs_convert_page(
        if (page->index != tindex)
                goto fail;
-        if (TestSetPageLocked(page))
+        if (!trylock_page(page))
                goto fail;
        if (PageWriteback(page))
                goto fail_unlock_page;
@@ -858,7 +857,7 @@ xfs_convert_page(
                                done = 1;
                        }
                }
-                xfs_start_page_writeback(page, wbc, !page_dirty, count);
+                xfs_start_page_writeback(page, !page_dirty, count);
        }
        return done;
@@ -1105,7 +1104,7 @@ xfs_page_state_convert(
                         * that we are writing into for the first time.
                         */
                        type = IOMAP_NEW;
-                        if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+                        if (trylock_buffer(bh)) {
                                ASSERT(buffer_mapped(bh));
                                if (iomap_valid)
                                        all_bh = 1;
@@ -1130,7 +1129,7 @@ xfs_page_state_convert(
                SetPageUptodate(page);
        if (startio)
-                xfs_start_page_writeback(page, wbc, 1, count);
+                xfs_start_page_writeback(page, 1, count);
        if (ioend && iomap_valid) {
                offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b4..9cc8f0213095 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
        xfs_buf_t       *bp)
 {
        if (bp->b_pages != bp->b_page_array) {
-                kmem_free(bp->b_pages,
+                kmem_free(bp->b_pages);
-                          bp->b_page_count * sizeof(struct page *));
        }
 }
@@ -1398,7 +1397,7 @@ STATIC void
 xfs_free_bufhash(
        xfs_buftarg_t           *btp)
 {
-        kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+        kmem_free(btp->bt_hash);
        btp->bt_hash = NULL;
 }
@@ -1428,13 +1427,10 @@ xfs_unregister_buftarg(
 void
 xfs_free_buftarg(
-        xfs_buftarg_t           *btp,
+        xfs_buftarg_t           *btp)
-        int                     external)
 {
        xfs_flush_buftarg(btp, 1);
        xfs_blkdev_issue_flush(btp);
-        if (external)
-                xfs_blkdev_put(btp->bt_bdev);
        xfs_free_bufhash(btp);
        iput(btp->bt_mapping->host);
@@ -1444,7 +1440,7 @@ xfs_free_buftarg(
        xfs_unregister_buftarg(btp);
        kthread_stop(btp->bt_task);
-        kmem_free(btp, sizeof(*btp));
+        kmem_free(btp);
 }
 STATIC int
@@ -1575,7 +1571,7 @@ xfs_alloc_buftarg(
        return btp;
 error:
-        kmem_free(btp, sizeof(*btp));
+        kmem_free(btp);
        return NULL;
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a4..29d1d4adc078 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
 *      Handling of buftargs.
 */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *, int);
+extern void xfs_free_buftarg(xfs_buftarg_t *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
 extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b14..987fe84f7b13 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
        struct xfs_inode        *cip;
        struct dentry           *parent;
-        error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip);
+        error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
        if (unlikely(error))
                return ERR_PTR(-error);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 01939ba2d8de..acb978d9d085 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
 #include "xfs_dfrag.h"
 #include "xfs_fsops.h"
 #include "xfs_vnodeops.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
 #include <linux/capability.h>
 #include <linux/dcache.h>
@@ -468,6 +470,12 @@ xfs_attrlist_by_handle(
        if (al_hreq.buflen > XATTR_LIST_MAX)
                return -XFS_ERROR(EINVAL);
+        /*
+         * Reject flags, only allow namespaces.
+         */
+        if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+                return -XFS_ERROR(EINVAL);
        error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
        if (error)
                goto out;
@@ -587,7 +595,7 @@ xfs_attrmulti_by_handle(
                goto out;
        error = E2BIG;
-        size = am_hreq.opcount * sizeof(attr_multiop_t);
+        size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
        if (!size || size > 16 * PAGE_SIZE)
                goto out_vn_rele;
@@ -680,9 +688,9 @@ xfs_ioc_space(
                return -XFS_ERROR(EFAULT);
        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-                attr_flags |= ATTR_NONBLOCK;
+                attr_flags |= XFS_ATTR_NONBLOCK;
        if (ioflags & IO_INVIS)
-                attr_flags |= ATTR_DMI;
+                attr_flags |= XFS_ATTR_DMI;
        error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
                                              NULL, attr_flags);
@@ -873,6 +881,322 @@ xfs_ioc_fsgetxattr(
        return 0;
 }
+/*
+ * Rebuild ip->i_d.di_flags from the XFS_XFLAG_* bits in @xflags.
+ *
+ * Only XFS_DIFLAG_PREALLOC is carried over from the current on-disk flags;
+ * every other XFS_DIFLAG_* bit is set solely from the matching XFS_XFLAG_*
+ * bit.  The RTINHERIT/NOSYMLINKS/EXTSZINHERIT bits are honoured only for
+ * directories, and REALTIME/EXTSIZE only for regular files.
+ */
+STATIC void
+xfs_set_diflags(
+        struct xfs_inode        *ip,
+        unsigned int            xflags)
+{
+        unsigned int            di_flags;
+        /* can't set PREALLOC this way, just preserve it */
+        di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+        if (xflags & XFS_XFLAG_IMMUTABLE)
+                di_flags |= XFS_DIFLAG_IMMUTABLE;
+        if (xflags & XFS_XFLAG_APPEND)
+                di_flags |= XFS_DIFLAG_APPEND;
+        if (xflags & XFS_XFLAG_SYNC)
+                di_flags |= XFS_DIFLAG_SYNC;
+        if (xflags & XFS_XFLAG_NOATIME)
+                di_flags |= XFS_DIFLAG_NOATIME;
+        if (xflags & XFS_XFLAG_NODUMP)
+                di_flags |= XFS_DIFLAG_NODUMP;
+        if (xflags & XFS_XFLAG_PROJINHERIT)
+                di_flags |= XFS_DIFLAG_PROJINHERIT;
+        if (xflags & XFS_XFLAG_NODEFRAG)
+                di_flags |= XFS_DIFLAG_NODEFRAG;
+        if (xflags & XFS_XFLAG_FILESTREAM)
+                di_flags |= XFS_DIFLAG_FILESTREAM;
+        /* flags that depend on the file type recorded in di_mode */
+        if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+                if (xflags & XFS_XFLAG_RTINHERIT)
+                        di_flags |= XFS_DIFLAG_RTINHERIT;
+                if (xflags & XFS_XFLAG_NOSYMLINKS)
+                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
+                if (xflags & XFS_XFLAG_EXTSZINHERIT)
+                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+        } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+                if (xflags & XFS_XFLAG_REALTIME)
+                        di_flags |= XFS_DIFLAG_REALTIME;
+                if (xflags & XFS_XFLAG_EXTSIZE)
+                        di_flags |= XFS_DIFLAG_EXTSIZE;
+        }
+        ip->i_d.di_flags = di_flags;
+}
+/*
+ * Mirror the inode's XFS flags into the Linux inode's i_flags.
+ *
+ * Reads the flags via xfs_ip2xflags() and sets or clears S_IMMUTABLE,
+ * S_APPEND, S_SYNC and S_NOATIME on the inode returned by XFS_ITOV() so
+ * that each bit matches the corresponding XFS_XFLAG_* bit.
+ */
+STATIC void
+xfs_diflags_to_linux(
+        struct xfs_inode        *ip)
+{
+        struct inode            *inode = XFS_ITOV(ip);
+        unsigned int            xflags = xfs_ip2xflags(ip);
+        if (xflags & XFS_XFLAG_IMMUTABLE)
+                inode->i_flags |= S_IMMUTABLE;
+        else
+                inode->i_flags &= ~S_IMMUTABLE;
+        if (xflags & XFS_XFLAG_APPEND)
+                inode->i_flags |= S_APPEND;
+        else
+                inode->i_flags &= ~S_APPEND;
+        if (xflags & XFS_XFLAG_SYNC)
+                inode->i_flags |= S_SYNC;
+        else
+                inode->i_flags &= ~S_SYNC;
+        if (xflags & XFS_XFLAG_NOATIME)
+                inode->i_flags |= S_NOATIME;
+        else
+                inode->i_flags &= ~S_NOATIME;
+}
+/*
+ * Bits for the @mask argument of xfs_ioctl_setattr(): the first three
+ * select which struct fsxattr fields are applied, FSX_NONBLOCK selects
+ * DM_FLAGS_NDELAY for the DMAPI attribute event.
+ */
+#define FSX_PROJID      1
+#define FSX_EXTSIZE     2
+#define FSX_XFLAGS      4
+#define FSX_NONBLOCK    8
+/*
+ * Apply the struct fsxattr fields selected by @mask (FSX_PROJID,
+ * FSX_EXTSIZE, FSX_XFLAGS) to inode @ip within one transaction.
+ *
+ * All checks are done first, with the inode locked; only then is the
+ * inode joined to the transaction and modified.  On success a DMAPI
+ * attribute event is sent if enabled for the inode, using
+ * DM_FLAGS_NDELAY when FSX_NONBLOCK is set in @mask.
+ *
+ * Returns 0 on success.  On failure returns an XFS_ERROR() code (EROFS,
+ * EIO, EPERM, EINVAL) or the code handed back by the quota/transaction
+ * helpers; the visible caller xfs_ioc_fssetxattr() negates the result.
+ */
+STATIC int
+xfs_ioctl_setattr(
+        xfs_inode_t             *ip,
+        struct fsxattr          *fa,
+        int                     mask)
+{
+        struct xfs_mount        *mp = ip->i_mount;
+        struct xfs_trans        *tp;
+        unsigned int            lock_flags = 0;
+        struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
+        struct xfs_dquot        *olddquot = NULL;
+        int                     code;
+        xfs_itrace_entry(ip);
+        if (mp->m_flags & XFS_MOUNT_RDONLY)
+                return XFS_ERROR(EROFS);
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return XFS_ERROR(EIO);
+        /*
+         * If disk quotas is on, we make sure that the dquots do exist on disk,
+         * before we start any other transactions. Trying to do this later
+         * is messy. We don't care to take a readlock to look at the ids
+         * in inode here, because we can't hold it across the trans_reserve.
+         * If the IDs do change before we take the ilock, we're covered
+         * because the i_*dquot fields will get updated anyway.
+         */
+        if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+                code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+                                         ip->i_d.di_gid, fa->fsx_projid,
+                                         XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+                if (code)
+                        return code;
+        }
+        /*
+         * For the other attributes, we acquire the inode lock and
+         * first do an error checking pass.
+         */
+        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+        code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+        if (code)
+                goto error_return;
+        /*
+         * lock_flags stays 0 until the ilock is actually taken, so that
+         * error_return knows whether there is anything to unlock.
+         */
+        lock_flags = XFS_ILOCK_EXCL;
+        xfs_ilock(ip, lock_flags);
+        /*
+         * CAP_FOWNER overrides the following restrictions:
+         *
+         * The user ID of the calling process must be equal
+         * to the file owner ID, except in cases where the
+         * CAP_FSETID capability is applicable.
+         */
+        if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+                code = XFS_ERROR(EPERM);
+                goto error_return;
+        }
+        /*
+         * Do a quota reservation only if projid is actually going to change.
+         */
+        if (mask & FSX_PROJID) {
+                if (XFS_IS_PQUOTA_ON(mp) &&
+                    ip->i_d.di_projid != fa->fsx_projid) {
+                        ASSERT(tp);
+                        code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+                                                capable(CAP_FOWNER) ?
+                                                XFS_QMOPT_FORCE_RES : 0);
+                        if (code)       /* out of quota */
+                                goto error_return;
+                }
+        }
+        if (mask & FSX_EXTSIZE) {
+                /*
+                 * Can't change extent size if any extents are allocated.
+                 */
+                if (ip->i_d.di_nextents &&
+                    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+                     fa->fsx_extsize)) {
+                        code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                        goto error_return;
+                }
+                /*
+                 * Extent size must be a multiple of the appropriate block
+                 * size, if set at all.
+                 */
+                if (fa->fsx_extsize != 0) {
+                        xfs_extlen_t    size;
+                        if (XFS_IS_REALTIME_INODE(ip) ||
+                            ((mask & FSX_XFLAGS) &&
+                            (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+                                size = mp->m_sb.sb_rextsize <<
+                                       mp->m_sb.sb_blocklog;
+                        } else {
+                                size = mp->m_sb.sb_blocksize;
+                        }
+                        if (fa->fsx_extsize % size) {
+                                code = XFS_ERROR(EINVAL);
+                                goto error_return;
+                        }
+                }
+        }
+        if (mask & FSX_XFLAGS) {
+                /*
+                 * Can't change realtime flag if any extents are allocated.
+                 */
+                if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+                    (XFS_IS_REALTIME_INODE(ip)) !=
+                    (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                        code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                        goto error_return;
+                }
+                /*
+                 * If realtime flag is set then must have realtime data.
+                 */
+                if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                        if ((mp->m_sb.sb_rblocks == 0) ||
+                            (mp->m_sb.sb_rextsize == 0) ||
+                            (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+                                code = XFS_ERROR(EINVAL);
+                                goto error_return;
+                        }
+                }
+                /*
+                 * Can't modify an immutable/append-only file unless
+                 * we have appropriate permission.
+                 */
+                if ((ip->i_d.di_flags &
+                                (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+                     (fa->fsx_xflags &
+                                (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+                    !capable(CAP_LINUX_IMMUTABLE)) {
+                        code = XFS_ERROR(EPERM);
+                        goto error_return;
+                }
+        }
+        /* All checks passed: from here on the inode is modified. */
+        xfs_trans_ijoin(tp, ip, lock_flags);
+        xfs_trans_ihold(tp, ip);
+        /*
+         * Change file ownership.  Must be the owner or privileged.
+         * If the system was configured with the "restricted_chown"
+         * option, the owner is not permitted to give away the file,
+         * and can change the group id only to a group of which he
+         * or she is a member.
+         *
+         * NOTE(review): the code below only changes the project ID; the
+         * chown wording above looks stale for this function - confirm.
+         */
+        if (mask & FSX_PROJID) {
+                /*
+                 * CAP_FSETID overrides the following restrictions:
+                 *
+                 * The set-user-ID and set-group-ID bits of a file will be
+                 * cleared upon successful return from chown()
+                 */
+                if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                    !capable(CAP_FSETID))
+                        ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+                /*
+                 * Change the ownerships and register quota modifications
+                 * in the transaction.
+                 */
+                if (ip->i_d.di_projid != fa->fsx_projid) {
+                        if (XFS_IS_PQUOTA_ON(mp)) {
+                                olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+                                                        &ip->i_gdquot, gdqp);
+                        }
+                        ip->i_d.di_projid = fa->fsx_projid;
+                        /*
+                         * We may have to rev the inode as well as
+                         * the superblock version number since projids didn't
+                         * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+                         */
+                        if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+                                xfs_bump_ino_vers2(tp, ip);
+                }
+        }
+        /* fsx_extsize is shifted right by sb_blocklog before being stored */
+        if (mask & FSX_EXTSIZE)
+                ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+        if (mask & FSX_XFLAGS) {
+                xfs_set_diflags(ip, fa->fsx_xflags);
+                xfs_diflags_to_linux(ip);
+        }
+        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+        xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+        XFS_STATS_INC(xs_ig_attrchg);
+        /*
+         * If this is a synchronous mount, make sure that the
+         * transaction goes to disk before returning to the user.
+         * This is slightly sub-optimal in that truncates require
+         * two sync transactions instead of one for wsync filesystems.
+         * One for the truncate and one for the timestamps since we
+         * don't want to change the timestamps unless we're sure the
+         * truncate worked.  Truncates are less than 1% of the laddis
+         * mix so this probably isn't worth the trouble to optimize.
+         */
+        if (mp->m_flags & XFS_MOUNT_WSYNC)
+                xfs_trans_set_sync(tp);
+        code = xfs_trans_commit(tp, 0);
+        xfs_iunlock(ip, lock_flags);
+        /*
+         * Release any dquot(s) the inode had kept before chown.
+         */
+        XFS_QM_DQRELE(mp, olddquot);
+        XFS_QM_DQRELE(mp, udqp);
+        XFS_QM_DQRELE(mp, gdqp);
+        if (code)
+                return code;
+        if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
+                XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
+                                NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
+                                (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
+        }
+        return 0;
+        /*
+         * Failure path: drop the dquot references, cancel the transaction
+         * and unlock the inode only if the ilock was taken (lock_flags != 0).
+         */
+ error_return:
+        XFS_QM_DQRELE(mp, udqp);
+        XFS_QM_DQRELE(mp, gdqp);
+        xfs_trans_cancel(tp, 0);
+        if (lock_flags)
+                xfs_iunlock(ip, lock_flags);
+        return code;
+}
 STATIC int
 xfs_ioc_fssetxattr(
        xfs_inode_t             *ip,
@@ -880,31 +1204,16 @@ xfs_ioc_fssetxattr(
        void                    __user *arg)
 {
        struct fsxattr          fa;
-        struct bhv_vattr        *vattr;
+        unsigned int            mask;
-        int                     error;
-        int                     attr_flags;
        if (copy_from_user(&fa, arg, sizeof(fa)))
                return -EFAULT;
-        vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+        mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
-        if (unlikely(!vattr))
-                return -ENOMEM;
-        attr_flags = 0;
        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-                attr_flags |= ATTR_NONBLOCK;
+                mask |= FSX_NONBLOCK;
-        vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
-        vattr->va_xflags  = fa.fsx_xflags;
-        vattr->va_extsize = fa.fsx_extsize;
-        vattr->va_projid  = fa.fsx_projid;
-        error = -xfs_setattr(ip, vattr, attr_flags, NULL);
+        return -xfs_ioctl_setattr(ip, &fa, mask);
-        if (!error)
-                vn_revalidate(XFS_ITOV(ip));    /* update flags */
-        kfree(vattr);
-        return 0;
 }
 STATIC int
@@ -926,10 +1235,9 @@ xfs_ioc_setxflags(
        struct file             *filp,
        void                    __user *arg)
 {
-        struct bhv_vattr        *vattr;
+        struct fsxattr          fa;
        unsigned int            flags;
-        int                     attr_flags;
+        unsigned int            mask;
-        int                     error;
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
@@ -939,22 +1247,12 @@ xfs_ioc_setxflags(
                      FS_SYNC_FL))
                return -EOPNOTSUPP;
-        vattr = kmalloc(sizeof(*vattr), GFP_KERNEL);
+        mask = FSX_XFLAGS;
-        if (unlikely(!vattr))
-                return -ENOMEM;
-        attr_flags = 0;
        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-                attr_flags |= ATTR_NONBLOCK;
+                mask |= FSX_NONBLOCK;
+        fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-        vattr->va_mask = XFS_AT_XFLAGS;
-        vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-        error = -xfs_setattr(ip, vattr, attr_flags, NULL);
+        return -xfs_ioctl_setattr(ip, &fa, mask);
-        if (likely(!error))
-                vn_revalidate(XFS_ITOV(ip));    /* update flags */
-        kfree(vattr);
-        return error;
 }
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 5fc61c824bb9..e88f51028086 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -181,23 +181,6 @@ xfs_ichgtime_fast(
                mark_inode_dirty_sync(inode);
 }
-/*
- * Pull the link count and size up from the xfs inode to the linux inode
- */
-STATIC void
-xfs_validate_fields(
-        struct inode            *inode)
-{
-        struct xfs_inode        *ip = XFS_I(inode);
-        loff_t size;
-        /* we're under i_sem so i_size can't change under us */
-        size = XFS_ISIZE(ip);
-        if (i_size_read(inode) != size)
-                i_size_write(inode, size);
-}
 /*
 * Hook in SELinux.  This is not quite correct yet, what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
@@ -245,8 +228,7 @@ STATIC void
 xfs_cleanup_inode(
        struct inode    *dir,
        struct inode    *inode,
-        struct dentry   *dentry,
+        struct dentry   *dentry)
-        int             mode)
 {
        struct xfs_name teardown;
@@ -257,10 +239,7 @@ xfs_cleanup_inode(
         */
        xfs_dentry_to_name(&teardown, dentry);
-        if (S_ISDIR(mode))
+        xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-                xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
-        else
-                xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
        iput(inode);
 }
@@ -275,7 +254,7 @@ xfs_vn_mknod(
        struct xfs_inode *ip = NULL;
        xfs_acl_t       *default_acl = NULL;
        struct xfs_name name;
-        attrexists_t    test_default_acl = _ACL_DEFAULT_EXISTS;
+        int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
        int             error;
        /*
@@ -335,14 +314,11 @@ xfs_vn_mknod(
        }
-        if (S_ISDIR(mode))
-                xfs_validate_fields(inode);
        d_instantiate(dentry, inode);
-        xfs_validate_fields(dir);
        return -error;
 out_cleanup_inode:
-        xfs_cleanup_inode(dir, inode, dentry, mode);
+        xfs_cleanup_inode(dir, inode, dentry);
 out_free_acl:
        if (default_acl)
                _ACL_FREE(default_acl);
@@ -382,7 +358,7 @@ xfs_vn_lookup(
                return ERR_PTR(-ENAMETOOLONG);
        xfs_dentry_to_name(&name, dentry);
-        error = xfs_lookup(XFS_I(dir), &name, &cip);
+        error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
        if (unlikely(error)) {
                if (unlikely(error != ENOENT))
                        return ERR_PTR(-error);
@@ -393,6 +369,46 @@ xfs_vn_lookup(
        return d_splice_alias(cip->i_vnode, dentry);
 }
+STATIC struct dentry *
+xfs_vn_ci_lookup(        /* installed as .lookup in xfs_dir_ci_inode_operations */
+        struct inode    *dir,
+        struct dentry   *dentry,
+        struct nameidata *nd)    /* not referenced in the body */
+{
+        struct xfs_inode *ip;
+        struct xfs_name xname;
+        struct xfs_name ci_name;        /* passed to xfs_lookup(); .name is NULL on an exact match (tested below) */
+        struct qstr     dname;
+        int             error;
+        if (dentry->d_name.len >= MAXNAMELEN)   /* reject over-long names before calling xfs_lookup() */
+                return ERR_PTR(-ENAMETOOLONG);
+        xfs_dentry_to_name(&xname, dentry);
+        error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+        if (unlikely(error)) {
+                if (unlikely(error != ENOENT))
+                        return ERR_PTR(-error); /* error is compared against positive ENOENT, so negate it for ERR_PTR */
+                /*
+                 * call d_add(dentry, NULL) here when d_drop_negative_children
+                 * is called in xfs_vn_mknod (ie. allow negative dentries
+                 * with CI filesystems).
+                 */
+                return NULL;    /* ENOENT: nothing is added to the dcache for this name */
+        }
+        /* if exact match, just splice and exit */
+        if (!ci_name.name)
+                return d_splice_alias(ip->i_vnode, dentry);
+        /* else case-insensitive match... */
+        dname.name = ci_name.name;      /* hand the name reported by xfs_lookup() to d_add_ci() as a qstr */
+        dname.len = ci_name.len;
+        dentry = d_add_ci(ip->i_vnode, dentry, &dname);
+        kmem_free(ci_name.name);        /* presumably allocated by xfs_lookup() -- confirm; not used after this point */
+        return dentry;
+}
 STATIC int
 xfs_vn_link(
        struct dentry   *old_dentry,
@@ -414,7 +430,6 @@ xfs_vn_link(
        }
        xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
-        xfs_validate_fields(inode);
        d_instantiate(dentry, inode);
        return 0;
 }
@@ -424,19 +439,23 @@ xfs_vn_unlink(
        struct inode    *dir,
        struct dentry   *dentry)
 {
-        struct inode    *inode;
        struct xfs_name name;
        int             error;
-        inode = dentry->d_inode;
        xfs_dentry_to_name(&name, dentry);
-        error = xfs_remove(XFS_I(dir), &name, XFS_I(inode));
+        error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
-        if (likely(!error)) {
+        if (error)
-                xfs_validate_fields(dir);       /* size needs update */
+                return error;
-                xfs_validate_fields(inode);
-        }
+        /*
-        return -error;
+         * With unlink, the VFS makes the dentry "negative": no inode,
+         * but still hashed. This is incompatible with case-insensitive
+         * mode, so invalidate (unhash) the dentry in CI-mode.
+         */
+        if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+                d_invalidate(dentry);
+        return 0;
 }
 STATIC int
@@ -466,36 +485,15 @@ xfs_vn_symlink(
                goto out_cleanup_inode;
        d_instantiate(dentry, inode);
-        xfs_validate_fields(dir);
-        xfs_validate_fields(inode);
        return 0;
 out_cleanup_inode:
-        xfs_cleanup_inode(dir, inode, dentry, 0);
+        xfs_cleanup_inode(dir, inode, dentry);
 out:
        return -error;
 }
 STATIC int
-xfs_vn_rmdir(
-        struct inode    *dir,
-        struct dentry   *dentry)
-{
-        struct inode    *inode = dentry->d_inode;
-        struct xfs_name name;
-        int             error;
-        xfs_dentry_to_name(&name, dentry);
-        error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
-        if (likely(!error)) {
-                xfs_validate_fields(inode);
-                xfs_validate_fields(dir);
-        }
-        return -error;
-}
-STATIC int
 xfs_vn_rename(
        struct inode    *odir,
        struct dentry   *odentry,
@@ -505,22 +503,13 @@ xfs_vn_rename(
        struct inode    *new_inode = ndentry->d_inode;
        struct xfs_name oname;
        struct xfs_name nname;
-        int             error;
        xfs_dentry_to_name(&oname, odentry);
        xfs_dentry_to_name(&nname, ndentry);
-        error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+        return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
                           XFS_I(ndir), &nname, new_inode ?
                                                XFS_I(new_inode) : NULL);
-        if (likely(!error)) {
-                if (new_inode)
-                        xfs_validate_fields(new_inode);
-                xfs_validate_fields(odir);
-                if (ndir != odir)
-                        xfs_validate_fields(ndir);
-        }
-        return -error;
 }
 /*
@@ -659,57 +648,9 @@ xfs_vn_getattr(
 STATIC int
 xfs_vn_setattr(
        struct dentry   *dentry,
-        struct iattr    *attr)
+        struct iattr    *iattr)
 {
-        struct inode    *inode = dentry->d_inode;
+        return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
-        unsigned int    ia_valid = attr->ia_valid;
-        bhv_vattr_t     vattr = { 0 };
-        int             flags = 0;
-        int             error;
-        if (ia_valid & ATTR_UID) {
-                vattr.va_mask |= XFS_AT_UID;
-                vattr.va_uid = attr->ia_uid;
-        }
-        if (ia_valid & ATTR_GID) {
-                vattr.va_mask |= XFS_AT_GID;
-                vattr.va_gid = attr->ia_gid;
-        }
-        if (ia_valid & ATTR_SIZE) {
-                vattr.va_mask |= XFS_AT_SIZE;
-                vattr.va_size = attr->ia_size;
-        }
-        if (ia_valid & ATTR_ATIME) {
-                vattr.va_mask |= XFS_AT_ATIME;
-                vattr.va_atime = attr->ia_atime;
-                inode->i_atime = attr->ia_atime;
-        }
-        if (ia_valid & ATTR_MTIME) {
-                vattr.va_mask |= XFS_AT_MTIME;
-                vattr.va_mtime = attr->ia_mtime;
-        }
-        if (ia_valid & ATTR_CTIME) {
-                vattr.va_mask |= XFS_AT_CTIME;
-                vattr.va_ctime = attr->ia_ctime;
-        }
-        if (ia_valid & ATTR_MODE) {
-                vattr.va_mask |= XFS_AT_MODE;
-                vattr.va_mode = attr->ia_mode;
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        inode->i_mode &= ~S_ISGID;
-        }
-        if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
-                flags |= ATTR_UTIME;
-#ifdef ATTR_NO_BLOCK
-        if ((ia_valid & ATTR_NO_BLOCK))
-                flags |= ATTR_NONBLOCK;
-#endif
-        error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
-        if (likely(!error))
-                vn_revalidate(vn_from_inode(inode));
-        return -error;
 }
 /*
@@ -727,109 +668,6 @@ xfs_vn_truncate(
        WARN_ON(error);
 }
-STATIC int
-xfs_vn_setxattr(
-        struct dentry   *dentry,
-        const char      *name,
-        const void      *data,
-        size_t          size,
-        int             flags)
-{
-        bhv_vnode_t     *vp = vn_from_inode(dentry->d_inode);
-        char            *attr = (char *)name;
-        attrnames_t     *namesp;
-        int             xflags = 0;
-        int             error;
-        namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
-        if (!namesp)
-                return -EOPNOTSUPP;
-        attr += namesp->attr_namelen;
-        error = namesp->attr_capable(vp, NULL);
-        if (error)
-                return error;
-        /* Convert Linux syscall to XFS internal ATTR flags */
-        if (flags & XATTR_CREATE)
-                xflags |= ATTR_CREATE;
-        if (flags & XATTR_REPLACE)
-                xflags |= ATTR_REPLACE;
-        xflags |= namesp->attr_flag;
-        return namesp->attr_set(vp, attr, (void *)data, size, xflags);
-}
-STATIC ssize_t
-xfs_vn_getxattr(
-        struct dentry   *dentry,
-        const char      *name,
-        void            *data,
-        size_t          size)
-{
-        bhv_vnode_t     *vp = vn_from_inode(dentry->d_inode);
-        char            *attr = (char *)name;
-        attrnames_t     *namesp;
-        int             xflags = 0;
-        ssize_t         error;
-        namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
-        if (!namesp)
-                return -EOPNOTSUPP;
-        attr += namesp->attr_namelen;
-        error = namesp->attr_capable(vp, NULL);
-        if (error)
-                return error;
-        /* Convert Linux syscall to XFS internal ATTR flags */
-        if (!size) {
-                xflags |= ATTR_KERNOVAL;
-                data = NULL;
-        }
-        xflags |= namesp->attr_flag;
-        return namesp->attr_get(vp, attr, (void *)data, size, xflags);
-}
-STATIC ssize_t
-xfs_vn_listxattr(
-        struct dentry           *dentry,
-        char                    *data,
-        size_t                  size)
-{
-        bhv_vnode_t             *vp = vn_from_inode(dentry->d_inode);
-        int                     error, xflags = ATTR_KERNAMELS;
-        ssize_t                 result;
-        if (!size)
-                xflags |= ATTR_KERNOVAL;
-        xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
-        error = attr_generic_list(vp, data, size, xflags, &result);
-        if (error < 0)
-                return error;
-        return result;
-}
-STATIC int
-xfs_vn_removexattr(
-        struct dentry   *dentry,
-        const char      *name)
-{
-        bhv_vnode_t     *vp = vn_from_inode(dentry->d_inode);
-        char            *attr = (char *)name;
-        attrnames_t     *namesp;
-        int             xflags = 0;
-        int             error;
-        namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
-        if (!namesp)
-                return -EOPNOTSUPP;
-        attr += namesp->attr_namelen;
-        error = namesp->attr_capable(vp, NULL);
-        if (error)
-                return error;
-        xflags |= namesp->attr_flag;
-        return namesp->attr_remove(vp, attr, xflags);
-}
 STATIC long
 xfs_vn_fallocate(
        struct inode    *inode,
@@ -853,18 +691,18 @@ xfs_vn_fallocate(
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
        error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
-                                                0, NULL, ATTR_NOLOCK);
+                                      0, NULL, XFS_ATTR_NOLOCK);
        if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
            offset + len > i_size_read(inode))
                new_size = offset + len;
        /* Change file size if needed */
        if (new_size) {
-                bhv_vattr_t     va;
+                struct iattr iattr;
-                va.va_mask = XFS_AT_SIZE;
+                iattr.ia_valid = ATTR_SIZE;
-                va.va_size = new_size;
+                iattr.ia_size = new_size;
-                error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL);
+                error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
        }
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -877,10 +715,10 @@ const struct inode_operations xfs_inode_operations = {
        .truncate               = xfs_vn_truncate,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
-        .setxattr               = xfs_vn_setxattr,
+        .setxattr               = generic_setxattr,
-        .getxattr               = xfs_vn_getxattr,
+        .getxattr               = generic_getxattr,
+        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
-        .removexattr            = xfs_vn_removexattr,
        .fallocate              = xfs_vn_fallocate,
 };
@@ -891,16 +729,47 @@ const struct inode_operations xfs_dir_inode_operations = {
        .unlink                 = xfs_vn_unlink,
        .symlink                = xfs_vn_symlink,
        .mkdir                  = xfs_vn_mkdir,
-        .rmdir                  = xfs_vn_rmdir,
+        /*
+         * Yes, XFS uses the same method for rmdir and unlink.
+         *
+         * There are some subtle differences deeper in the code,
+         * but we use S_ISDIR to check for those.
+         */
+        .rmdir                  = xfs_vn_unlink,
+        .mknod                  = xfs_vn_mknod,
+        .rename                 = xfs_vn_rename,
+        .permission             = xfs_vn_permission,
+        .getattr                = xfs_vn_getattr,
+        .setattr                = xfs_vn_setattr,
+        .setxattr               = generic_setxattr,
+        .getxattr               = generic_getxattr,
+        .removexattr            = generic_removexattr,
+        .listxattr              = xfs_vn_listxattr,
+};
+const struct inode_operations xfs_dir_ci_inode_operations = {
+        .create                 = xfs_vn_create,
+        .lookup                 = xfs_vn_ci_lookup,
+        .link                   = xfs_vn_link,
+        .unlink                 = xfs_vn_unlink,
+        .symlink                = xfs_vn_symlink,
+        .mkdir                  = xfs_vn_mkdir,
+        /*
+         * Yes, XFS uses the same method for rmdir and unlink.
+         *
+         * There are some subtle differences deeper in the code,
+         * but we use S_ISDIR to check for those.
+         */
+        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
        .rename                 = xfs_vn_rename,
        .permission             = xfs_vn_permission,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
-        .setxattr               = xfs_vn_setxattr,
+        .setxattr               = generic_setxattr,
-        .getxattr               = xfs_vn_getxattr,
+        .getxattr               = generic_getxattr,
+        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
-        .removexattr            = xfs_vn_removexattr,
 };
 const struct inode_operations xfs_symlink_inode_operations = {
@@ -910,8 +779,8 @@ const struct inode_operations xfs_symlink_inode_operations = {
        .permission             = xfs_vn_permission,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
-        .setxattr               = xfs_vn_setxattr,
+        .setxattr               = generic_setxattr,
-        .getxattr               = xfs_vn_getxattr,
+        .getxattr               = generic_getxattr,
+        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
-        .removexattr            = xfs_vn_removexattr,
 };
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7afff..d97ba934a2ac 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -20,12 +20,14 @@
 extern const struct inode_operations xfs_inode_operations;
 extern const struct inode_operations xfs_dir_inode_operations;
+extern const struct inode_operations xfs_dir_ci_inode_operations;
 extern const struct inode_operations xfs_symlink_inode_operations;
 extern const struct file_operations xfs_file_operations;
 extern const struct file_operations xfs_dir_file_operations;
 extern const struct file_operations xfs_invis_file_operations;
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 struct xfs_inode;
 extern void xfs_ichgtime(struct xfs_inode *, int);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b57..4d45d9351a6c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -76,6 +76,7 @@
 #include <linux/log2.h>
 #include <linux/spinlock.h>
 #include <linux/random.h>
+#include <linux/ctype.h>
 #include <asm/page.h>
 #include <asm/div64.h>
@@ -299,4 +300,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
        return x;
 }
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))     /* ARM without EABI: request packed layout */
+#else   /* every other target */
+#define __arch_pack     /* expands to nothing */
+#endif  /* __arm__ && !__ARM_EABI__ */
 #endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b6102051..3d5b67c075c7 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
        return len;
 }
-void
+int
 xfs_init_procfs(void)
 {
        if (!proc_mkdir("fs/xfs", NULL))
-                return;
+                goto out;
-        create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL);
+        if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
+                        xfs_read_xfsstats, NULL))
+                goto out_remove_entry;
+        return 0;
+ out_remove_entry:
+        remove_proc_entry("fs/xfs", NULL);
+ out:
+        return -ENOMEM;
 }
 void
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb2..e83820febc9f 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
 #define XFS_STATS_DEC(v)        (per_cpu(xfsstats, current_cpu()).v--)
 #define XFS_STATS_ADD(v, inc)   (per_cpu(xfsstats, current_cpu()).v += (inc))
-extern void xfs_init_procfs(void);
+extern int xfs_init_procfs(void);
 extern void xfs_cleanup_procfs(void);
@@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void);
 # define XFS_STATS_DEC(count)
 # define XFS_STATS_ADD(count, inc)
-static inline void xfs_init_procfs(void) { };
+static inline int xfs_init_procfs(void)
-static inline void xfs_cleanup_procfs(void) { };
+{
+        return 0;
+}
+static inline void xfs_cleanup_procfs(void)
+{
+}
 #endif  /* !CONFIG_PROC_FS */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 943381284e2e..30ae96397e31 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,12 @@
 #include "xfs_version.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_trace.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -60,6 +66,7 @@
 #include <linux/writeback.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+#include <linux/parser.h>
 static struct quotactl_ops xfs_quotactl_operations;
 static struct super_operations xfs_super_operations;
@@ -74,7 +81,10 @@ xfs_args_allocate(
 {
        struct xfs_mount_args   *args;
-        args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
+        args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
+        if (!args)
+                return NULL;
        args->logbufs = args->logbufsize = -1;
        strncpy(args->fsname, sb->s_id, MAXNAMELEN);
@@ -138,6 +148,23 @@ xfs_args_allocate(
 #define MNTOPT_XDSM     "xdsm"          /* DMI enabled (DMAPI / XDSM) */
 #define MNTOPT_DMI      "dmi"           /* DMI enabled (DMAPI / XDSM) */
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+        Opt_barrier, Opt_nobarrier, Opt_err     /* token ids used by the table below */
+};
+static match_table_t tokens = {
+        {Opt_barrier, "barrier"},       /* option string -> token id */
+        {Opt_nobarrier, "nobarrier"},
+        {Opt_err, NULL} /* NULL pattern: presumably the end-of-table marker -- confirm in linux/parser.h */
+};
 STATIC unsigned long
 suffix_strtoul(char *s, char **endp, unsigned int base)
 {
@@ -314,6 +341,7 @@ xfs_parseargs(
                        args->flags |= XFSMNT_ATTR2;
                } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
                        args->flags &= ~XFSMNT_ATTR2;
+                        args->flags |= XFSMNT_NOATTR2;
                } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
                        args->flags2 |= XFSMNT2_FILESTREAMS;
                } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
@@ -564,7 +592,10 @@ xfs_set_inodeops(
                inode->i_mapping->a_ops = &xfs_address_space_operations;
                break;
        case S_IFDIR:
-                inode->i_op = &xfs_dir_inode_operations;
+                if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+                        inode->i_op = &xfs_dir_ci_inode_operations;
+                else
+                        inode->i_op = &xfs_dir_inode_operations;
                inode->i_fop = &xfs_dir_file_operations;
                break;
        case S_IFLNK:
@@ -733,14 +764,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
                return;
        }
-        if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
-                                        QUEUE_ORDERED_NONE) {
-                xfs_fs_cmn_err(CE_NOTE, mp,
-                  "Disabling barriers, not supported by the underlying device");
-                mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                return;
-        }
        if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
                xfs_fs_cmn_err(CE_NOTE, mp,
                  "Disabling barriers, underlying device is readonly");
@@ -764,6 +787,139 @@ xfs_blkdev_issue_flush(
        blkdev_issue_flush(buftarg->bt_bdev, NULL);
 }
+STATIC void
+xfs_close_devices(       /* free the mount's buffer targets and put the log/realtime block devices */
+        struct xfs_mount        *mp)
+{
+        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {     /* skip when the log shares the data buftarg */
+                struct block_device *logdev = mp->m_logdev_targp->bt_bdev;      /* read bt_bdev before the buftarg is freed */
+                xfs_free_buftarg(mp->m_logdev_targp);
+                xfs_blkdev_put(logdev);
+        }
+        if (mp->m_rtdev_targp) {
+                struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;        /* read bt_bdev before the buftarg is freed */
+                xfs_free_buftarg(mp->m_rtdev_targp);
+                xfs_blkdev_put(rtdev);
+        }
+        xfs_free_buftarg(mp->m_ddev_targp);     /* only the buftarg is freed; the data device is not put here */
+}
+/*
+ * The file system configurations are:
+ *      (1) device (partition) with data and internal log
+ *      (2) logical volume with data and log subvolumes.
+ *      (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int      /* 0 on success; the codes set in this function (EINVAL, ENOMEM) are positive */
+xfs_open_devices(
+        struct xfs_mount        *mp,
+        struct xfs_mount_args   *args)
+{
+        struct block_device     *ddev = mp->m_super->s_bdev;
+        struct block_device     *logdev = NULL, *rtdev = NULL;  /* stay NULL when the matching name in args is empty */
+        int                     error;
+        /*
+         * Open real time and log devices - order is important.
+         */
+        if (args->logname[0]) {
+                error = xfs_blkdev_get(mp, args->logname, &logdev);
+                if (error)
+                        goto out;
+        }
+        if (args->rtname[0]) {
+                error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+                if (error)
+                        goto out_close_logdev;
+                if (rtdev == ddev || rtdev == logdev) { /* realtime device must differ from both data and log devices */
+                        cmn_err(CE_WARN,
+        "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
+                        error = EINVAL;
+                        goto out_close_rtdev;
+                }
+        }
+        /*
+         * Setup xfs_mount buffer target pointers
+         */
+        error = ENOMEM; /* preset: every failure below is a failed xfs_alloc_buftarg() */
+        mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+        if (!mp->m_ddev_targp)
+                goto out_close_rtdev;
+        if (rtdev) {
+                mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+                if (!mp->m_rtdev_targp)
+                        goto out_free_ddev_targ;
+        }
+        if (logdev && logdev != ddev) {
+                mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+                if (!mp->m_logdev_targp)
+                        goto out_free_rtdev_targ;
+        } else {        /* no distinct log device: the log uses the data buftarg */
+                mp->m_logdev_targp = mp->m_ddev_targp;
+        }
+        return 0;
+ out_free_rtdev_targ:    /* labels fall through, undoing setup in reverse order */
+        if (mp->m_rtdev_targp)
+                xfs_free_buftarg(mp->m_rtdev_targp);
+ out_free_ddev_targ:
+        xfs_free_buftarg(mp->m_ddev_targp);
+ out_close_rtdev:
+        if (rtdev)
+                xfs_blkdev_put(rtdev);
+ out_close_logdev:
+        if (logdev && logdev != ddev)   /* the data device is never put by this function */
+                xfs_blkdev_put(logdev);
+ out:
+        return error;
+}
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int      /* 0, or the first non-zero xfs_setsize_buftarg() result */
+xfs_setup_devices(
+        struct xfs_mount        *mp)
+{
+        int                     error;
+        error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+                                    mp->m_sb.sb_sectsize);      /* data device: superblock block and sector size */
+        if (error)
+                return error;
+        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {     /* only when the log has its own buftarg */
+                unsigned int    log_sector_size = BBSIZE;       /* kept unless xfs_sb_version_hassector() is true */
+                if (xfs_sb_version_hassector(&mp->m_sb))
+                        log_sector_size = mp->m_sb.sb_logsectsize;
+                error = xfs_setsize_buftarg(mp->m_logdev_targp,
+                                            mp->m_sb.sb_blocksize,
+                                            log_sector_size);
+                if (error)
+                        return error;
+        }
+        if (mp->m_rtdev_targp) {        /* realtime device uses the same sizes as the data device */
+                error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+                                            mp->m_sb.sb_blocksize,
+                                            mp->m_sb.sb_sectsize);
+                if (error)
+                        return error;
+        }
+        return 0;
+}
 /*
 * XFS AIL push thread support
 */
@@ -848,42 +1004,6 @@ xfs_fs_inode_init_once(
        inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
 }
-STATIC int __init
-xfs_init_zones(void)
-{
-        xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
-                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-                                        KM_ZONE_SPREAD,
-                                        xfs_fs_inode_init_once);
-        if (!xfs_vnode_zone)
-                goto out;
-        xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-        if (!xfs_ioend_zone)
-                goto out_destroy_vnode_zone;
-        xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                  xfs_ioend_zone);
-        if (!xfs_ioend_pool)
-                goto out_free_ioend_zone;
-        return 0;
- out_free_ioend_zone:
-        kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
-        kmem_zone_destroy(xfs_vnode_zone);
- out:
-        return -ENOMEM;
-}
-STATIC void
-xfs_destroy_zones(void)
-{
-        mempool_destroy(xfs_ioend_pool);
-        kmem_zone_destroy(xfs_vnode_zone);
-        kmem_zone_destroy(xfs_ioend_zone);
-}
 /*
 * Attempt to flush the inode, this will actually fail
 * if the inode is pinned, but we dirty the inode again
@@ -1073,7 +1193,7 @@ xfssyncd(
                        list_del(&work->w_list);
                        if (work == &mp->m_sync_work)
                                continue;
-                        kmem_free(work, sizeof(struct bhv_vfs_sync_work));
+                        kmem_free(work);
                }
        }
@@ -1085,14 +1205,63 @@ xfs_fs_put_super(
        struct super_block      *sb)
 {
        struct xfs_mount        *mp = XFS_M(sb);
+        struct xfs_inode        *rip = mp->m_rootip;
+        int                     unmount_event_flags = 0;
        int                     error;
        kthread_stop(mp->m_sync_task);
        xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
-        error = xfs_unmount(mp, 0, NULL);
-        if (error)
+#ifdef HAVE_DMAPI
-                printk("XFS: unmount got error=%d\n", error);
+        if (mp->m_flags & XFS_MOUNT_DMAPI) {
+                unmount_event_flags =
+                        (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
+                                0 : DM_FLAGS_UNWANTED;
+                /*
+                 * Ignore error from dmapi here: first, unmount is not allowed
+                 * to fail anyway, and second, we wouldn't want to fail an
+                 * unmount because of dmapi.
+                 */
+                XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
+                                NULL, NULL, 0, 0, unmount_event_flags);
+        }
+#endif
+        /*
+         * Blow away any referenced inode in the filestreams cache.
+         * This can and will cause log traffic as inodes go inactive
+         * here.
+         */
+        xfs_filestream_unmount(mp);
+        XFS_bflush(mp->m_ddev_targp);
+        error = xfs_unmount_flush(mp, 0);
+        WARN_ON(error);
+        IRELE(rip);
+        /*
+         * If we're forcing a shutdown, typically because of a media error,
+         * we want to make sure we invalidate dirty pages that belong to
+         * referenced vnodes as well.
+         */
+        if (XFS_FORCED_SHUTDOWN(mp)) {
+                error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
+                ASSERT(error != EFSCORRUPTED);
+        }
+        if (mp->m_flags & XFS_MOUNT_DMAPI) {
+                XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
+                                unmount_event_flags);
+        }
+        xfs_unmountfs(mp);
+        xfs_icsb_destroy_counters(mp);
+        xfs_close_devices(mp);
+        xfs_qmops_put(mp);
+        xfs_dmops_put(mp);
+        kfree(mp);
 }
 STATIC void
@@ -1215,14 +1384,54 @@ xfs_fs_remount(
        char                    *options)
 {
        struct xfs_mount        *mp = XFS_M(sb);
-        struct xfs_mount_args   *args = xfs_args_allocate(sb, 0);
+        substring_t             args[MAX_OPT_ARGS];
-        int                     error;
+        char                    *p;
-        error = xfs_parseargs(mp, options, args, 1);
+        while ((p = strsep(&options, ",")) != NULL) {
-        if (!error)
+                int token;
-                error = xfs_mntupdate(mp, flags, args);
-        kmem_free(args, sizeof(*args));
+                if (!*p)
-        return -error;
+                        continue;
+                token = match_token(p, tokens, args);
+                switch (token) {
+                case Opt_barrier:
+                        mp->m_flags |= XFS_MOUNT_BARRIER;
+                        /*
+                         * Test if barriers are actually working if we can,
+                         * else delay this check until the filesystem is
+                         * marked writeable.
+                         */
+                        if (!(mp->m_flags & XFS_MOUNT_RDONLY))
+                                xfs_mountfs_check_barriers(mp);
+                        break;
+                case Opt_nobarrier:
+                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                        break;
+                default:
+                        printk(KERN_INFO
+        "XFS: mount option \"%s\" not supported for remount\n", p);
+                        return -EINVAL;
+                }
+        }
+        /* rw/ro -> rw */
+        if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+                mp->m_flags &= ~XFS_MOUNT_RDONLY;
+                if (mp->m_flags & XFS_MOUNT_BARRIER)
+                        xfs_mountfs_check_barriers(mp);
+        }
+        /* rw -> ro */
+        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+                xfs_filestream_flush(mp);
+                xfs_sync(mp, SYNC_DATA_QUIESCE);
+                xfs_attr_quiesce(mp);
+                mp->m_flags |= XFS_MOUNT_RDONLY;
+        }
+        return 0;
 }
 /*
@@ -1299,6 +1508,225 @@ xfs_fs_setxquota(
                                   Q_XSETPQLIM), id, (caddr_t)fdq);
 }
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ */
+STATIC int
+xfs_start_flags(
+        struct xfs_mount_args   *ap,
+        struct xfs_mount        *mp)
+{
+        /* Values are in BBs */
+        if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+                /*
+                 * At this point the superblock has not been read
+                 * in, therefore we do not know the block size.
+                 * Before the mount call ends we will convert
+                 * these to FSBs.
+                 */
+                mp->m_dalign = ap->sunit;
+                mp->m_swidth = ap->swidth;
+        }
+        if (ap->logbufs != -1 &&
+            ap->logbufs != 0 &&
+            (ap->logbufs < XLOG_MIN_ICLOGS ||
+             ap->logbufs > XLOG_MAX_ICLOGS)) {
+                cmn_err(CE_WARN,
+                        "XFS: invalid logbufs value: %d [not %d-%d]",
+                        ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+                return XFS_ERROR(EINVAL);
+        }
+        mp->m_logbufs = ap->logbufs;
+        if (ap->logbufsize != -1 &&
+            ap->logbufsize !=  0 &&
+            (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
+             ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
+             !is_power_of_2(ap->logbufsize))) {
+                cmn_err(CE_WARN,
+        "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+                        ap->logbufsize);
+                return XFS_ERROR(EINVAL);
+        }
+        mp->m_logbsize = ap->logbufsize;
+        mp->m_fsname_len = strlen(ap->fsname) + 1;
+        mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
+        strcpy(mp->m_fsname, ap->fsname);
+        if (ap->rtname[0]) {
+                mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
+                strcpy(mp->m_rtname, ap->rtname);
+        }
+        if (ap->logname[0]) {
+                mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
+                strcpy(mp->m_logname, ap->logname);
+        }
+        if (ap->flags & XFSMNT_WSYNC)
+                mp->m_flags |= XFS_MOUNT_WSYNC;
+#if XFS_BIG_INUMS
+        if (ap->flags & XFSMNT_INO64) {
+                mp->m_flags |= XFS_MOUNT_INO64;
+                mp->m_inoadd = XFS_INO64_OFFSET;
+        }
+#endif
+        if (ap->flags & XFSMNT_RETERR)
+                mp->m_flags |= XFS_MOUNT_RETERR;
+        if (ap->flags & XFSMNT_NOALIGN)
+                mp->m_flags |= XFS_MOUNT_NOALIGN;
+        if (ap->flags & XFSMNT_SWALLOC)
+                mp->m_flags |= XFS_MOUNT_SWALLOC;
+        if (ap->flags & XFSMNT_OSYNCISOSYNC)
+                mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
+        if (ap->flags & XFSMNT_32BITINODES)
+                mp->m_flags |= XFS_MOUNT_32BITINODES;
+        if (ap->flags & XFSMNT_IOSIZE) {
+                if (ap->iosizelog > XFS_MAX_IO_LOG ||
+                    ap->iosizelog < XFS_MIN_IO_LOG) {
+                        cmn_err(CE_WARN,
+                "XFS: invalid log iosize: %d [not %d-%d]",
+                                ap->iosizelog, XFS_MIN_IO_LOG,
+                                XFS_MAX_IO_LOG);
+                        return XFS_ERROR(EINVAL);
+                }
+                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+                mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
+        }
+        if (ap->flags & XFSMNT_IKEEP)
+                mp->m_flags |= XFS_MOUNT_IKEEP;
+        if (ap->flags & XFSMNT_DIRSYNC)
+                mp->m_flags |= XFS_MOUNT_DIRSYNC;
+        if (ap->flags & XFSMNT_ATTR2)
+                mp->m_flags |= XFS_MOUNT_ATTR2;
+        if (ap->flags & XFSMNT_NOATTR2)
+                mp->m_flags |= XFS_MOUNT_NOATTR2;
+        if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
+                mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+        /*
+         * no recovery flag requires a read-only mount
+         */
+        if (ap->flags & XFSMNT_NORECOVERY) {
+                if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+                        cmn_err(CE_WARN,
+        "XFS: tried to mount a FS read-write without recovery!");
+                        return XFS_ERROR(EINVAL);
+                }
+                mp->m_flags |= XFS_MOUNT_NORECOVERY;
+        }
+        if (ap->flags & XFSMNT_NOUUID)
+                mp->m_flags |= XFS_MOUNT_NOUUID;
+        if (ap->flags & XFSMNT_BARRIER)
+                mp->m_flags |= XFS_MOUNT_BARRIER;
+        else
+                mp->m_flags &= ~XFS_MOUNT_BARRIER;
+        if (ap->flags2 & XFSMNT2_FILESTREAMS)
+                mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+        if (ap->flags & XFSMNT_DMAPI)
+                mp->m_flags |= XFS_MOUNT_DMAPI;
+        return 0;
+}
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+        struct xfs_mount_args   *ap,
+        struct xfs_mount        *mp)
+{
+        int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+        /* Fail a mount where the logbuf is smaller than the log stripe */
+        if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+                if ((ap->logbufsize <= 0) &&
+                    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+                        mp->m_logbsize = mp->m_sb.sb_logsunit;
+                } else if (ap->logbufsize > 0 &&
+                           ap->logbufsize < mp->m_sb.sb_logsunit) {
+                        cmn_err(CE_WARN,
+        "XFS: logbuf size must be greater than or equal to log stripe size");
+                        return XFS_ERROR(EINVAL);
+                }
+        } else {
+                /* Fail a mount if the logbuf is larger than 32K */
+                if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+                        cmn_err(CE_WARN,
+        "XFS: logbuf size for version 1 logs must be 16K or 32K");
+                        return XFS_ERROR(EINVAL);
+                }
+        }
+        /*
+         * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+         * told by noattr2 to turn it off
+         */
+        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+            !(ap->flags & XFSMNT_NOATTR2))
+                mp->m_flags |= XFS_MOUNT_ATTR2;
+        /*
+         * prohibit r/w mounts of read-only filesystems
+         */
+        if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+                cmn_err(CE_WARN,
+        "XFS: cannot mount a read-only filesystem as read-write");
+                return XFS_ERROR(EROFS);
+        }
+        /*
+         * check for shared mount.
+         */
+        if (ap->flags & XFSMNT_SHARED) {
+                if (!xfs_sb_version_hasshared(&mp->m_sb))
+                        return XFS_ERROR(EINVAL);
+                /*
+                 * For IRIX 6.5, shared mounts must have the shared
+                 * version bit set, have the persistent readonly
+                 * field set, must be version 0 and can only be mounted
+                 * read-only.
+                 */
+                if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
+                     (mp->m_sb.sb_shared_vn != 0))
+                        return XFS_ERROR(EINVAL);
+                mp->m_flags |= XFS_MOUNT_SHARED;
+                /*
+                 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
+                 */
+                if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
+                        return XFS_ERROR(EINVAL);
+        }
+        if (ap->flags & XFSMNT_UQUOTA) {
+                mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+                if (ap->flags & XFSMNT_UQUOTAENF)
+                        mp->m_qflags |= XFS_UQUOTA_ENFD;
+        }
+        if (ap->flags & XFSMNT_GQUOTA) {
+                mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+                if (ap->flags & XFSMNT_GQUOTAENF)
+                        mp->m_qflags |= XFS_OQUOTA_ENFD;
+        } else if (ap->flags & XFSMNT_PQUOTA) {
+                mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+                if (ap->flags & XFSMNT_PQUOTAENF)
+                        mp->m_qflags |= XFS_OQUOTA_ENFD;
+        }
+        return 0;
+}
 STATIC int
 xfs_fs_fill_super(
        struct super_block      *sb,
@@ -1307,11 +1735,21 @@ xfs_fs_fill_super(
 {
        struct inode            *root;
        struct xfs_mount        *mp = NULL;
-        struct xfs_mount_args   *args = xfs_args_allocate(sb, silent);
+        struct xfs_mount_args   *args;
-        int                     error;
+        int                     flags = 0, error = ENOMEM;
+        args = xfs_args_allocate(sb, silent);
+        if (!args)
+                return -ENOMEM;
-        mp = xfs_mount_init();
+        mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+        if (!mp)
+                goto out_free_args;
+        spin_lock_init(&mp->m_sb_lock);
+        mutex_init(&mp->m_ilock);
+        mutex_init(&mp->m_growlock);
+        atomic_set(&mp->m_active_trans, 0);
        INIT_LIST_HEAD(&mp->m_sync_list);
        spin_lock_init(&mp->m_sync_lock);
        init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1324,16 +1762,60 @@ xfs_fs_fill_super(
        error = xfs_parseargs(mp, (char *)data, args, 0);
        if (error)
-                goto fail_vfsop;
+                goto out_free_mp;
        sb_min_blocksize(sb, BBSIZE);
+        sb->s_xattr = xfs_xattr_handlers;
        sb->s_export_op = &xfs_export_operations;
        sb->s_qcop = &xfs_quotactl_operations;
        sb->s_op = &xfs_super_operations;
-        error = xfs_mount(mp, args, NULL);
+        error = xfs_dmops_get(mp, args);
        if (error)
-                goto fail_vfsop;
+                goto out_free_mp;
+        error = xfs_qmops_get(mp, args);
+        if (error)
+                goto out_put_dmops;
+        if (args->flags & XFSMNT_QUIET)
+                flags |= XFS_MFSI_QUIET;
+        error = xfs_open_devices(mp, args);
+        if (error)
+                goto out_put_qmops;
+        if (xfs_icsb_init_counters(mp))
+                mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+        /*
+         * Setup flags based on mount(2) options and then the superblock
+         */
+        error = xfs_start_flags(args, mp);
+        if (error)
+                goto out_destroy_counters;
+        error = xfs_readsb(mp, flags);
+        if (error)
+                goto out_destroy_counters;
+        error = xfs_finish_flags(args, mp);
+        if (error)
+                goto out_free_sb;
+        error = xfs_setup_devices(mp);
+        if (error)
+                goto out_free_sb;
+        if (mp->m_flags & XFS_MOUNT_BARRIER)
+                xfs_mountfs_check_barriers(mp);
+        error = xfs_filestream_mount(mp);
+        if (error)
+                goto out_free_sb;
+        error = xfs_mountfs(mp, flags);
+        if (error)
+                goto out_filestream_unmount;
+        XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
        sb->s_dirt = 1;
        sb->s_magic = XFS_SB_MAGIC;
@@ -1368,10 +1850,27 @@ xfs_fs_fill_super(
        xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
-        kmem_free(args, sizeof(*args));
+        kfree(args);
        return 0;
-fail_vnrele:
+ out_filestream_unmount:
+        xfs_filestream_unmount(mp);
+ out_free_sb:
+        xfs_freesb(mp);
+ out_destroy_counters:
+        xfs_icsb_destroy_counters(mp);
+        xfs_close_devices(mp);
+ out_put_qmops:
+        xfs_qmops_put(mp);
+ out_put_dmops:
+        xfs_dmops_put(mp);
+ out_free_mp:
+        kfree(mp);
+ out_free_args:
+        kfree(args);
+        return -error;
+ fail_vnrele:
        if (sb->s_root) {
                dput(sb->s_root);
                sb->s_root = NULL;
@@ -1379,12 +1878,22 @@ fail_vnrele:
                iput(root);
        }
-fail_unmount:
+ fail_unmount:
-        xfs_unmount(mp, 0, NULL);
+        /*
+         * Blow away any referenced inode in the filestreams cache.
+         * This can and will cause log traffic as inodes go inactive
+         * here.
+         */
+        xfs_filestream_unmount(mp);
-fail_vfsop:
+        XFS_bflush(mp->m_ddev_targp);
-        kmem_free(args, sizeof(*args));
+        error = xfs_unmount_flush(mp, 0);
-        return -error;
+        WARN_ON(error);
+        IRELE(mp->m_rootip);
+        xfs_unmountfs(mp);
+        goto out_destroy_counters;
 }
 STATIC int
@@ -1429,9 +1938,235 @@ static struct file_system_type xfs_fs_type = {
        .fs_flags               = FS_REQUIRES_DEV,
 };
+STATIC int __init
+xfs_alloc_trace_bufs(void)
+{
+#ifdef XFS_ALLOC_TRACE
+        xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
+        if (!xfs_alloc_trace_buf)
+                goto out;
+#endif
+#ifdef XFS_BMAP_TRACE
+        xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
+        if (!xfs_bmap_trace_buf)
+                goto out_free_alloc_trace;
+#endif
+#ifdef XFS_BMBT_TRACE
+        xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
+        if (!xfs_bmbt_trace_buf)
+                goto out_free_bmap_trace;
+#endif
+#ifdef XFS_ATTR_TRACE
+        xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
+        if (!xfs_attr_trace_buf)
+                goto out_free_bmbt_trace;
+#endif
+#ifdef XFS_DIR2_TRACE
+        xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
+        if (!xfs_dir2_trace_buf)
+                goto out_free_attr_trace;
+#endif
+        return 0;
+#ifdef XFS_DIR2_TRACE
+ out_free_attr_trace:
+#endif
+#ifdef XFS_ATTR_TRACE
+        ktrace_free(xfs_attr_trace_buf);
+ out_free_bmbt_trace:
+#endif
+#ifdef XFS_BMBT_TRACE
+        ktrace_free(xfs_bmbt_trace_buf);
+ out_free_bmap_trace:
+#endif
+#ifdef XFS_BMAP_TRACE
+        ktrace_free(xfs_bmap_trace_buf);
+ out_free_alloc_trace:
+#endif
+#ifdef XFS_ALLOC_TRACE
+        ktrace_free(xfs_alloc_trace_buf);
+ out:
+#endif
+        return -ENOMEM;
+}
+STATIC void
+xfs_free_trace_bufs(void)
+{
+#ifdef XFS_DIR2_TRACE
+        ktrace_free(xfs_dir2_trace_buf);
+#endif
+#ifdef XFS_ATTR_TRACE
+        ktrace_free(xfs_attr_trace_buf);
+#endif
+#ifdef XFS_BMBT_TRACE
+        ktrace_free(xfs_bmbt_trace_buf);
+#endif
+#ifdef XFS_BMAP_TRACE
+        ktrace_free(xfs_bmap_trace_buf);
+#endif
+#ifdef XFS_ALLOC_TRACE
+        ktrace_free(xfs_alloc_trace_buf);
+#endif
+}
 STATIC int __init
-init_xfs_fs( void )
+xfs_init_zones(void)
+{
+        xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
+                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+                                        KM_ZONE_SPREAD,
+                                        xfs_fs_inode_init_once);
+        if (!xfs_vnode_zone)
+                goto out;
+        xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+        if (!xfs_ioend_zone)
+                goto out_destroy_vnode_zone;
+        xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+                                                  xfs_ioend_zone);
+        if (!xfs_ioend_pool)
+                goto out_destroy_ioend_zone;
+        xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+                                                "xfs_log_ticket");
+        if (!xfs_log_ticket_zone)
+                goto out_destroy_ioend_pool;
+        xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+                                                "xfs_bmap_free_item");
+        if (!xfs_bmap_free_item_zone)
+                goto out_destroy_log_ticket_zone;
+        xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+                                                "xfs_btree_cur");
+        if (!xfs_btree_cur_zone)
+                goto out_destroy_bmap_free_item_zone;
+        xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+                                                "xfs_da_state");
+        if (!xfs_da_state_zone)
+                goto out_destroy_btree_cur_zone;
+        xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+        if (!xfs_dabuf_zone)
+                goto out_destroy_da_state_zone;
+        xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+        if (!xfs_ifork_zone)
+                goto out_destroy_dabuf_zone;
+        xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+        if (!xfs_trans_zone)
+                goto out_destroy_ifork_zone;
+        /*
+         * The size of the zone allocated buf log item is the maximum
+         * size possible under XFS.  This wastes a little bit of memory,
+         * but it is much faster.
+         */
+        xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+                                (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
+                                  NBWORD) * sizeof(int))), "xfs_buf_item");
+        if (!xfs_buf_item_zone)
+                goto out_destroy_trans_zone;
+        xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+                        ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+                                 sizeof(xfs_extent_t))), "xfs_efd_item");
+        if (!xfs_efd_zone)
+                goto out_destroy_buf_item_zone;
+        xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+                        ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+                                sizeof(xfs_extent_t))), "xfs_efi_item");
+        if (!xfs_efi_zone)
+                goto out_destroy_efd_zone;
+        xfs_inode_zone =
+                kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
+                                        KM_ZONE_SPREAD, NULL);
+        if (!xfs_inode_zone)
+                goto out_destroy_efi_zone;
+        xfs_ili_zone =
+                kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+                                        KM_ZONE_SPREAD, NULL);
+        if (!xfs_ili_zone)
+                goto out_destroy_inode_zone;
+#ifdef CONFIG_XFS_POSIX_ACL
+        xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
+        if (!xfs_acl_zone)
+                goto out_destroy_ili_zone;
+#endif
+        return 0;
+#ifdef CONFIG_XFS_POSIX_ACL
+ out_destroy_ili_zone:
+#endif
+        kmem_zone_destroy(xfs_ili_zone);
+ out_destroy_inode_zone:
+        kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+        kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+        kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+        kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_trans_zone:
+        kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+        kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+        kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+        kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+        kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+        kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+        kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+        mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+        kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+        kmem_zone_destroy(xfs_vnode_zone);
+ out:
+        return -ENOMEM;
+}
+STATIC void
+xfs_destroy_zones(void)
+{
+#ifdef CONFIG_XFS_POSIX_ACL
+        kmem_zone_destroy(xfs_acl_zone);
+#endif
+        kmem_zone_destroy(xfs_ili_zone);
+        kmem_zone_destroy(xfs_inode_zone);
+        kmem_zone_destroy(xfs_efi_zone);
+        kmem_zone_destroy(xfs_efd_zone);
+        kmem_zone_destroy(xfs_buf_item_zone);
+        kmem_zone_destroy(xfs_trans_zone);
+        kmem_zone_destroy(xfs_ifork_zone);
+        kmem_zone_destroy(xfs_dabuf_zone);
+        kmem_zone_destroy(xfs_da_state_zone);
+        kmem_zone_destroy(xfs_btree_cur_zone);
+        kmem_zone_destroy(xfs_bmap_free_item_zone);
+        kmem_zone_destroy(xfs_log_ticket_zone);
+        mempool_destroy(xfs_ioend_pool);
+        kmem_zone_destroy(xfs_ioend_zone);
+        kmem_zone_destroy(xfs_vnode_zone);
+}
+STATIC int __init
+init_xfs_fs(void)
 {
        int                     error;
        static char             message[] __initdata = KERN_INFO \
@@ -1440,42 +2175,73 @@ init_xfs_fs( void )
        printk(message);
        ktrace_init(64);
+        vn_init();
+        xfs_dir_startup();
        error = xfs_init_zones();
-        if (error < 0)
+        if (error)
-                goto undo_zones;
+                goto out;
+        error = xfs_alloc_trace_bufs();
+        if (error)
+                goto out_destroy_zones;
+        error = xfs_mru_cache_init();
+        if (error)
+                goto out_free_trace_buffers;
+        error = xfs_filestream_init();
+        if (error)
+                goto out_mru_cache_uninit;
        error = xfs_buf_init();
-        if (error < 0)
+        if (error)
-                goto undo_buffers;
+                goto out_filestream_uninit;
+        error = xfs_init_procfs();
+        if (error)
+                goto out_buf_terminate;
+        error = xfs_sysctl_register();
+        if (error)
+                goto out_cleanup_procfs;
-        vn_init();
-        xfs_init();
-        uuid_init();
        vfs_initquota();
        error = register_filesystem(&xfs_fs_type);
        if (error)
-                goto undo_register;
+                goto out_sysctl_unregister;
        return 0;
-undo_register:
+ out_sysctl_unregister:
+        xfs_sysctl_unregister();
+ out_cleanup_procfs:
+        xfs_cleanup_procfs();
+ out_buf_terminate:
        xfs_buf_terminate();
+ out_filestream_uninit:
-undo_buffers:
+        xfs_filestream_uninit();
+ out_mru_cache_uninit:
+        xfs_mru_cache_uninit();
+ out_free_trace_buffers:
+        xfs_free_trace_bufs();
+ out_destroy_zones:
        xfs_destroy_zones();
+ out:
-undo_zones:
        return error;
 }
 STATIC void __exit
-exit_xfs_fs( void )
+exit_xfs_fs(void)
 {
        vfs_exitquota();
        unregister_filesystem(&xfs_fs_type);
-        xfs_cleanup();
+        xfs_sysctl_unregister();
+        xfs_cleanup_procfs();
        xfs_buf_terminate();
+        xfs_filestream_uninit();
+        xfs_mru_cache_uninit();
+        xfs_free_trace_bufs();
        xfs_destroy_zones();
        ktrace_uninit();
 }
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d3303..b7d13da01bd6 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -107,12 +107,10 @@ extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
 extern void xfs_flush_inode(struct xfs_inode *);
 extern void xfs_flush_device(struct xfs_inode *);
-extern int  xfs_blkdev_get(struct xfs_mount *, const char *,
-                                struct block_device **);
-extern void xfs_blkdev_put(struct block_device *);
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 extern const struct export_operations xfs_export_operations;
+extern struct xattr_handler *xfs_xattr_handlers[];
 #define XFS_M(sb)               ((struct xfs_mount *)((sb)->s_fs_info))
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05c..7dacb5bbde3f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
        {}
 };
-void
+int
 xfs_sysctl_register(void)
 {
        xfs_table_header = register_sysctl_table(xfs_root_table);
+        if (!xfs_table_header)
+                return -ENOMEM;
+        return 0;
 }
 void
 xfs_sysctl_unregister(void)
 {
-        if (xfs_table_header)
+        unregister_sysctl_table(xfs_table_header);
-                unregister_sysctl_table(xfs_table_header);
 }
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6f..4aadb8056c37 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
 extern xfs_param_t      xfs_params;
 #ifdef CONFIG_SYSCTL
-extern void xfs_sysctl_register(void);
+extern int xfs_sysctl_register(void);
 extern void xfs_sysctl_unregister(void);
 #else
-# define xfs_sysctl_register()          do { } while (0)
+# define xfs_sysctl_register()          (0)
 # define xfs_sysctl_unregister()        do { } while (0)
 #endif /* CONFIG_SYSCTL */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe007338..25488b6d9881 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -82,56 +82,6 @@ vn_ioerror(
                xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
 }
-/*
- * Revalidate the Linux inode from the XFS inode.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
- */
-int
-vn_revalidate(
-        bhv_vnode_t             *vp)
-{
-        struct inode            *inode = vn_to_inode(vp);
-        struct xfs_inode        *ip = XFS_I(inode);
-        struct xfs_mount        *mp = ip->i_mount;
-        unsigned long           xflags;
-        xfs_itrace_entry(ip);
-        if (XFS_FORCED_SHUTDOWN(mp))
-                return -EIO;
-        xfs_ilock(ip, XFS_ILOCK_SHARED);
-        inode->i_mode       = ip->i_d.di_mode;
-        inode->i_uid        = ip->i_d.di_uid;
-        inode->i_gid        = ip->i_d.di_gid;
-        inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
-        inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
-        inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
-        inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
-        xflags = xfs_ip2xflags(ip);
-        if (xflags & XFS_XFLAG_IMMUTABLE)
-                inode->i_flags |= S_IMMUTABLE;
-        else
-                inode->i_flags &= ~S_IMMUTABLE;
-        if (xflags & XFS_XFLAG_APPEND)
-                inode->i_flags |= S_APPEND;
-        else
-                inode->i_flags &= ~S_APPEND;
-        if (xflags & XFS_XFLAG_SYNC)
-                inode->i_flags |= S_SYNC;
-        else
-                inode->i_flags &= ~S_SYNC;
-        if (xflags & XFS_XFLAG_NOATIME)
-                inode->i_flags |= S_NOATIME;
-        else
-                inode->i_flags &= ~S_NOATIME;
-        xfs_iunlock(ip, XFS_ILOCK_SHARED);
-        xfs_iflags_clear(ip, XFS_IMODIFIED);
-        return 0;
-}
 /*
 * Add a reference to a referenced vnode.
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9b..41ca2cec5d31 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,7 +19,6 @@
 #define __XFS_VNODE_H__
 struct file;
-struct bhv_vattr;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
@@ -66,87 +65,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
                                           Prevent VM access to the pages until
                                           the operation completes. */
-/*
- * Vnode attributes.  va_mask indicates those attributes the caller
- * wants to set or extract.
- */
-typedef struct bhv_vattr {
-        int             va_mask;        /* bit-mask of attributes present */
-        mode_t          va_mode;        /* file access mode and type */
-        xfs_nlink_t     va_nlink;       /* number of references to file */
-        uid_t           va_uid;         /* owner user id */
-        gid_t           va_gid;         /* owner group id */
-        xfs_ino_t       va_nodeid;      /* file id */
-        xfs_off_t       va_size;        /* file size in bytes */
-        u_long          va_blocksize;   /* blocksize preferred for i/o */
-        struct timespec va_atime;       /* time of last access */
-        struct timespec va_mtime;       /* time of last modification */
-        struct timespec va_ctime;       /* time file changed */
-        u_int           va_gen;         /* generation number of file */
-        xfs_dev_t       va_rdev;        /* device the special file represents */
-        __int64_t       va_nblocks;     /* number of blocks allocated */
-        u_long          va_xflags;      /* random extended file flags */
-        u_long          va_extsize;     /* file extent size */
-        u_long          va_nextents;    /* number of extents in file */
-        u_long          va_anextents;   /* number of attr extents in file */
-        prid_t          va_projid;      /* project id */
-} bhv_vattr_t;
-/*
- * setattr or getattr attributes
- */
-#define XFS_AT_TYPE             0x00000001
-#define XFS_AT_MODE             0x00000002
-#define XFS_AT_UID              0x00000004
-#define XFS_AT_GID              0x00000008
-#define XFS_AT_FSID             0x00000010
-#define XFS_AT_NODEID           0x00000020
-#define XFS_AT_NLINK            0x00000040
-#define XFS_AT_SIZE             0x00000080
-#define XFS_AT_ATIME            0x00000100
-#define XFS_AT_MTIME            0x00000200
-#define XFS_AT_CTIME            0x00000400
-#define XFS_AT_RDEV             0x00000800
-#define XFS_AT_BLKSIZE          0x00001000
-#define XFS_AT_NBLOCKS          0x00002000
-#define XFS_AT_VCODE            0x00004000
-#define XFS_AT_MAC              0x00008000
-#define XFS_AT_UPDATIME         0x00010000
-#define XFS_AT_UPDMTIME         0x00020000
-#define XFS_AT_UPDCTIME         0x00040000
-#define XFS_AT_ACL              0x00080000
-#define XFS_AT_CAP              0x00100000
-#define XFS_AT_INF              0x00200000
-#define XFS_AT_XFLAGS           0x00400000
-#define XFS_AT_EXTSIZE          0x00800000
-#define XFS_AT_NEXTENTS         0x01000000
-#define XFS_AT_ANEXTENTS        0x02000000
-#define XFS_AT_PROJID           0x04000000
-#define XFS_AT_SIZE_NOPERM      0x08000000
-#define XFS_AT_GENCOUNT         0x10000000
-#define XFS_AT_ALL      (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
-                XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
-                XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
-                XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
-                XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
-                XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
-#define XFS_AT_STAT     (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
-                XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
-                XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
-                XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
-#define XFS_AT_TIMES    (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
-#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
-#define XFS_AT_NOSET    (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
-                XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
-                XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
 extern void     vn_init(void);
-extern int      vn_revalidate(bhv_vnode_t *);
 /*
 * Yeah, these don't take vnode anymore at all, all this should be
@@ -219,15 +139,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 #define VN_DIRTY(vp)    mapping_tagged(vn_to_inode(vp)->i_mapping, \
                                        PAGECACHE_TAG_DIRTY)
-/*
- * Flags to vop_setattr/getattr.
- */
-#define ATTR_UTIME      0x01    /* non-default utime(2) request */
-#define ATTR_DMI        0x08    /* invocation from a DMI function */
-#define ATTR_LAZY       0x80    /* set/get attributes lazily */
-#define ATTR_NONBLOCK   0x100   /* return EAGAIN if operation would block */
-#define ATTR_NOLOCK     0x200   /* Don't grab any conflicting locks */
-#define ATTR_NOSIZETOK  0x400   /* Don't get the SIZE token */
 /*
 * Tracking vnode activity.
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 000000000000..964621fde6ed
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+/*
+ * ACL handling.  Should eventually be moved into xfs_acl.c
+ */
+static int
+xfs_decode_acl(const char *name)
+{
+        /* Translate the two recognised ACL attribute names to their type. */
+        if (!strcmp(name, "posix_acl_access"))
+                return _ACL_TYPE_ACCESS;
+        if (!strcmp(name, "posix_acl_default"))
+                return _ACL_TYPE_DEFAULT;
+        /* Every other name is rejected. */
+        return -EINVAL;
+}
+/*
+ * Get system extended attributes which at the moment only
+ * includes Posix ACLs.
+ */
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+                void *buffer, size_t size)
+{
+        int type = xfs_decode_acl(name);
+        /* A negative type is the errno produced for an unrecognised name. */
+        return (type < 0) ? type : xfs_acl_vget(inode, buffer, size, type);
+}
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+                const void *value, size_t size, int flags)
+{
+        int type = xfs_decode_acl(name);
+        if (type < 0)
+                return type;
+        /* XATTR_CREATE is refused outright for the ACL attributes. */
+        if (flags & XATTR_CREATE)
+                return -EINVAL;
+        /* A NULL value removes the ACL; anything else stores it. */
+        return value ? xfs_acl_vset(inode, (void *)value, size, type)
+                     : xfs_acl_vremove(inode, type);
+}
+/*
+ * Handler table entry for names under XATTR_SYSTEM_PREFIX; both
+ * operations go through the ACL-only get/set routines above.
+ */
+static struct xattr_handler xfs_xattr_system_handler = {
+        .prefix = XATTR_SYSTEM_PREFIX,
+        .get    = xfs_xattr_system_get,
+        .set    = xfs_xattr_system_set,
+};
+/*
+ * Real xattr handling.  The only difference between the namespaces is
+ * a flag passed to the low-level attr code.
+ */
+static int
+__xfs_xattr_get(struct inode *inode, const char *name,
+                void *value, size_t size, int xflags)
+{
+        struct xfs_inode *xip = XFS_I(inode);
+        int len = size;
+        int rc;
+        /* An empty attribute name is never accepted. */
+        if (name[0] == '\0')
+                return -EINVAL;
+        /* Convert Linux syscall to XFS internal ATTR flags */
+        if (size == 0) {
+                /* Zero-sized request: set ATTR_KERNOVAL and drop the buffer. */
+                value = NULL;
+                xflags |= ATTR_KERNOVAL;
+        }
+        /* Negate the low-level result so failures come back negative. */
+        rc = -xfs_attr_get(xip, name, value, &len, xflags);
+        /* On success hand back the length xfs_attr_get() stored in len. */
+        return rc ? rc : len;
+}
+static int
+__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+                size_t size, int flags, int xflags)
+{
+        struct xfs_inode *xip = XFS_I(inode);
+        /* Reject the empty attribute name up front. */
+        if (name[0] == '\0')
+                return -EINVAL;
+        /* Convert Linux syscall to XFS internal ATTR flags */
+        xflags |= (flags & XATTR_CREATE) ? ATTR_CREATE : 0;
+        xflags |= (flags & XATTR_REPLACE) ? ATTR_REPLACE : 0;
+        /* No value means "remove"; otherwise the value is stored. */
+        if (value == NULL)
+                return -xfs_attr_remove(xip, name, xflags);
+        return -xfs_attr_set(xip, name, (void *)value, size, xflags);
+}
+static int
+xfs_xattr_user_get(struct inode *inode, const char *name,
+                void *value, size_t size)
+{
+        /* The user namespace adds no low-level attr flag. */
+        const int xflags = 0;
+        return __xfs_xattr_get(inode, name, value, size, xflags);
+}
+static int
+xfs_xattr_user_set(struct inode *inode, const char *name,
+                const void *value, size_t size, int flags)
+{
+        /* The user namespace adds no low-level attr flag. */
+        const int xflags = 0;
+        return __xfs_xattr_set(inode, name, value, size, flags, xflags);
+}
+/* Handler table entry for names under XATTR_USER_PREFIX. */
+static struct xattr_handler xfs_xattr_user_handler = {
+        .prefix = XATTR_USER_PREFIX,
+        .get    = xfs_xattr_user_get,
+        .set    = xfs_xattr_user_set,
+};
+static int
+xfs_xattr_trusted_get(struct inode *inode, const char *name,
+                void *value, size_t size)
+{
+        /* Trusted attributes are looked up with the ATTR_ROOT flag. */
+        const int xflags = ATTR_ROOT;
+        return __xfs_xattr_get(inode, name, value, size, xflags);
+}
+static int
+xfs_xattr_trusted_set(struct inode *inode, const char *name,
+                const void *value, size_t size, int flags)
+{
+        /* Trusted attributes are stored with the ATTR_ROOT flag. */
+        const int xflags = ATTR_ROOT;
+        return __xfs_xattr_set(inode, name, value, size, flags, xflags);
+}
+/* Handler table entry for names under XATTR_TRUSTED_PREFIX. */
+static struct xattr_handler xfs_xattr_trusted_handler = {
+        .prefix = XATTR_TRUSTED_PREFIX,
+        .get    = xfs_xattr_trusted_get,
+        .set    = xfs_xattr_trusted_set,
+};
+static int
+xfs_xattr_secure_get(struct inode *inode, const char *name,
+                void *value, size_t size)
+{
+        /* Security attributes are looked up with the ATTR_SECURE flag. */
+        const int xflags = ATTR_SECURE;
+        return __xfs_xattr_get(inode, name, value, size, xflags);
+}
+static int
+xfs_xattr_secure_set(struct inode *inode, const char *name,
+                const void *value, size_t size, int flags)
+{
+        /* Security attributes are stored with the ATTR_SECURE flag. */
+        const int xflags = ATTR_SECURE;
+        return __xfs_xattr_set(inode, name, value, size, flags, xflags);
+}
+/* Handler table entry for names under XATTR_SECURITY_PREFIX. */
+static struct xattr_handler xfs_xattr_security_handler = {
+        .prefix = XATTR_SECURITY_PREFIX,
+        .get    = xfs_xattr_secure_get,
+        .set    = xfs_xattr_secure_set,
+};
+/*
+ * NULL-terminated table of the four handlers defined in this file
+ * (user, trusted, security, system).
+ */
+struct xattr_handler *xfs_xattr_handlers[] = {
+        &xfs_xattr_user_handler,
+        &xfs_xattr_trusted_handler,
+        &xfs_xattr_security_handler,
+        &xfs_xattr_system_handler,
+        NULL
+};
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+        /*
+         * sizeof() on the bare namespace word includes its trailing NUL,
+         * so each result is the word length plus one -- presumably
+         * standing in for the '.' that ends the matching XATTR_*_PREFIX
+         * string (confirm against linux/xattr.h).
+         */
+        if (flags & XFS_ATTR_SECURE)
+                return sizeof("security");
+        if (flags & XFS_ATTR_ROOT)
+                return sizeof("trusted");
+        return sizeof("user");
+}
+static const char *xfs_xattr_prefix(int flags)
+{
+        /* Pick the prefix string of the handler matching the on-disk flag. */
+        if (flags & XFS_ATTR_SECURE)
+                return xfs_xattr_security_handler.prefix;
+        if (flags & XFS_ATTR_ROOT)
+                return xfs_xattr_trusted_handler.prefix;
+        /* Neither flag set: fall back to the user namespace prefix. */
+        return xfs_xattr_user_handler.prefix;
+}
+static int
+xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
+                char *name, int namelen, int valuelen, char *value)
+{
+        unsigned int plen = xfs_xattr_prefix_len(flags);
+        char *dst;
+        int end;
+        ASSERT(context->count >= 0);
+        /*
+         * Root namespace entries are silently skipped for callers
+         * without CAP_SYS_ADMIN.
+         */
+        if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+                return 0;
+        /* Bytes used once prefix, name and terminating NUL are appended. */
+        end = context->count + plen + namelen + 1;
+        if (end > context->firstu) {
+                /* insufficient space: flag it through a negative count */
+                context->count = -1;
+                return 1;
+        }
+        dst = (char *)context->alist + context->count;
+        strncpy(dst, xfs_xattr_prefix(flags), plen);
+        strncpy(dst + plen, name, namelen);             /* real name */
+        dst[plen + namelen] = '\0';
+        context->count += plen + namelen + 1;
+        return 0;
+}
+/*
+ * Size-only variant of xfs_xattr_put_listent(): adds the space one
+ * entry would need (prefix + name + NUL) to context->count without
+ * writing anything.  name, valuelen and value are unused.  Always
+ * returns 0.
+ */
+static int
+xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
+                char *name, int namelen, int valuelen, char *value)
+{
+        /*
+         * Apply the same visibility rule as xfs_xattr_put_listent() so
+         * that the size reported for a zero-length probe matches what a
+         * subsequent real listing will actually contain.
+         */
+        if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+                return 0;
+        context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+        return 0;
+}
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+                size_t size, ssize_t *result)
+{
+        const ssize_t start = *result;
+        /* The running total always grows, even when nothing is copied. */
+        *result = start + len;
+        if (size != 0) {
+                /* A real buffer was supplied: it must hold the new total. */
+                if (*result > size)
+                        return -ERANGE;
+                strcpy((char *)data + start, name);
+        }
+        return 0;
+}
+/*
+ * List the extended attribute names of an inode.
+ *
+ * dentry: entry whose d_inode is listed.
+ * data:   destination for the NUL-separated names.
+ * size:   capacity of data; 0 only computes the space required.
+ *
+ * Returns the number of bytes used (or needed when size is 0), -ERANGE
+ * if the buffer is too small, or the negated error reported by
+ * xfs_attr_list_int().
+ */
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+        struct xfs_attr_list_context context;
+        struct attrlist_cursor_kern cursor = { 0 };
+        struct inode            *inode = dentry->d_inode;
+        int                     error;
+        /*
+         * First read the regular on-disk attributes.
+         */
+        memset(&context, 0, sizeof(context));
+        context.dp = XFS_I(inode);
+        context.cursor = &cursor;
+        context.resynch = 1;
+        context.alist = data;
+        context.bufsize = size;
+        context.firstu = context.bufsize;
+        if (size)
+                context.put_listent = xfs_xattr_put_listent;
+        else
+                context.put_listent = xfs_xattr_put_listent_sizes;
+        error = xfs_attr_list_int(&context);
+        /*
+         * Test the overflow marker first: xfs_xattr_put_listent() sets
+         * count to -1 and itself returns non-zero when space runs out,
+         * so that case must keep mapping to -ERANGE.
+         */
+        if (context.count < 0)
+                return -ERANGE;
+        /*
+         * xfs_attr_list_int() reports failures as positive values (for
+         * example EIO on a shut down filesystem); do not present a
+         * partial listing as success.
+         */
+        if (error)
+                return -error;
+        /*
+         * Then add the two synthetic ACL attributes.
+         */
+        if (xfs_acl_vhasacl_access(inode)) {
+                error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+                                strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+                                data, size, &context.count);
+                if (error)
+                        return error;
+        }
+        if (xfs_acl_vhasacl_default(inode)) {
+                error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+                                strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+                                data, size, &context.count);
+                if (error)
+                        return error;
+        }
+        return context.count;
+}
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd5..fc9f3fb39b7b 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1435,8 +1435,7 @@ xfs_dqlock2(
 /* ARGSUSED */
 int
 xfs_qm_dqpurge(
-        xfs_dquot_t     *dqp,
+        xfs_dquot_t     *dqp)
-        uint            flags)
 {
        xfs_dqhash_t    *thishash;
        xfs_mount_t     *mp = dqp->q_mount;
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e2..f7393bba4e95 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -164,7 +164,7 @@ extern void		xfs_qm_dqprint(xfs_dquot_t *);
 extern void             xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int              xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int              xfs_qm_dqpurge(xfs_dquot_t *, uint);
+extern int              xfs_qm_dqpurge(xfs_dquot_t *);
 extern void             xfs_qm_dqunpin_wait(xfs_dquot_t *);
 extern int              xfs_qm_dqlock_nowait(xfs_dquot_t *);
 extern int              xfs_qm_dqflock_nowait(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca78412..08d2fc89e6a1 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
         * xfs_trans_delete_ail() drops the AIL lock.
         */
        xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
-        kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
+        kmem_free(qfs);
-        kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
+        kmem_free(qfe);
        return (xfs_lsn_t)-1;
 }
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c5..021934a3d456 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
                xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
                xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
        }
-        kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
+        kmem_free(xqm->qm_usr_dqhtable);
-        kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
+        kmem_free(xqm->qm_grp_dqhtable);
        xqm->qm_usr_dqhtable = NULL;
        xqm->qm_grp_dqhtable = NULL;
        xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
 #ifdef DEBUG
        mutex_destroy(&qcheck_lock);
 #endif
-        kmem_free(xqm, sizeof(xfs_qm_t));
+        kmem_free(xqm);
 }
 /*
@@ -445,11 +445,11 @@ xfs_qm_unmount_quotas(
                }
        }
        if (uqp) {
-                 XFS_PURGE_INODE(uqp);
+                 IRELE(uqp);
                 mp->m_quotainfo->qi_uquotaip = NULL;
        }
        if (gqp) {
-                XFS_PURGE_INODE(gqp);
+                IRELE(gqp);
                mp->m_quotainfo->qi_gquotaip = NULL;
        }
 out:
@@ -631,7 +631,7 @@ xfs_qm_dqpurge_int(
                 * freelist in INACTIVE state.
                 */
                nextdqp = dqp->MPL_NEXT;
-                nmisses += xfs_qm_dqpurge(dqp, flags);
+                nmisses += xfs_qm_dqpurge(dqp);
                dqp = nextdqp;
        }
        xfs_qm_mplist_unlock(mp);
@@ -1134,7 +1134,7 @@ xfs_qm_init_quotainfo(
         * and change the superblock accordingly.
         */
        if ((error = xfs_qm_init_quotainos(mp))) {
-                kmem_free(qinf, sizeof(xfs_quotainfo_t));
+                kmem_free(qinf);
                mp->m_quotainfo = NULL;
                return error;
        }
@@ -1240,15 +1240,15 @@ xfs_qm_destroy_quotainfo(
        xfs_qm_list_destroy(&qi->qi_dqlist);
        if (qi->qi_uquotaip) {
-                XFS_PURGE_INODE(qi->qi_uquotaip);
+                IRELE(qi->qi_uquotaip);
                qi->qi_uquotaip = NULL; /* paranoia */
        }
        if (qi->qi_gquotaip) {
-                XFS_PURGE_INODE(qi->qi_gquotaip);
+                IRELE(qi->qi_gquotaip);
                qi->qi_gquotaip = NULL;
        }
        mutex_destroy(&qi->qi_quotaofflock);
-        kmem_free(qi, sizeof(xfs_quotainfo_t));
+        kmem_free(qi);
        mp->m_quotainfo = NULL;
 }
@@ -1394,7 +1394,7 @@ xfs_qm_qino_alloc(
         * locked exclusively and joined to the transaction already.
         */
        ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
-        VN_HOLD(XFS_ITOV((*ip)));
+        IHOLD(*ip);
        /*
         * Make the changes in the superblock, and log those too.
@@ -1623,7 +1623,7 @@ xfs_qm_dqiterate(
                        break;
        } while (nmaps > 0);
-        kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
+        kmem_free(map);
        return error;
 }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b6..adfb8723f65a 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
         * if we don't need them anymore.
         */
        if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
-                XFS_PURGE_INODE(XFS_QI_UQIP(mp));
+                IRELE(XFS_QI_UQIP(mp));
                XFS_QI_UQIP(mp) = NULL;
        }
        if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
-                XFS_PURGE_INODE(XFS_QI_GQIP(mp));
+                IRELE(XFS_QI_GQIP(mp));
                XFS_QI_GQIP(mp) = NULL;
        }
 out_error:
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
                for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
                        xfs_dqtest_cmp(d);
                        e = (xfs_dqtest_t *) d->HL_NEXT;
-                        kmem_free(d, sizeof(xfs_dqtest_t));
+                        kmem_free(d);
                        d = e;
                }
                h1 = &qmtest_gdqtab[i];
                for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
                        xfs_dqtest_cmp(d);
                        e = (xfs_dqtest_t *) d->HL_NEXT;
-                        kmem_free(d, sizeof(xfs_dqtest_t));
+                        kmem_free(d);
                        d = e;
                }
        }
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
        } else {
                cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
        }
-        kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+        kmem_free(qmtest_udqtab);
-        kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t));
+        kmem_free(qmtest_gdqtab);
        mutex_unlock(&qcheck_lock);
        return (qmtest_nfails);
 }
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c565..c4fcea600bc2 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
 #define XFS_IS_SUSER_DQUOT(dqp)         \
        (!((dqp)->q_core.d_id))
-#define XFS_PURGE_INODE(ip)             \
-        IRELE(ip);
 #define DQFLAGTO_TYPESTR(d)     (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
                                 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
                                 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508f..a34ef05489b1 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
                if (sleep & KM_SLEEP)
                        panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
-                kmem_free(ktp, sizeof(*ktp));
+                kmem_free(ktp);
                return NULL;
        }
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
        } else {
                entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
-                kmem_free(ktp->kt_entries, entries_size);
+                kmem_free(ktp->kt_entries);
        }
        kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf8590..5830c040ea7e 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
 */
 #include <xfs.h>
-static mutex_t  uuid_monitor;
+static DEFINE_MUTEX(uuid_monitor);
 static int      uuid_table_size;
 static uuid_t   *uuid_table;
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
        ASSERT(i < uuid_table_size);
        mutex_unlock(&uuid_monitor);
 }
-void __init
-uuid_init(void)
-{
-        mutex_init(&uuid_monitor);
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199ba..cff5b607d445 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
        unsigned char   __u_bits[16];
 } uuid_t;
-extern void uuid_init(void);
 extern void uuid_create_nil(uuid_t *uuid);
 extern int uuid_is_nil(uuid_t *uuid);
 extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703a..3e4648ad9cfc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -341,8 +341,7 @@ xfs_acl_iaccess(
        /* If the file has no ACL return -1. */
        rval = sizeof(xfs_acl_t);
-        if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
+        if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
-                                        ATTR_ROOT | ATTR_KERNACCESS)) {
                _ACL_FREE(acl);
                return -1;
        }
@@ -720,7 +719,7 @@ xfs_acl_setmode(
        xfs_acl_t       *acl,
        int             *basicperms)
 {
-        bhv_vattr_t     va;
+        struct iattr    iattr;
        xfs_acl_entry_t *ap;
        xfs_acl_entry_t *gap = NULL;
        int             i, nomask = 1;
@@ -734,25 +733,25 @@ xfs_acl_setmode(
         * Copy the u::, g::, o::, and m:: bits from the ACL into the
         * mode.  The m:: bits take precedence over the g:: bits.
         */
-        va.va_mask = XFS_AT_MODE;
+        iattr.ia_valid = ATTR_MODE;
-        va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
+        iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode;
-        va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
+        iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
        ap = acl->acl_entry;
        for (i = 0; i < acl->acl_cnt; ++i) {
                switch (ap->ae_tag) {
                case ACL_USER_OBJ:
-                        va.va_mode |= ap->ae_perm << 6;
+                        iattr.ia_mode |= ap->ae_perm << 6;
                        break;
                case ACL_GROUP_OBJ:
                        gap = ap;
                        break;
                case ACL_MASK:  /* more than just standard modes */
                        nomask = 0;
-                        va.va_mode |= ap->ae_perm << 3;
+                        iattr.ia_mode |= ap->ae_perm << 3;
                        *basicperms = 0;
                        break;
                case ACL_OTHER:
-                        va.va_mode |= ap->ae_perm;
+                        iattr.ia_mode |= ap->ae_perm;
                        break;
                default:        /* more than just standard modes */
                        *basicperms = 0;
@@ -763,9 +762,9 @@ xfs_acl_setmode(
        /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
        if (gap && nomask)
-                va.va_mode |= gap->ae_perm << 3;
+                iattr.ia_mode |= gap->ae_perm << 3;
-        return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred);
+        return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred);
 }
 /*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c4..323ee94cf831 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
 #define SGI_ACL_FILE_SIZE       (sizeof(SGI_ACL_FILE)-1)
 #define SGI_ACL_DEFAULT_SIZE    (sizeof(SGI_ACL_DEFAULT)-1)
+#define _ACL_TYPE_ACCESS        1
+#define _ACL_TYPE_DEFAULT       2
 #ifdef CONFIG_XFS_POSIX_ACL
@@ -66,8 +68,6 @@ extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
 extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
 extern int xfs_acl_vremove(bhv_vnode_t *, int);
-#define _ACL_TYPE_ACCESS        1
-#define _ACL_TYPE_DEFAULT       2
 #define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
 #define _ACL_INHERIT(c,m,d)     (xfs_acl_inherit(c,m,d))
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a859186..78de80e3caa2 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
-#include <linux/capability.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
@@ -57,11 +55,6 @@
 * Provide the external interfaces to manage attribute lists.
 */
-#define ATTR_SYSCOUNT   2
-static struct attrnames posix_acl_access;
-static struct attrnames posix_acl_default;
-static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
 /*========================================================================
 * Function prototypes for the kernel.
 *========================================================================*/
@@ -116,6 +109,17 @@ xfs_attr_name_to_xname(
        return 0;
 }
+STATIC int
+xfs_inode_hasattr(
+        struct xfs_inode        *ip)
+{
+        /* XFS_IFORK_Q() false -- presumably no attribute fork at all. */
+        if (!XFS_IFORK_Q(ip))
+                return 0;
+        /* Extents-format attribute fork with no extents recorded. */
+        if (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+            ip->i_d.di_anextents == 0)
+                return 0;
+        return 1;
+}
 /*========================================================================
 * Overall external interface routines.
 *========================================================================*/
@@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
        xfs_da_args_t   args;
        int             error;
-        if ((XFS_IFORK_Q(ip) == 0) ||
+        if (!xfs_inode_hasattr(ip))
-            (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
+                return ENOATTR;
-             ip->i_d.di_anextents == 0))
-                return(ENOATTR);
        /*
         * Fill in the arg structure for this request.
@@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
        /*
         * Decide on what work routines to call based on the inode size.
         */
-        if (XFS_IFORK_Q(ip) == 0 ||
+        if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-            (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-             ip->i_d.di_anextents == 0)) {
-                error = XFS_ERROR(ENOATTR);
-        } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
                error = xfs_attr_shortform_getvalue(&args);
        } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
                error = xfs_attr_leaf_get(&args);
@@ -241,8 +239,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
        args.firstblock = &firstblock;
        args.flist = &flist;
        args.whichfork = XFS_ATTR_FORK;
-        args.addname = 1;
+        args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-        args.oknoent = 1;
        /*
         * Determine space new attribute will use, and if it would be
@@ -529,9 +526,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
        /*
         * Decide on what work routines to call based on the inode size.
         */
-        if (XFS_IFORK_Q(dp) == 0 ||
+        if (!xfs_inode_hasattr(dp)) {
-            (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-             dp->i_d.di_anextents == 0)) {
                error = XFS_ERROR(ENOATTR);
                goto out;
        }
@@ -601,29 +596,33 @@ xfs_attr_remove(
                return error;
        xfs_ilock(dp, XFS_ILOCK_SHARED);
-        if (XFS_IFORK_Q(dp) == 0 ||
+        if (!xfs_inode_hasattr(dp)) {
-                   (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-                    dp->i_d.di_anextents == 0)) {
                xfs_iunlock(dp, XFS_ILOCK_SHARED);
-                return(XFS_ERROR(ENOATTR));
+                return XFS_ERROR(ENOATTR);
        }
        xfs_iunlock(dp, XFS_ILOCK_SHARED);
        return xfs_attr_remove_int(dp, &xname, flags);
 }
-STATIC int
+int
 xfs_attr_list_int(xfs_attr_list_context_t *context)
 {
        int error;
        xfs_inode_t *dp = context->dp;
+        XFS_STATS_INC(xs_attr_list);
+        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+                return EIO;
+        xfs_ilock(dp, XFS_ILOCK_SHARED);
+        xfs_attr_trace_l_c("syscall start", context);
        /*
         * Decide on what work routines to call based on the inode size.
         */
-        if (XFS_IFORK_Q(dp) == 0 ||
+        if (!xfs_inode_hasattr(dp)) {
-            (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-             dp->i_d.di_anextents == 0)) {
                error = 0;
        } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
                error = xfs_attr_shortform_list(context);
@@ -632,6 +631,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
        } else {
                error = xfs_attr_node_list(context);
        }
+        xfs_iunlock(dp, XFS_ILOCK_SHARED);
+        xfs_attr_trace_l_c("syscall end", context);
        return error;
 }
@@ -648,74 +651,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
 */
 /*ARGSUSED*/
 STATIC int
-xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
+xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
                     char *name, int namelen,
                     int valuelen, char *value)
 {
+        struct attrlist *alist = (struct attrlist *)context->alist;
        attrlist_ent_t *aep;
        int arraytop;
        ASSERT(!(context->flags & ATTR_KERNOVAL));
        ASSERT(context->count >= 0);
        ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
-        ASSERT(context->firstu >= sizeof(*context->alist));
+        ASSERT(context->firstu >= sizeof(*alist));
        ASSERT(context->firstu <= context->bufsize);
-        arraytop = sizeof(*context->alist) +
+        /*
-                        context->count * sizeof(context->alist->al_offset[0]);
+         * Only list entries in the right namespace.
+         */
+        if (((context->flags & ATTR_SECURE) == 0) !=
+            ((flags & XFS_ATTR_SECURE) == 0))
+                return 0;
+        if (((context->flags & ATTR_ROOT) == 0) !=
+            ((flags & XFS_ATTR_ROOT) == 0))
+                return 0;
+        arraytop = sizeof(*alist) +
+                        context->count * sizeof(alist->al_offset[0]);
        context->firstu -= ATTR_ENTSIZE(namelen);
        if (context->firstu < arraytop) {
                xfs_attr_trace_l_c("buffer full", context);
-                context->alist->al_more = 1;
+                alist->al_more = 1;
                context->seen_enough = 1;
                return 1;
        }
-        aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
+        aep = (attrlist_ent_t *)&context->alist[context->firstu];
        aep->a_valuelen = valuelen;
        memcpy(aep->a_name, name, namelen);
-        aep->a_name[ namelen ] = 0;
+        aep->a_name[namelen] = 0;
-        context->alist->al_offset[ context->count++ ] = context->firstu;
+        alist->al_offset[context->count++] = context->firstu;
-        context->alist->al_count = context->count;
+        alist->al_count = context->count;
        xfs_attr_trace_l_c("add", context);
        return 0;
 }
-STATIC int
-xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
-                     char *name, int namelen,
-                     int valuelen, char *value)
-{
-        char *offset;
-        int arraytop;
-        ASSERT(context->count >= 0);
-        arraytop = context->count + namesp->attr_namelen + namelen + 1;
-        if (arraytop > context->firstu) {
-                context->count = -1;    /* insufficient space */
-                return 1;
-        }
-        offset = (char *)context->alist + context->count;
-        strncpy(offset, namesp->attr_name, namesp->attr_namelen);
-        offset += namesp->attr_namelen;
-        strncpy(offset, name, namelen);                 /* real name */
-        offset += namelen;
-        *offset = '\0';
-        context->count += namesp->attr_namelen + namelen + 1;
-        return 0;
-}
-/*ARGSUSED*/
-STATIC int
-xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
-                     char *name, int namelen,
-                     int valuelen, char *value)
-{
-        context->count += namesp->attr_namelen + namelen + 1;
-        return 0;
-}
 /*
 * Generate a list of extended attribute names and optionally
 * also value lengths.  Positive return value follows the XFS
@@ -732,10 +711,9 @@ xfs_attr_list(
        attrlist_cursor_kern_t *cursor)
 {
        xfs_attr_list_context_t context;
+        struct attrlist *alist;
        int error;
-        XFS_STATS_INC(xs_attr_list);
        /*
         * Validate the cursor.
         */
@@ -756,52 +734,23 @@ xfs_attr_list(
        /*
         * Initialize the output buffer.
         */
+        memset(&context, 0, sizeof(context));
        context.dp = dp;
        context.cursor = cursor;
-        context.count = 0;
-        context.dupcnt = 0;
        context.resynch = 1;
        context.flags = flags;
-        context.seen_enough = 0;
+        context.alist = buffer;
-        context.alist = (attrlist_t *)buffer;
+        context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
-        context.put_value = 0;
+        context.firstu = context.bufsize;
+        context.put_listent = xfs_attr_put_listent;
-        if (flags & ATTR_KERNAMELS) {
-                context.bufsize = bufsize;
-                context.firstu = context.bufsize;
-                if (flags & ATTR_KERNOVAL)
-                        context.put_listent = xfs_attr_kern_list_sizes;
-                else
-                        context.put_listent = xfs_attr_kern_list;
-        } else {
-                context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
-                context.firstu = context.bufsize;
-                context.alist->al_count = 0;
-                context.alist->al_more = 0;
-                context.alist->al_offset[0] = context.bufsize;
-                context.put_listent = xfs_attr_put_listent;
-        }
-        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+        alist = (struct attrlist *)context.alist;
-                return EIO;
+        alist->al_count = 0;
+        alist->al_more = 0;
-        xfs_ilock(dp, XFS_ILOCK_SHARED);
+        alist->al_offset[0] = context.bufsize;
-        xfs_attr_trace_l_c("syscall start", &context);
        error = xfs_attr_list_int(&context);
+        ASSERT(error >= 0);
-        xfs_iunlock(dp, XFS_ILOCK_SHARED);
-        xfs_attr_trace_l_c("syscall end", &context);
-        if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
-                /* must return negated buffer size or the error */
-                if (context.count < 0)
-                        error = XFS_ERROR(ERANGE);
-                else
-                        error = -context.count;
-        } else
-                ASSERT(error >= 0);
        return error;
 }
@@ -816,12 +765,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
        ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
        xfs_ilock(dp, XFS_ILOCK_SHARED);
-        if ((XFS_IFORK_Q(dp) == 0) ||
+        if (!xfs_inode_hasattr(dp) ||
-            (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
+            dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-            (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-             dp->i_d.di_anextents == 0)) {
                xfs_iunlock(dp, XFS_ILOCK_SHARED);
-                return(0);
+                return 0;
        }
        xfs_iunlock(dp, XFS_ILOCK_SHARED);
@@ -854,10 +801,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
        /*
         * Decide on what work routines to call based on the inode size.
         */
-        if ((XFS_IFORK_Q(dp) == 0) ||
+        if (!xfs_inode_hasattr(dp) ||
-            (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
+            dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-            (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
-             dp->i_d.di_anextents == 0)) {
                error = 0;
                goto out;
        }
@@ -974,7 +919,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                        xfs_da_brelse(args->trans, bp);
                        return(retval);
                }
-                args->rename = 1;                       /* an atomic rename */
+                args->op_flags |= XFS_DA_OP_RENAME;     /* an atomic rename */
                args->blkno2 = args->blkno;             /* set 2nd entry info*/
                args->index2 = args->index;
                args->rmtblkno2 = args->rmtblkno;
@@ -1054,7 +999,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
         * so that one disappears and one appears atomically.  Then we
         * must remove the "old" attribute/value pair.
         */
-        if (args->rename) {
+        if (args->op_flags & XFS_DA_OP_RENAME) {
                /*
                 * In a separate transaction, set the incomplete flag on the
                 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -1307,7 +1252,7 @@ restart:
        } else if (retval == EEXIST) {
                if (args->flags & ATTR_CREATE)
                        goto out;
-                args->rename = 1;                       /* atomic rename op */
+                args->op_flags |= XFS_DA_OP_RENAME;     /* atomic rename op */
                args->blkno2 = args->blkno;             /* set 2nd entry info*/
                args->index2 = args->index;
                args->rmtblkno2 = args->rmtblkno;
@@ -1425,7 +1370,7 @@ restart:
         * so that one disappears and one appears atomically.  Then we
         * must remove the "old" attribute/value pair.
         */
-        if (args->rename) {
+        if (args->op_flags & XFS_DA_OP_RENAME) {
                /*
                 * In a separate transaction, set the incomplete flag on the
                 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -2300,23 +2245,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
 void
 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
 {
-        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
+        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
-                (__psunsigned_t)context->dp,
-                (__psunsigned_t)context->cursor->hashval,
-                (__psunsigned_t)context->cursor->blkno,
-                (__psunsigned_t)context->cursor->offset,
-                (__psunsigned_t)context->alist,
-                (__psunsigned_t)context->bufsize,
-                (__psunsigned_t)context->count,
-                (__psunsigned_t)context->firstu,
-                (__psunsigned_t)
-                        ((context->count > 0) &&
-                        !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-                                ? (ATTR_ENTRY(context->alist,
-                                              context->count-1)->a_valuelen)
-                                : 0,
-                (__psunsigned_t)context->dupcnt,
-                (__psunsigned_t)context->flags,
                (__psunsigned_t)NULL,
                (__psunsigned_t)NULL,
                (__psunsigned_t)NULL);
@@ -2329,23 +2258,7 @@ void
 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
                         struct xfs_da_intnode *node)
 {
-        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
+        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
-                (__psunsigned_t)context->dp,
-                (__psunsigned_t)context->cursor->hashval,
-                (__psunsigned_t)context->cursor->blkno,
-                (__psunsigned_t)context->cursor->offset,
-                (__psunsigned_t)context->alist,
-                (__psunsigned_t)context->bufsize,
-                (__psunsigned_t)context->count,
-                (__psunsigned_t)context->firstu,
-                (__psunsigned_t)
-                        ((context->count > 0) &&
-                        !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-                                ? (ATTR_ENTRY(context->alist,
-                                              context->count-1)->a_valuelen)
-                                : 0,
-                (__psunsigned_t)context->dupcnt,
-                (__psunsigned_t)context->flags,
                (__psunsigned_t)be16_to_cpu(node->hdr.count),
                (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
                (__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2272,7 @@ void
 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
                          struct xfs_da_node_entry *btree)
 {
-        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
+        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
-                (__psunsigned_t)context->dp,
-                (__psunsigned_t)context->cursor->hashval,
-                (__psunsigned_t)context->cursor->blkno,
-                (__psunsigned_t)context->cursor->offset,
-                (__psunsigned_t)context->alist,
-                (__psunsigned_t)context->bufsize,
-                (__psunsigned_t)context->count,
-                (__psunsigned_t)context->firstu,
-                (__psunsigned_t)
-                        ((context->count > 0) &&
-                        !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-                                ? (ATTR_ENTRY(context->alist,
-                                              context->count-1)->a_valuelen)
-                                : 0,
-                (__psunsigned_t)context->dupcnt,
-                (__psunsigned_t)context->flags,
                (__psunsigned_t)be32_to_cpu(btree->hashval),
                (__psunsigned_t)be32_to_cpu(btree->before),
                (__psunsigned_t)NULL);
@@ -2388,23 +2285,7 @@ void
 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
                              struct xfs_attr_leafblock *leaf)
 {
-        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
+        xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
-                (__psunsigned_t)context->dp,
-                (__psunsigned_t)context->cursor->hashval,
-                (__psunsigned_t)context->cursor->blkno,
-                (__psunsigned_t)context->cursor->offset,
-                (__psunsigned_t)context->alist,
-                (__psunsigned_t)context->bufsize,
-                (__psunsigned_t)context->count,
-                (__psunsigned_t)context->firstu,
-                (__psunsigned_t)
-                        ((context->count > 0) &&
-                        !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
-                                ? (ATTR_ENTRY(context->alist,
-                                              context->count-1)->a_valuelen)
-                                : 0,
-                (__psunsigned_t)context->dupcnt,
-                (__psunsigned_t)context->flags,
                (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
                (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
                (__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,329 +2298,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
 */
 void
 xfs_attr_trace_enter(int type, char *where,
-                         __psunsigned_t a2, __psunsigned_t a3,
+                         struct xfs_attr_list_context *context,
-                         __psunsigned_t a4, __psunsigned_t a5,
+                         __psunsigned_t a13, __psunsigned_t a14,
-                         __psunsigned_t a6, __psunsigned_t a7,
+                         __psunsigned_t a15)
-                         __psunsigned_t a8, __psunsigned_t a9,
-                         __psunsigned_t a10, __psunsigned_t a11,
-                         __psunsigned_t a12, __psunsigned_t a13,
-                         __psunsigned_t a14, __psunsigned_t a15)
 {
        ASSERT(xfs_attr_trace_buf);
        ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
-                                         (void *)where,
+                (void *)((__psunsigned_t)where),
-                                         (void *)a2,  (void *)a3,  (void *)a4,
+                (void *)((__psunsigned_t)context->dp),
-                                         (void *)a5,  (void *)a6,  (void *)a7,
+                (void *)((__psunsigned_t)context->cursor->hashval),
-                                         (void *)a8,  (void *)a9,  (void *)a10,
+                (void *)((__psunsigned_t)context->cursor->blkno),
-                                         (void *)a11, (void *)a12, (void *)a13,
+                (void *)((__psunsigned_t)context->cursor->offset),
-                                         (void *)a14, (void *)a15);
+                (void *)((__psunsigned_t)context->alist),
+                (void *)((__psunsigned_t)context->bufsize),
+                (void *)((__psunsigned_t)context->count),
+                (void *)((__psunsigned_t)context->firstu),
+                NULL,
+                (void *)((__psunsigned_t)context->dupcnt),
+                (void *)((__psunsigned_t)context->flags),
+                (void *)a13, (void *)a14, (void *)a15);
 }
 #endif  /* XFS_ATTR_TRACE */
-/*========================================================================
- * System (pseudo) namespace attribute interface routines.
- *========================================================================*/
-STATIC int
-posix_acl_access_set(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
-}
-STATIC int
-posix_acl_access_remove(
-        bhv_vnode_t *vp, char *name, int xflags)
-{
-        return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
-}
-STATIC int
-posix_acl_access_get(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
-}
-STATIC int
-posix_acl_access_exists(
-        bhv_vnode_t *vp)
-{
-        return xfs_acl_vhasacl_access(vp);
-}
-STATIC int
-posix_acl_default_set(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-STATIC int
-posix_acl_default_get(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
-}
-STATIC int
-posix_acl_default_remove(
-        bhv_vnode_t *vp, char *name, int xflags)
-{
-        return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
-}
-STATIC int
-posix_acl_default_exists(
-        bhv_vnode_t *vp)
-{
-        return xfs_acl_vhasacl_default(vp);
-}
-static struct attrnames posix_acl_access = {
-        .attr_name      = "posix_acl_access",
-        .attr_namelen   = sizeof("posix_acl_access") - 1,
-        .attr_get       = posix_acl_access_get,
-        .attr_set       = posix_acl_access_set,
-        .attr_remove    = posix_acl_access_remove,
-        .attr_exists    = posix_acl_access_exists,
-};
-static struct attrnames posix_acl_default = {
-        .attr_name      = "posix_acl_default",
-        .attr_namelen   = sizeof("posix_acl_default") - 1,
-        .attr_get       = posix_acl_default_get,
-        .attr_set       = posix_acl_default_set,
-        .attr_remove    = posix_acl_default_remove,
-        .attr_exists    = posix_acl_default_exists,
-};
-static struct attrnames *attr_system_names[] =
-        { &posix_acl_access, &posix_acl_default };
-/*========================================================================
- * Namespace-prefix-style attribute name interface routines.
- *========================================================================*/
-STATIC int
-attr_generic_set(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
-}
-STATIC int
-attr_generic_get(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        int     error, asize = size;
-        error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
-        if (!error)
-                return asize;
-        return -error;
-}
-STATIC int
-attr_generic_remove(
-        bhv_vnode_t *vp, char *name, int xflags)
-{
-        return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
-}
-STATIC int
-attr_generic_listadd(
-        attrnames_t             *prefix,
-        attrnames_t             *namesp,
-        void                    *data,
-        size_t                  size,
-        ssize_t                 *result)
-{
-        char                    *p = data + *result;
-        *result += prefix->attr_namelen;
-        *result += namesp->attr_namelen + 1;
-        if (!size)
-                return 0;
-        if (*result > size)
-                return -ERANGE;
-        strcpy(p, prefix->attr_name);
-        p += prefix->attr_namelen;
-        strcpy(p, namesp->attr_name);
-        p += namesp->attr_namelen + 1;
-        return 0;
-}
-STATIC int
-attr_system_list(
-        bhv_vnode_t             *vp,
-        void                    *data,
-        size_t                  size,
-        ssize_t                 *result)
-{
-        attrnames_t             *namesp;
-        int                     i, error = 0;
-        for (i = 0; i < ATTR_SYSCOUNT; i++) {
-                namesp = attr_system_names[i];
-                if (!namesp->attr_exists || !namesp->attr_exists(vp))
-                        continue;
-                error = attr_generic_listadd(&attr_system, namesp,
-                                                data, size, result);
-                if (error)
-                        break;
-        }
-        return error;
-}
-int
-attr_generic_list(
-        bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
-{
-        attrlist_cursor_kern_t  cursor = { 0 };
-        int                     error;
-        error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
-        if (error > 0)
-                return -error;
-        *result = -error;
-        return attr_system_list(vp, data, size, result);
-}
-attrnames_t *
-attr_lookup_namespace(
-        char                    *name,
-        struct attrnames        **names,
-        int                     nnames)
-{
-        int                     i;
-        for (i = 0; i < nnames; i++)
-                if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
-                        return names[i];
-        return NULL;
-}
-/*
- * Some checks to prevent people abusing EAs to get over quota:
- * - Don't allow modifying user EAs on devices/symlinks;
- * - Don't allow modifying user EAs if sticky bit set;
- */
-STATIC int
-attr_user_capable(
-        bhv_vnode_t     *vp,
-        cred_t          *cred)
-{
-        struct inode    *inode = vn_to_inode(vp);
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                return -EPERM;
-        if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
-            !capable(CAP_SYS_ADMIN))
-                return -EPERM;
-        if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
-            (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
-                return -EPERM;
-        return 0;
-}
-STATIC int
-attr_trusted_capable(
-        bhv_vnode_t     *vp,
-        cred_t          *cred)
-{
-        struct inode    *inode = vn_to_inode(vp);
-        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                return -EPERM;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
-        return 0;
-}
-STATIC int
-attr_system_set(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        attrnames_t     *namesp;
-        int             error;
-        if (xflags & ATTR_CREATE)
-                return -EINVAL;
-        namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
-        if (!namesp)
-                return -EOPNOTSUPP;
-        error = namesp->attr_set(vp, name, data, size, xflags);
-        if (!error)
-                error = vn_revalidate(vp);
-        return error;
-}
-STATIC int
-attr_system_get(
-        bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
-{
-        attrnames_t     *namesp;
-        namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
-        if (!namesp)
-                return -EOPNOTSUPP;
-        return namesp->attr_get(vp, name, data, size, xflags);
-}
-STATIC int
-attr_system_remove(
-        bhv_vnode_t *vp, char *name, int xflags)
-{
-        attrnames_t     *namesp;
-        namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
-        if (!namesp)
-                return -EOPNOTSUPP;
-        return namesp->attr_remove(vp, name, xflags);
-}
-struct attrnames attr_system = {
-        .attr_name      = "system.",
-        .attr_namelen   = sizeof("system.") - 1,
-        .attr_flag      = ATTR_SYSTEM,
-        .attr_get       = attr_system_get,
-        .attr_set       = attr_system_set,
-        .attr_remove    = attr_system_remove,
-        .attr_capable   = (attrcapable_t)fs_noerr,
-};
-struct attrnames attr_trusted = {
-        .attr_name      = "trusted.",
-        .attr_namelen   = sizeof("trusted.") - 1,
-        .attr_flag      = ATTR_ROOT,
-        .attr_get       = attr_generic_get,
-        .attr_set       = attr_generic_set,
-        .attr_remove    = attr_generic_remove,
-        .attr_capable   = attr_trusted_capable,
-};
-struct attrnames attr_secure = {
-        .attr_name      = "security.",
-        .attr_namelen   = sizeof("security.") - 1,
-        .attr_flag      = ATTR_SECURE,
-        .attr_get       = attr_generic_get,
-        .attr_set       = attr_generic_set,
-        .attr_remove    = attr_generic_remove,
-        .attr_capable   = (attrcapable_t)fs_noerr,
-};
-struct attrnames attr_user = {
-        .attr_name      = "user.",
-        .attr_namelen   = sizeof("user.") - 1,
-        .attr_get       = attr_generic_get,
-        .attr_set       = attr_generic_set,
-        .attr_remove    = attr_generic_remove,
-        .attr_capable   = attr_user_capable,
-};
-struct attrnames *attr_namespaces[] =
-        { &attr_system, &attr_trusted, &attr_secure, &attr_user };
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe35..8b2d31c19e4d 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
 #ifndef __XFS_ATTR_H__
 #define __XFS_ATTR_H__
+struct xfs_inode;
+struct xfs_da_args;
+struct xfs_attr_list_context;
 /*
- * xfs_attr.h
- *
 * Large attribute lists are structured around Btrees where all the data
 * elements are in the leaf nodes.  Attribute names are hashed into an int,
 * then that int is used as the index into the Btree.  Since the hashval
@@ -35,35 +37,6 @@
 * External interfaces
 *========================================================================*/
-struct cred;
-struct xfs_attr_list_context;
-typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
-typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
-typedef int (*attrexists_t)(bhv_vnode_t *);
-typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
-typedef struct attrnames {
-        char *          attr_name;
-        unsigned int    attr_namelen;
-        unsigned int    attr_flag;
-        attrget_t       attr_get;
-        attrset_t       attr_set;
-        attrremove_t    attr_remove;
-        attrexists_t    attr_exists;
-        attrcapable_t   attr_capable;
-} attrnames_t;
-#define ATTR_NAMECOUNT  4
-extern struct attrnames attr_user;
-extern struct attrnames attr_secure;
-extern struct attrnames attr_system;
-extern struct attrnames attr_trusted;
-extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
-extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
-extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
 #define ATTR_DONTFOLLOW 0x0001  /* -- unused, from IRIX -- */
 #define ATTR_ROOT       0x0002  /* use attrs in root (trusted) namespace */
@@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
 #define ATTR_SECURE     0x0008  /* use attrs in security namespace */
 #define ATTR_CREATE     0x0010  /* pure create: fail if attr already exists */
 #define ATTR_REPLACE    0x0020  /* pure set: fail if attr does not exist */
-#define ATTR_SYSTEM     0x0100  /* use attrs in system (pseudo) namespace */
-#define ATTR_KERNACCESS 0x0400  /* [kernel] iaccess, inode held io-locked */
 #define ATTR_KERNOTIME  0x1000  /* [kernel] don't update inode timestamps */
 #define ATTR_KERNOVAL   0x2000  /* [kernel] get attr size only, not value */
-#define ATTR_KERNAMELS  0x4000  /* [kernel] list attr names (simple list) */
-#define ATTR_KERNORMALS 0x0800  /* [kernel] normal attr list: user+secure */
-#define ATTR_KERNROOTLS 0x8000  /* [kernel] include root in the attr list */
-#define ATTR_KERNFULLS  (ATTR_KERNORMALS|ATTR_KERNROOTLS)
 /*
 * The maximum size (into the kernel or returned from the kernel) of an
@@ -119,22 +85,6 @@ typedef struct attrlist_ent {	/* data from attr_list() */
         &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
 /*
- * Multi-attribute operation vector.
- */
-typedef struct attr_multiop {
-        int     am_opcode;      /* operation to perform (ATTR_OP_GET, etc.) */
-        int     am_error;       /* [out arg] result of this sub-op (an errno) */
-        char    *am_attrname;   /* attribute name to work with */
-        char    *am_attrvalue;  /* [in/out arg] attribute value (raw bytes) */
-        int     am_length;      /* [in/out arg] length of value */
-        int     am_flags;       /* bitwise OR of attr API flags defined above */
-} attr_multiop_t;
-#define ATTR_OP_GET     1       /* return the indicated attr's value */
-#define ATTR_OP_SET     2       /* set/create the indicated attr/value pair */
-#define ATTR_OP_REMOVE  3       /* remove the indicated attr */
-/*
 * Kernel-internal version of the attrlist cursor.
 */
 typedef struct attrlist_cursor_kern {
@@ -148,20 +98,40 @@ typedef struct attrlist_cursor_kern {
 /*========================================================================
- * Function prototypes for the kernel.
+ * Structure used to pass context around among the routines.
 *========================================================================*/
-struct xfs_inode;
-struct attrlist_cursor_kern;
+typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
-struct xfs_da_args;
+                                      char *, int, int, char *);
+typedef struct xfs_attr_list_context {
+        struct xfs_inode                *dp;            /* inode */
+        struct attrlist_cursor_kern     *cursor;        /* position in list */
+        char                            *alist;         /* output buffer */
+        int                             seen_enough;    /* T/F: seen enough of list? */
+        ssize_t                         count;          /* num used entries */
+        int                             dupcnt;         /* count dup hashvals seen */
+        int                             bufsize;        /* total buffer size */
+        int                             firstu;         /* first used byte in buffer */
+        int                             flags;          /* from VOP call */
+        int                             resynch;        /* T/F: resynch with cursor */
+        int                             put_value;      /* T/F: need value for listent */
+        put_listent_func_t              put_listent;    /* list output fmt function */
+        int                             index;          /* index into output buffer */
+} xfs_attr_list_context_t;
+/*========================================================================
+ * Function prototypes for the kernel.
+ *========================================================================*/
 /*
 * Overall external interface routines.
 */
 int xfs_attr_inactive(struct xfs_inode *dp);
-int xfs_attr_shortform_getvalue(struct xfs_da_args *);
 int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
 int xfs_attr_rmtval_get(struct xfs_da_args *args);
+int xfs_attr_list_int(struct xfs_attr_list_context *);
 #endif  /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217b..23ef5d7c87e1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
 * Namespace helper routines
 *========================================================================*/
-STATIC_INLINE attrnames_t *
-xfs_attr_flags_namesp(int flags)
-{
-        return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
-                  ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
-}
 /*
 * If namespace bits don't match return 0.
 * If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
        return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
 }
-/*
- * If namespace bits don't match and we don't have an override for it
- * then return 0.
- * If all match or are overridable then return 1.
- */
-STATIC_INLINE int
-xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
-{
-        if (((arg_flags & ATTR_SECURE) == 0) !=
-            ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
-            !(arg_flags & ATTR_KERNORMALS))
-                return 0;
-        if (((arg_flags & ATTR_ROOT) == 0) !=
-            ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
-            !(arg_flags & ATTR_KERNROOTLS))
-                return 0;
-        return 1;
-}
 /*========================================================================
 * External routines when attribute fork size < XFS_LITINO(mp).
@@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
         * Fix up the start offset of the attribute fork
         */
        totsize -= size;
-        if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
+        if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
-            (mp->m_flags & XFS_MOUNT_ATTR2) && 
+                                !(args->op_flags & XFS_DA_OP_ADDNAME) &&
-            (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
+                                (mp->m_flags & XFS_MOUNT_ATTR2) &&
+                                (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
                /*
                 * Last attribute now removed, revert to original
                 * inode format making all literal area available
@@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
                xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
                dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
                ASSERT(dp->i_d.di_forkoff);
-                ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
+                ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
-                        !(mp->m_flags & XFS_MOUNT_ATTR2) ||
+                                (args->op_flags & XFS_DA_OP_ADDNAME) ||
-                        dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
+                                !(mp->m_flags & XFS_MOUNT_ATTR2) ||
+                                dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
                dp->i_afp->if_ext_max =
                        XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
                dp->i_df.if_ext_max =
@@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
        nargs.total = args->total;
        nargs.whichfork = XFS_ATTR_FORK;
        nargs.trans = args->trans;
-        nargs.oknoent = 1;
+        nargs.op_flags = XFS_DA_OP_OKNOENT;
        sfe = &sf->list[0];
        for (i = 0; i < sf->hdr.count; i++) {
@@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
 out:
        if(bp)
                xfs_da_buf_done(bp);
-        kmem_free(tmpbuffer, size);
+        kmem_free(tmpbuffer);
        return(error);
 }
@@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
            (XFS_ISRESET_CURSOR(cursor) &&
             (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
                for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
-                        attrnames_t     *namesp;
-                        if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
-                                sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-                                continue;
-                        }
-                        namesp = xfs_attr_flags_namesp(sfe->flags);
                        error = context->put_listent(context,
-                                           namesp,
+                                           sfe->flags,
                                           (char *)sfe->nameval,
                                           (int)sfe->namelen,
                                           (int)sfe->valuelen,
@@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                                             XFS_ERRLEVEL_LOW,
                                             context->dp->i_mount, sfe);
                        xfs_attr_trace_l_c("sf corrupted", context);
-                        kmem_free(sbuf, sbsize);
+                        kmem_free(sbuf);
                        return XFS_ERROR(EFSCORRUPTED);
                }
-                if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
-                        sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-                        continue;
-                }
                sbp->entno = i;
                sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
                sbp->name = (char *)sfe->nameval;
@@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                }
        }
        if (i == nsbuf) {
-                kmem_free(sbuf, sbsize);
+                kmem_free(sbuf);
                xfs_attr_trace_l_c("blk end", context);
                return(0);
        }
@@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
         * Loop putting entries into the user buffer.
         */
        for ( ; i < nsbuf; i++, sbp++) {
-                attrnames_t     *namesp;
-                namesp = xfs_attr_flags_namesp(sbp->flags);
                if (cursor->hashval != sbp->hash) {
                        cursor->hashval = sbp->hash;
                        cursor->offset = 0;
                }
                error = context->put_listent(context,
-                                        namesp,
+                                        sbp->flags,
                                        sbp->name,
                                        sbp->namelen,
                                        sbp->valuelen,
@@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
                cursor->offset++;
        }
-        kmem_free(sbuf, sbsize);
+        kmem_free(sbuf);
        xfs_attr_trace_l_c("sf E-O-F", context);
        return(0);
 }
@@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
        nargs.total = args->total;
        nargs.whichfork = XFS_ATTR_FORK;
        nargs.trans = args->trans;
-        nargs.oknoent = 1;
+        nargs.op_flags = XFS_DA_OP_OKNOENT;
        entry = &leaf->entries[0];
        for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
                if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
        error = 0;
 out:
-        kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
+        kmem_free(tmpbuffer);
        return(error);
 }
@@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
        entry->hashval = cpu_to_be32(args->hashval);
        entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
        entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
-        if (args->rename) {
+        if (args->op_flags & XFS_DA_OP_RENAME) {
                entry->flags |= XFS_ATTR_INCOMPLETE;
                if ((args->blkno2 == args->blkno) &&
                    (args->index2 <= args->index)) {
@@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
                                be16_to_cpu(hdr_s->count), mp);
        xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
-        kmem_free(tmpbuffer, XFS_LBSIZE(mp));
+        kmem_free(tmpbuffer);
 }
 /*
@@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
                                be16_to_cpu(drop_hdr->count), mp);
                }
                memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
-                kmem_free(tmpbuffer, state->blocksize);
+                kmem_free(tmpbuffer);
        }
        xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
         */
        retval = 0;
        for (  ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
-                attrnames_t *namesp;
                if (be32_to_cpu(entry->hashval) != cursor->hashval) {
                        cursor->hashval = be32_to_cpu(entry->hashval);
                        cursor->offset = 0;
@@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
                if (entry->flags & XFS_ATTR_INCOMPLETE)
                        continue;               /* skip incomplete entries */
-                if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
-                        continue;
-                namesp = xfs_attr_flags_namesp(entry->flags);
                if (entry->flags & XFS_ATTR_LOCAL) {
                        xfs_attr_leaf_name_local_t *name_loc =
                                XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
                        retval = context->put_listent(context,
-                                                namesp,
+                                                entry->flags,
                                                (char *)name_loc->nameval,
                                                (int)name_loc->namelen,
                                                be16_to_cpu(name_loc->valuelen),
@@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
                                if (retval)
                                        return retval;
                                retval = context->put_listent(context,
-                                                namesp,
+                                                entry->flags,
                                                (char *)name_rmt->name,
                                                (int)name_rmt->namelen,
                                                valuelen,
                                                (char*)args.value);
-                                kmem_free(args.value, valuelen);
+                                kmem_free(args.value);
-                        }
+                        } else {
-                        else {
                                retval = context->put_listent(context,
-                                                namesp,
+                                                entry->flags,
                                                (char *)name_rmt->name,
                                                (int)name_rmt->namelen,
                                                valuelen,
@@ -2954,7 +2909,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
                        error = tmp;    /* save only the 1st errno */
        }
-        kmem_free((xfs_caddr_t)list, size);
+        kmem_free((xfs_caddr_t)list);
        return(error);
 }
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e2..5ecf437b7825 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
 struct attrlist;
 struct attrlist_cursor_kern;
-struct attrnames;
+struct xfs_attr_list_context;
 struct xfs_dabuf;
 struct xfs_da_args;
 struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
        return (((bsize) >> 1) + ((bsize) >> 2));
 }
-/*========================================================================
- * Structure used to pass context around among the routines.
- *========================================================================*/
-struct xfs_attr_list_context;
-typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
-                                      char *, int, int, char *);
-typedef struct xfs_attr_list_context {
-        struct xfs_inode                *dp;            /* inode */
-        struct attrlist_cursor_kern     *cursor;        /* position in list */
-        struct attrlist                 *alist;         /* output buffer */
-        int                             seen_enough;    /* T/F: seen enough of list? */
-        int                             count;          /* num used entries */
-        int                             dupcnt;         /* count dup hashvals seen */
-        int                             bufsize;        /* total buffer size */
-        int                             firstu;         /* first used byte in buffer */
-        int                             flags;          /* from VOP call */
-        int                             resynch;        /* T/F: resynch with cursor */
-        int                             put_value;      /* T/F: need value for listent */
-        put_listent_func_t              put_listent;    /* list output fmt function */
-        int                             index;          /* index into output buffer */
-} xfs_attr_list_context_t;
 /*
 * Used to keep a list of "remote value" extents when unlinking an inode.
 */
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b1..ea22839caed2 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
 void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
                              struct xfs_attr_leafblock *leaf);
 void xfs_attr_trace_enter(int type, char *where,
-                             __psunsigned_t a2, __psunsigned_t a3,
+                             struct xfs_attr_list_context *context,
-                             __psunsigned_t a4, __psunsigned_t a5,
+                             __psunsigned_t a13, __psunsigned_t a14,
-                             __psunsigned_t a6, __psunsigned_t a7,
+                             __psunsigned_t a15);
-                             __psunsigned_t a8, __psunsigned_t a9,
-                             __psunsigned_t a10, __psunsigned_t a11,
-                             __psunsigned_t a12, __psunsigned_t a13,
-                             __psunsigned_t a14, __psunsigned_t a15);
 #else
 #define xfs_attr_trace_l_c(w,c)
 #define xfs_attr_trace_l_cn(w,c,n)
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5af..3c4beb3a4326 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
                cur->bc_private.b.firstblock = *firstblock;
                if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
                        goto error0;
-                ASSERT(stat == 1);      /* must be at least one entry */
+                /* must be at least one entry */
+                XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
                if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
                        goto error0;
                if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_delete(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_decrement(cur, 0, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                        LEFT.br_startblock,
                                        LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
                                        LEFT.br_startblock, LEFT.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                        LEFT.br_startblock,
                                        LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                        new->br_startblock,
                                        PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 0);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                *dnew = 0;
                /* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
                                        LEFT.br_startblock, LEFT.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                        LEFT.br_startblock,
                                        LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 0);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
                    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                        new->br_startblock,
                                        new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 0);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
                    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 0);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
                    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_delete(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_decrement(cur, 0, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_delete(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_decrement(cur, 0, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                LEFT.br_startblock,
                                LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_delete(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_decrement(cur, 0, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                LEFT.br_startblock,
                                LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_delete(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_decrement(cur, 0, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                new->br_startblock,
                                new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                new->br_startblock, new->br_blockcount,
                                newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur,
                                PREV.br_startoff + new->br_blockcount,
                                PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur,
                                PREV.br_startoff + new->br_blockcount,
                                PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
                        cur->bc_rec.b = *new;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                /* DELTA: One in-core extent is split in two. */
                temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock,
                                        PREV.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                PREV.br_startblock,
                                PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                PREV.br_startblock,
                                PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 0);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                /* DELTA: One in-core extent is split in two. */
                temp = PREV.br_startoff;
@@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        /* new right extent - oldext */
                        if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
                                r[1].br_startblock, r[1].br_blockcount,
                                r[1].br_state)))
                                goto done;
                        /* new left extent - oldext */
-                        PREV.br_blockcount =
-                                new->br_startoff - PREV.br_startoff;
                        cur->bc_rec.b = PREV;
+                        cur->bc_rec.b.br_blockcount =
+                                new->br_startoff - PREV.br_startoff;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-                        if ((error = xfs_bmbt_increment(cur, 0, &i)))
+                        /*
+                         * Reset the cursor to the position of the new extent
+                         * we are about to insert as we can't trust it after
+                         * the previous insert.
+                         */
+                        if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+                                        new->br_startblock, new->br_blockcount,
+                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        /* new middle extent - newext */
-                        cur->bc_rec.b = *new;
+                        cur->bc_rec.b.br_state = new->br_state;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                /* DELTA: One in-core extent is split in three. */
                temp = PREV.br_startoff;
@@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real(
                                        right.br_startblock,
                                        right.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_delete(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_decrement(cur, 0, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, left.br_startoff,
                                        left.br_startblock,
                                        left.br_blockcount +
@@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real(
                                        left.br_startblock,
                                        left.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, left.br_startoff,
                                        left.br_startblock,
                                        left.br_blockcount +
@@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real(
                                        right.br_startblock,
                                        right.br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                        new->br_startblock,
                                        new->br_blockcount +
@@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real(
                                        new->br_startblock,
                                        new->br_blockcount, &i)))
                                goto done;
-                        ASSERT(i == 0);
+                        XFS_WANT_CORRUPTED_GOTO(i == 0, done);
                        cur->bc_rec.b.br_state = new->br_state;
                        if ((error = xfs_bmbt_insert(cur, &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                /* DELTA: A new extent was added in a hole. */
                temp = new->br_startoff;
@@ -3131,7 +3139,7 @@ xfs_bmap_del_extent(
                                        got.br_startblock, got.br_blockcount,
                                        &i)))
                                goto done;
-                        ASSERT(i == 1);
+                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                }
                da_old = da_new = 0;
        } else {
@@ -3164,7 +3172,7 @@ xfs_bmap_del_extent(
                }
                if ((error = xfs_bmbt_delete(cur, &i)))
                        goto done;
-                ASSERT(i == 1);
+                XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                break;
        case 2:
@@ -3268,7 +3276,7 @@ xfs_bmap_del_extent(
                                                        got.br_startblock,
                                                        temp, &i)))
                                                goto done;
-                                        ASSERT(i == 1);
+                                        XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                                        /*
                                         * Update the btree record back
                                         * to the original value.
@@ -3289,7 +3297,7 @@ xfs_bmap_del_extent(
                                        error = XFS_ERROR(ENOSPC);
                                        goto done;
                                }
-                                ASSERT(i == 1);
+                                XFS_WANT_CORRUPTED_GOTO(i == 1, done);
                        } else
                                flags |= XFS_ILOG_FEXT(whichfork);
                        XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5970,7 +5978,7 @@ unlock_and_return:
        xfs_iunlock_map_shared(ip, lock);
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-        kmem_free(map, subnex * sizeof(*map));
+        kmem_free(map);
        return error;
 }
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451c..9f3e3a836d15 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
 /*
 * Header for free extent list.
+ *
+ * xbf_low is used by the allocator to activate the lowspace algorithm -
+ * when free space is running low the extent allocator may choose to
+ * allocate an extent from an AG without leaving sufficient space for
+ * a btree split when inserting the new extent.  In this case the allocator
+ * will enable the lowspace algorithm which is supposed to allow further
+ * allocations (such as btree splits and newroots) to allocate from
+ * sequential AGs.  In order to avoid locking AGs out of order the lowspace
+ * algorithm will start searching for free space from AG 0.  If the correct
+ * transaction reservations have been made then this algorithm will eventually
+ * find all the space it needs.
 */
 typedef struct xfs_bmap_free
 {
        xfs_bmap_free_item_t    *xbf_first;     /* list of to-be-free extents */
        int                     xbf_count;      /* count of items on list */
-        int                     xbf_low;        /* kludge: alloc in low mode */
+        int                     xbf_low;        /* alloc in low mode */
 } xfs_bmap_free_t;
 #define XFS_BMAP_MAX_NMAP       4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973e..23efad29a5cd 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,27 @@ xfs_bmbt_split(
        left = XFS_BUF_TO_BMBT_BLOCK(lbp);
        args.fsbno = cur->bc_private.b.firstblock;
        args.firstblock = args.fsbno;
+        args.minleft = 0;
        if (args.fsbno == NULLFSBLOCK) {
                args.fsbno = lbno;
                args.type = XFS_ALLOCTYPE_START_BNO;
-        } else
+                /*
+                 * Make sure there is sufficient room left in the AG to
+                 * complete a full tree split for an extent insert.  If
+                 * we are converting the middle part of an extent then
+                 * we may need space for two tree splits.
+                 *
+                 * We are relying on the caller to make the correct block
+                 * reservation for this operation to succeed.  If the
+                 * reservation amount is insufficient then we may fail a
+                 * block allocation here and corrupt the filesystem.
+                 */
+                args.minleft = xfs_trans_get_block_res(args.tp);
+        } else if (cur->bc_private.b.flist->xbf_low)
+                args.type = XFS_ALLOCTYPE_START_BNO;
+        else
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
-        args.mod = args.minleft = args.alignment = args.total = args.isfl =
+        args.mod = args.alignment = args.total = args.isfl =
                args.userdata = args.minalignslop = 0;
        args.minlen = args.maxlen = args.prod = 1;
        args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
@@ -1510,6 +1525,21 @@ xfs_bmbt_split(
                XFS_BMBT_TRACE_CURSOR(cur, ERROR);
                return error;
        }
+        if (args.fsbno == NULLFSBLOCK && args.minleft) {
+                /*
+                 * Could not find an AG with enough free space to satisfy
+                 * a full btree split.  Try again without minleft and if
+                 * successful activate the lowspace algorithm.
+                 */
+                args.fsbno = 0;
+                args.type = XFS_ALLOCTYPE_FIRST_AG;
+                args.minleft = 0;
+                if ((error = xfs_alloc_vextent(&args))) {
+                        XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                        return error;
+                }
+                cur->bc_private.b.flist->xbf_low = 1;
+        }
        if (args.fsbno == NULLFSBLOCK) {
                XFS_BMBT_TRACE_CURSOR(cur, EXIT);
                *stat = 0;
@@ -2029,22 +2059,8 @@ xfs_bmbt_increment(
 * Insert the current record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
- * cursor. It appears, however, that some callers assume that the cursor is
+ * cursor.  All callers of this function should assume that the cursor is
- * always valid. Hence if we do a multi-level split we need to revalidate the
+ * no longer valid and revalidate it.
- * cursor.
- *
- * When a split occurs, we will see a new cursor returned. Use that as a
- * trigger to determine if we need to revalidate the original cursor. If we get
- * a split, then use the original irec to lookup up the path of the record we
- * just inserted.
- *
- * Note that the fact that the btree root is in the inode means that we can
- * have the level of the tree change without a "split" occurring at the root
- * level. What happens is that the root is migrated to an allocated block and
- * the inode root is pointed to it. This means a single split can change the
- * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
- * the level change should be accounted as a split so as to correctly trigger a
- * revalidation of the old cursor.
 */
 int                                     /* error */
 xfs_bmbt_insert(
@@ -2057,14 +2073,11 @@ xfs_bmbt_insert(
        xfs_fsblock_t   nbno;
        xfs_btree_cur_t *ncur;
        xfs_bmbt_rec_t  nrec;
-        xfs_bmbt_irec_t oirec;          /* original irec */
        xfs_btree_cur_t *pcur;
-        int             splits = 0;
        XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
        level = 0;
        nbno = NULLFSBLOCK;
-        oirec = cur->bc_rec.b;
        xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
        ncur = NULL;
        pcur = cur;
@@ -2073,13 +2086,11 @@ xfs_bmbt_insert(
                                &i))) {
                        if (pcur != cur)
                                xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
-                        goto error0;
+                        XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+                        return error;
                }
                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
                if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
-                        /* allocating a new root is effectively a split */
-                        if (cur->bc_nlevels != pcur->bc_nlevels)
-                                splits++;
                        cur->bc_nlevels = pcur->bc_nlevels;
                        cur->bc_private.b.allocated +=
                                pcur->bc_private.b.allocated;
@@ -2093,21 +2104,10 @@ xfs_bmbt_insert(
                        xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
                }
                if (ncur) {
-                        splits++;
                        pcur = ncur;
                        ncur = NULL;
                }
        } while (nbno != NULLFSBLOCK);
-        if (splits > 1) {
-                /* revalidate the old cursor as we had a multi-level split */
-                error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
-                                oirec.br_startblock, oirec.br_blockcount, &i);
-                if (error)
-                        goto error0;
-                ASSERT(i == 1);
-        }
        XFS_BMBT_TRACE_CURSOR(cur, EXIT);
        *stat = i;
        return 0;
@@ -2254,7 +2254,9 @@ xfs_bmbt_newroot(
 #endif
                args.fsbno = be64_to_cpu(*pp);
                args.type = XFS_ALLOCTYPE_START_BNO;
-        } else
+        } else if (cur->bc_private.b.flist->xbf_low)
+                args.type = XFS_ALLOCTYPE_START_BNO;
+        else
                args.type = XFS_ALLOCTYPE_NEAR_BNO;
        if ((error = xfs_alloc_vextent(&args))) {
                XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025d..d86ca2c03a70 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
        }
 #ifdef XFS_TRANS_DEBUG
-        kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+        kmem_free(bip->bli_orig);
        bip->bli_orig = NULL;
-        kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+        kmem_free(bip->bli_logged);
        bip->bli_logged = NULL;
 #endif /* XFS_TRANS_DEBUG */
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
        xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
 #ifdef XFS_TRANS_DEBUG
-        kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp));
+        kmem_free(bip->bli_orig);
        bip->bli_orig = NULL;
-        kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY);
+        kmem_free(bip->bli_logged);
        bip->bli_logged = NULL;
 #endif /* XFS_TRANS_DEBUG */
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee224..d2ce5dd70d87 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
 #define XFSMNT_IOSIZE           0x00002000      /* optimize for I/O size */
 #define XFSMNT_OSYNCISOSYNC     0x00004000      /* o_sync is REALLY o_sync */
                                                /* (osyncisdsync is default) */
+#define XFSMNT_NOATTR2          0x00008000      /* turn off ATTR2 EA format */
 #define XFSMNT_32BITINODES      0x00200000      /* restrict inodes to 32
                                                 * bits of address space */
 #define XFSMNT_GQUOTA           0x00400000      /* group quota accounting */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563f..9e561a9cefca 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
        }
        if (level < 0) {
                *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
-                ASSERT(args->oknoent);
+                ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
                return(0);
        }
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
        }
 }
+enum xfs_dacmp                          /* XFS_CMP_EXACT or XFS_CMP_DIFFERENT; never XFS_CMP_CASE */
+xfs_da_compname(
+        struct xfs_da_args *args,       /* supplies the wanted name: args->name, args->namelen */
+        const char      *name,          /* candidate name to compare against args->name */
+        int             len)            /* length of the candidate name in bytes */
+{
+        return (args->namelen == len && memcmp(args->name, name, len) == 0) ?   /* lengths must match before the byte-for-byte compare */
+                                        XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
+}
+static xfs_dahash_t                     /* value returned by xfs_da_hashname() */
+xfs_default_hashname(
+        struct xfs_name *name)          /* name to hash; only name->name and name->len are read */
+{
+        return xfs_da_hashname(name->name, name->len);  /* adapts the (string, length) hash to the struct xfs_name based hashname op */
+}
+const struct xfs_nameops xfs_default_nameops = {        /* byte-exact name ops; not static because xfs_dir2.c refers to it via extern */
+        .hashname       = xfs_default_hashname, /* plain xfs_da_hashname() of the name */
+        .compname       = xfs_da_compname       /* memcmp()-based compare, no case folding */
+};
 /*
 * Add a block to the btree ahead of the file.
 * Return the new block number to the caller.
@@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
                                        args->firstblock, args->total,
                                        &mapp[mapi], &nmap, args->flist,
                                        NULL))) {
-                                kmem_free(mapp, sizeof(*mapp) * count);
+                                kmem_free(mapp);
                                return error;
                        }
                        if (nmap < 1)
@@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
            mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
            bno + count) {
                if (mapp != &map)
-                        kmem_free(mapp, sizeof(*mapp) * count);
+                        kmem_free(mapp);
                return XFS_ERROR(ENOSPC);
        }
        if (mapp != &map)
-                kmem_free(mapp, sizeof(*mapp) * count);
+                kmem_free(mapp);
        *new_blkno = (xfs_dablk_t)bno;
        return 0;
 }
@@ -2090,10 +2112,10 @@ xfs_da_do_buf(
                }
        }
        if (bplist) {
-                kmem_free(bplist, sizeof(*bplist) * nmap);
+                kmem_free(bplist);
        }
        if (mapp != &map) {
-                kmem_free(mapp, sizeof(*mapp) * nfsb);
+                kmem_free(mapp);
        }
        if (bpp)
                *bpp = rbp;
@@ -2102,11 +2124,11 @@ exit1:
        if (bplist) {
                for (i = 0; i < nbplist; i++)
                        xfs_trans_brelse(trans, bplist[i]);
-                kmem_free(bplist, sizeof(*bplist) * nmap);
+                kmem_free(bplist);
        }
 exit0:
        if (mapp != &map)
-                kmem_free(mapp, sizeof(*mapp) * nfsb);
+                kmem_free(mapp);
        if (bpp)
                *bpp = NULL;
        return error;
@@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
 #ifdef XFS_DABUF_DEBUG
 xfs_dabuf_t     *xfs_dabuf_global_list;
-spinlock_t      xfs_dabuf_global_lock;
+static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
 #endif
 /*
@@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
        if (dabuf->dirty)
                xfs_da_buf_clean(dabuf);
        if (dabuf->nbuf > 1)
-                kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
+                kmem_free(dabuf->data);
 #ifdef XFS_DABUF_DEBUG
        {
                spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
        if (dabuf->nbuf == 1)
                kmem_zone_free(xfs_dabuf_zone, dabuf);
        else
-                kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
+                kmem_free(dabuf);
 }
 /*
@@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
        for (i = 0; i < nbuf; i++)
                xfs_trans_brelse(tp, bplist[i]);
        if (bplist != &bp)
-                kmem_free(bplist, nbuf * sizeof(*bplist));
+                kmem_free(bplist);
 }
 /*
@@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
        for (i = 0; i < nbuf; i++)
                xfs_trans_binval(tp, bplist[i]);
        if (bplist != &bp)
-                kmem_free(bplist, nbuf * sizeof(*bplist));
+                kmem_free(bplist);
 }
 /*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f9..8be0b00ede9a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
 *========================================================================*/
 /*
+ * Search comparison results
+ */
+enum xfs_dacmp {
+        XFS_CMP_DIFFERENT,      /* names are completely different */
+        XFS_CMP_EXACT,          /* names are exactly the same */
+        XFS_CMP_CASE            /* names are same but differ in case */
+};
+/*
 * Structure to ease passing around component names.
 */
 typedef struct xfs_da_args {
@@ -123,13 +132,20 @@ typedef struct xfs_da_args {
        int             index2;         /* index of 2nd attr in blk */
        xfs_dablk_t     rmtblkno2;      /* remote attr value starting blkno */
        int             rmtblkcnt2;     /* remote attr value block count */
-        unsigned char   justcheck;      /* T/F: check for ok with no space */
+        int             op_flags;       /* operation flags */
-        unsigned char   rename;         /* T/F: this is an atomic rename op */
+        enum xfs_dacmp  cmpresult;      /* name compare result for lookups */
-        unsigned char   addname;        /* T/F: this is an add operation */
-        unsigned char   oknoent;        /* T/F: ok to return ENOENT, else die */
 } xfs_da_args_t;
 /*
+ * Operation flags:
+ */
+#define XFS_DA_OP_JUSTCHECK     0x0001  /* check for ok with no space */
+#define XFS_DA_OP_RENAME        0x0002  /* this is an atomic rename op */
+#define XFS_DA_OP_ADDNAME       0x0004  /* this is an add operation */
+#define XFS_DA_OP_OKNOENT       0x0008  /* lookup/add op, ENOENT ok, else die */
+#define XFS_DA_OP_CILOOKUP      0x0010  /* lookup to return CI name if found */
+/*
 * Structure to describe buffer(s) for a block.
 * This is needed in the directory version 2 format case, when
 * multiple non-contiguous fsblocks might be needed to cover one
@@ -201,6 +217,14 @@ typedef struct xfs_da_state {
                (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
                (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
+/*
+ * Name ops for directory and/or attr name operations
+ */
+struct xfs_nameops {
+        xfs_dahash_t    (*hashname)(struct xfs_name *); /* compute the hash value of a name */
+        enum xfs_dacmp  (*compname)(struct xfs_da_args *, const char *, int);   /* compare the name in the args with a (name, length) pair */
+};
 #ifdef __KERNEL__
 /*========================================================================
@@ -249,6 +273,10 @@ int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
                                          xfs_dabuf_t *dead_buf);
 uint xfs_da_hashname(const uchar_t *name_string, int name_length);
+enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
+                                const char *name, int len);
 xfs_da_state_t *xfs_da_state_alloc(void);
 void xfs_da_state_free(xfs_da_state_t *state);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb9885..2211e885ef24 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
 out_put_file:
        fput(file);
 out_free_sxp:
-        kmem_free(sxp, sizeof(xfs_swapext_t));
+        kmem_free(sxp);
 out:
        return error;
 }
@@ -381,6 +381,6 @@ xfs_swap_extents(
                xfs_iunlock(tip, lock_flags);
        }
        if (tempifp != NULL)
-                kmem_free(tempifp, sizeof(xfs_ifork_t));
+                kmem_free(tempifp);
        return error;
 }
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766b..80e0dc51361c 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,54 @@
 struct xfs_name xfs_name_dotdot = {"..", 2};
+extern const struct xfs_nameops xfs_default_nameops;
+/*
+ * ASCII case-insensitive (ie. A-Z) support for directories that was
+ * used in IRIX.
+ */
+STATIC xfs_dahash_t                     /* hash of the tolower()-folded name */
+xfs_ascii_ci_hashname(
+        struct xfs_name *name)          /* name to hash; name->len bytes of name->name are read */
+{
+        xfs_dahash_t    hash;           /* running hash value, starts at 0 */
+        int             i;              /* byte index into name->name */
+        for (i = 0, hash = 0; i < name->len; i++)
+                hash = tolower(name->name[i]) ^ rol32(hash, 7); /* fold case before mixing so names differing only in case hash alike */
+        return hash;
+}
+STATIC enum xfs_dacmp                   /* XFS_CMP_EXACT, XFS_CMP_CASE or XFS_CMP_DIFFERENT */
+xfs_ascii_ci_compname(
+        struct xfs_da_args *args,       /* supplies the wanted name: args->name, args->namelen */
+        const char      *name,          /* candidate name to compare against args->name */
+        int             len)            /* length of the candidate name in bytes */
+{
+        enum xfs_dacmp  result;         /* best result so far: EXACT until a case-only difference is seen */
+        int             i;              /* byte index into both names */
+        if (args->namelen != len)       /* different lengths can never match */
+                return XFS_CMP_DIFFERENT;
+        result = XFS_CMP_EXACT;
+        for (i = 0; i < len; i++) {
+                if (args->name[i] == name[i])   /* identical byte: no effect on the result */
+                        continue;
+                if (tolower(args->name[i]) != tolower(name[i])) /* differs even after case folding */
+                        return XFS_CMP_DIFFERENT;
+                result = XFS_CMP_CASE;  /* bytes differ only by case: downgrade from EXACT */
+        }
+        return result;
+}
+static struct xfs_nameops xfs_ascii_ci_nameops = {      /* chosen by xfs_dir_mount() when xfs_sb_version_hasasciici() is true; NOTE(review): not const, unlike xfs_default_nameops - confirm whether it can be */
+        .hashname       = xfs_ascii_ci_hashname,        /* tolower()-folded hash */
+        .compname       = xfs_ascii_ci_compname,        /* compare that can report XFS_CMP_CASE */
+};
 void
 xfs_dir_mount(
        xfs_mount_t     *mp)
@@ -65,6 +113,10 @@ xfs_dir_mount(
                (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
                (uint)sizeof(xfs_da_node_entry_t);
        mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
+        if (xfs_sb_version_hasasciici(&mp->m_sb))
+                mp->m_dirnameops = &xfs_ascii_ci_nameops;
+        else
+                mp->m_dirnameops = &xfs_default_nameops;
 }
 /*
@@ -162,9 +214,10 @@ xfs_dir_createname(
                return rval;
        XFS_STATS_INC(xs_dir_create);
+        memset(&args, 0, sizeof(xfs_da_args_t));
        args.name = name->name;
        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name->name, name->len);
+        args.hashval = dp->i_mount->m_dirnameops->hashname(name);
        args.inumber = inum;
        args.dp = dp;
        args.firstblock = first;
@@ -172,8 +225,7 @@ xfs_dir_createname(
        args.total = total;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
-        args.justcheck = 0;
+        args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
-        args.addname = args.oknoent = 1;
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_addname(&args);
@@ -191,14 +243,43 @@ xfs_dir_createname(
 }
 /*
+ * If doing a CI lookup and case-insensitive match, dup actual name into
+ * args.value. Return EEXIST for success (ie. name found) or an error.
+ */
+int                                     /* ENOENT, EEXIST (name found) or ENOMEM */
+xfs_dir_cilookup_result(
+        struct xfs_da_args *args,       /* reads cmpresult and op_flags; may set value and valuelen */
+        const char      *name,          /* name of the matched directory entry */
+        int             len)            /* length of that name in bytes */
+{
+        if (args->cmpresult == XFS_CMP_DIFFERENT)       /* the compare found no match at all */
+                return ENOENT;
+        if (args->cmpresult != XFS_CMP_CASE ||          /* not a case-only match, or ... */
+                                        !(args->op_flags & XFS_DA_OP_CILOOKUP)) /* ... the actual name was not requested */
+                return EEXIST;
+        args->value = kmem_alloc(len, KM_MAYFAIL);      /* exactly len bytes; NOTE(review): presumably freed by whoever consumes args->value - confirm */
+        if (!args->value)
+                return ENOMEM;
+        memcpy(args->value, name, len);                 /* copies len bytes; no NUL terminator is appended */
+        args->valuelen = len;
+        return EEXIST;
+}
+/*
 * Lookup a name in a directory, give back the inode number.
+ * If ci_name is not NULL, returns the actual name in ci_name if it differs
+ * from name, or ci_name->name is set to NULL for an exact match.
 */
 int
 xfs_dir_lookup(
        xfs_trans_t     *tp,
        xfs_inode_t     *dp,
        struct xfs_name *name,
-        xfs_ino_t       *inum)          /* out: inode number */
+        xfs_ino_t       *inum,          /* out: inode number */
+        struct xfs_name *ci_name)       /* out: actual name if CI match */
 {
        xfs_da_args_t   args;
        int             rval;
@@ -206,15 +287,17 @@ xfs_dir_lookup(
        ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
        XFS_STATS_INC(xs_dir_lookup);
-        memset(&args, 0, sizeof(xfs_da_args_t));
+        memset(&args, 0, sizeof(xfs_da_args_t));
        args.name = name->name;
        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name->name, name->len);
+        args.hashval = dp->i_mount->m_dirnameops->hashname(name);
        args.dp = dp;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
-        args.oknoent = 1;
+        args.op_flags = XFS_DA_OP_OKNOENT;
+        if (ci_name)
+                args.op_flags |= XFS_DA_OP_CILOOKUP;
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_lookup(&args);
@@ -230,8 +313,13 @@ xfs_dir_lookup(
                rval = xfs_dir2_node_lookup(&args);
        if (rval == EEXIST)
                rval = 0;
-        if (rval == 0)
+        if (!rval) {
                *inum = args.inumber;
+                if (ci_name) {
+                        ci_name->name = args.value;
+                        ci_name->len = args.valuelen;
+                }
+        }
        return rval;
 }
@@ -255,9 +343,10 @@ xfs_dir_removename(
        ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
        XFS_STATS_INC(xs_dir_remove);
+        memset(&args, 0, sizeof(xfs_da_args_t));
        args.name = name->name;
        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name->name, name->len);
+        args.hashval = dp->i_mount->m_dirnameops->hashname(name);
        args.inumber = ino;
        args.dp = dp;
        args.firstblock = first;
@@ -265,7 +354,6 @@ xfs_dir_removename(
        args.total = total;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
-        args.justcheck = args.addname = args.oknoent = 0;
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_removename(&args);
@@ -338,9 +426,10 @@ xfs_dir_replace(
        if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
                return rval;
+        memset(&args, 0, sizeof(xfs_da_args_t));
        args.name = name->name;
        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name->name, name->len);
+        args.hashval = dp->i_mount->m_dirnameops->hashname(name);
        args.inumber = inum;
        args.dp = dp;
        args.firstblock = first;
@@ -348,7 +437,6 @@ xfs_dir_replace(
        args.total = total;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
-        args.justcheck = args.addname = args.oknoent = 0;
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_replace(&args);
@@ -384,15 +472,16 @@ xfs_dir_canenter(
                return 0;
        ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
-        memset(&args, 0, sizeof(xfs_da_args_t));
+        memset(&args, 0, sizeof(xfs_da_args_t));
        args.name = name->name;
        args.namelen = name->len;
-        args.hashval = xfs_da_hashname(name->name, name->len);
+        args.hashval = dp->i_mount->m_dirnameops->hashname(name);
        args.dp = dp;
        args.whichfork = XFS_DATA_FORK;
        args.trans = tp;
-        args.justcheck = args.addname = args.oknoent = 1;
+        args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+                                                        XFS_DA_OP_OKNOENT;
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_addname(&args);
@@ -493,7 +582,7 @@ xfs_dir2_grow_inode(
                                        args->firstblock, args->total,
                                        &mapp[mapi], &nmap, args->flist,
                                        NULL))) {
-                                kmem_free(mapp, sizeof(*mapp) * count);
+                                kmem_free(mapp);
                                return error;
                        }
                        if (nmap < 1)
@@ -525,14 +614,14 @@ xfs_dir2_grow_inode(
            mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
            bno + count) {
                if (mapp != &map)
-                        kmem_free(mapp, sizeof(*mapp) * count);
+                        kmem_free(mapp);
                return XFS_ERROR(ENOSPC);
        }
        /*
         * Done with the temporary mapping table.
         */
        if (mapp != &map)
-                kmem_free(mapp, sizeof(*mapp) * count);
+                kmem_free(mapp);
        *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
        /*
         * Update file's size if this is the data space and it grew.
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029f..1d9ef96f33aa 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
                                xfs_fsblock_t *first,
                                struct xfs_bmap_free *flist, xfs_extlen_t tot);
 extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
-                                struct xfs_name *name, xfs_ino_t *inum);
+                                struct xfs_name *name, xfs_ino_t *inum,
+                                struct xfs_name *ci_name);
 extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
                                struct xfs_name *name, xfs_ino_t ino,
                                xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
 extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
                                struct xfs_dabuf *bp);
+extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
+                                int len);
 #endif  /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b3..e2fa0a1d8e96 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
        /*
         * If this isn't a real add, we're done with the buffer.
         */
-        if (args->justcheck)
+        if (args->op_flags & XFS_DA_OP_JUSTCHECK)
                xfs_da_brelse(tp, bp);
        /*
         * If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
                 * Not trying to actually do anything, or don't have
                 * a space reservation: return no-space.
                 */
-                if (args->justcheck || args->total == 0)
+                if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
                        return XFS_ERROR(ENOSPC);
                /*
                 * Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
        /*
         * Just checking, and it would work, so say so.
         */
-        if (args->justcheck)
+        if (args->op_flags & XFS_DA_OP_JUSTCHECK)
                return 0;
        needlog = needscan = 0;
        /*
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
        /*
         * Get the offset from the leaf entry, to point to the data.
         */
-        dep = (xfs_dir2_data_entry_t *)
+        dep = (xfs_dir2_data_entry_t *)((char *)block +
-              ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+                xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
        /*
-         * Fill in inode number, release the block.
+         * Fill in inode number, CI name if appropriate, release the block.
         */
        args->inumber = be64_to_cpu(dep->inumber);
+        error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
        xfs_da_brelse(args->trans, bp);
-        return XFS_ERROR(EEXIST);
+        return XFS_ERROR(error);
 }
 /*
@@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int(
        int                     mid;            /* binary search current idx */
        xfs_mount_t             *mp;            /* filesystem mount point */
        xfs_trans_t             *tp;            /* transaction pointer */
+        enum xfs_dacmp          cmp;            /* comparison result */
        dp = args->dp;
        tp = args->trans;
@@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int(
                else
                        high = mid - 1;
                if (low > high) {
-                        ASSERT(args->oknoent);
+                        ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
                        xfs_da_brelse(tp, bp);
                        return XFS_ERROR(ENOENT);
                }
@@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int(
                dep = (xfs_dir2_data_entry_t *)
                        ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
                /*
-                 * Compare, if it's right give back buffer & entry number.
+                 * Compare name and if it's an exact match, return the index
+                 * and buffer. If it's the first case-insensitive match, store
+                 * the index and buffer and continue looking for an exact match.
                 */
-                if (dep->namelen == args->namelen &&
+                cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
-                    dep->name[0] == args->name[0] &&
+                if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
-                    memcmp(dep->name, args->name, args->namelen) == 0) {
+                        args->cmpresult = cmp;
                        *bpp = bp;
                        *entno = mid;
-                        return 0;
+                        if (cmp == XFS_CMP_EXACT)
+                                return 0;
                }
-        } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash);
+        } while (++mid < be32_to_cpu(btp->count) &&
+                        be32_to_cpu(blp[mid].hashval) == hash);
+        ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+        /*
+         * Here, we can only be doing a lookup (not a rename or replace).
+         * If a case-insensitive match was found earlier, return success.
+         */
+        if (args->cmpresult == XFS_CMP_CASE)
+                return 0;
        /*
         * No match, release the buffer and return ENOENT.
         */
-        ASSERT(args->oknoent);
        xfs_da_brelse(tp, bp);
        return XFS_ERROR(ENOENT);
 }
@@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block(
        xfs_dir2_sf_t           *sfp;           /* shortform structure */
        __be16                  *tagp;          /* end of data entry */
        xfs_trans_t             *tp;            /* transaction pointer */
+        struct xfs_name         name;
        xfs_dir2_trace_args("sf_to_block", args);
        dp = args->dp;
@@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block(
         */
        error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
        if (error) {
-                kmem_free(buf, buf_len);
+                kmem_free(buf);
                return error;
        }
        /*
@@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block(
         */
        error = xfs_dir2_data_init(args, blkno, &bp);
        if (error) {
-                kmem_free(buf, buf_len);
+                kmem_free(buf);
                return error;
        }
        block = bp->data;
@@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block(
                tagp = xfs_dir2_data_entry_tag_p(dep);
                *tagp = cpu_to_be16((char *)dep - (char *)block);
                xfs_dir2_data_log_entry(tp, bp, dep);
-                blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname(
+                name.name = sfep->name;
-                                        (char *)sfep->name, sfep->namelen));
+                name.len = sfep->namelen;
+                blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
+                                                        hashname(&name));
                blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
                                                 (char *)dep - (char *)block));
                offset = (int)((char *)(tagp + 1) - (char *)block);
@@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block(
                        sfep = xfs_dir2_sf_nextentry(sfp, sfep);
        }
        /* Done with the temporary buffer */
-        kmem_free(buf, buf_len);
+        kmem_free(buf);
        /*
         * Sort the leaf entries by hash value.
         */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23d..498f8d694330 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
        xfs_mount_t             *mp;            /* filesystem mount point */
        char                    *p;             /* current data position */
        int                     stale;          /* count of stale leaves */
+        struct xfs_name         name;
        mp = dp->i_mount;
        d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
                        addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
                                (xfs_dir2_data_aoff_t)
                                ((char *)dep - (char *)d));
-                        hash = xfs_da_hashname((char *)dep->name, dep->namelen);
+                        name.name = dep->name;
+                        name.len = dep->namelen;
+                        hash = mp->m_dirnameops->hashname(&name);
                        for (i = 0; i < be32_to_cpu(btp->count); i++) {
                                if (be32_to_cpu(lep[i].address) == addr &&
                                    be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79b..93535992cb60 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
         * If we don't have enough free bytes but we can make enough
         * by compacting out stale entries, we'll do that.
         */
-        if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes &&
+        if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
-            be16_to_cpu(leaf->hdr.stale) > 1) {
+                                needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
                compact = 1;
        }
        /*
         * Otherwise if we don't have enough free bytes we need to
         * convert to node form.
         */
-        else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
+        else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
-                 needbytes) {
+                                                leaf->hdr.count)] < needbytes) {
                /*
                 * Just checking or no space reservation, give up.
                 */
-                if (args->justcheck || args->total == 0) {
+                if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+                                                        args->total == 0) {
                        xfs_da_brelse(tp, lbp);
                        return XFS_ERROR(ENOSPC);
                }
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
         * If just checking, then it will fit unless we needed to allocate
         * a new data block.
         */
-        if (args->justcheck) {
+        if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
                xfs_da_brelse(tp, lbp);
                return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
        }
@@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents(
                *offset = XFS_DIR2_MAX_DATAPTR;
        else
                *offset = xfs_dir2_byte_to_dataptr(mp, curoff);
-        kmem_free(map, map_size * sizeof(*map));
+        kmem_free(map);
        if (bp)
                xfs_da_brelse(NULL, bp);
        return error;
@@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup(
              ((char *)dbp->data +
               xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
        /*
-         * Return the found inode number.
+         * Return the found inode number & CI name if appropriate
         */
        args->inumber = be64_to_cpu(dep->inumber);
+        error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
        xfs_da_brelse(tp, dbp);
        xfs_da_brelse(tp, lbp);
-        return XFS_ERROR(EEXIST);
+        return XFS_ERROR(error);
 }
 /*
@@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
        int                     *indexp,        /* out: index in leaf block */
        xfs_dabuf_t             **dbpp)         /* out: data buffer */
 {
-        xfs_dir2_db_t           curdb;          /* current data block number */
+        xfs_dir2_db_t           curdb = -1;     /* current data block number */
-        xfs_dabuf_t             *dbp;           /* data buffer */
+        xfs_dabuf_t             *dbp = NULL;    /* data buffer */
        xfs_dir2_data_entry_t   *dep;           /* data entry */
        xfs_inode_t             *dp;            /* incore directory inode */
        int                     error;          /* error return code */
@@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int(
        xfs_mount_t             *mp;            /* filesystem mount point */
        xfs_dir2_db_t           newdb;          /* new data block number */
        xfs_trans_t             *tp;            /* transaction pointer */
+        xfs_dir2_db_t           cidb = -1;      /* case match data block no. */
+        enum xfs_dacmp          cmp;            /* name compare result */
        dp = args->dp;
        tp = args->trans;
@@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
        /*
         * Read the leaf block into the buffer.
         */
-        if ((error =
+        error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
-            xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
+                                                        XFS_DATA_FORK);
-                    XFS_DATA_FORK))) {
+        if (error)
                return error;
-        }
        *lbpp = lbp;
        leaf = lbp->data;
        xfs_dir2_leaf_check(dp, lbp);
@@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int(
         * Loop over all the entries with the right hash value
         * looking to match the name.
         */
-        for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
+        for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
-             index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
+                                be32_to_cpu(lep->hashval) == args->hashval;
-             lep++, index++) {
+                                lep++, index++) {
                /*
                 * Skip over stale leaf entries.
                 */
@@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int(
                if (newdb != curdb) {
                        if (dbp)
                                xfs_da_brelse(tp, dbp);
-                        if ((error =
+                        error = xfs_da_read_buf(tp, dp,
-                            xfs_da_read_buf(tp, dp,
+                                                xfs_dir2_db_to_da(mp, newdb),
-                                    xfs_dir2_db_to_da(mp, newdb), -1, &dbp,
+                                                -1, &dbp, XFS_DATA_FORK);
-                                    XFS_DATA_FORK))) {
+                        if (error) {
                                xfs_da_brelse(tp, lbp);
                                return error;
                        }
@@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int(
                /*
                 * Point to the data entry.
                 */
-                dep = (xfs_dir2_data_entry_t *)
+                dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
-                      ((char *)dbp->data +
+                        xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
-                       xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
                /*
-                 * If it matches then return it.
+                 * Compare name and if it's an exact match, return the index
+                 * and buffer. If it's the first case-insensitive match, store
+                 * the index and buffer and continue looking for an exact match.
                 */
-                if (dep->namelen == args->namelen &&
+                cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
-                    dep->name[0] == args->name[0] &&
+                if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
-                    memcmp(dep->name, args->name, args->namelen) == 0) {
+                        args->cmpresult = cmp;
-                        *dbpp = dbp;
                        *indexp = index;
-                        return 0;
+                        /* case exact match: return the current buffer. */
+                        if (cmp == XFS_CMP_EXACT) {
+                                *dbpp = dbp;
+                                return 0;
+                        }
+                        cidb = curdb;
                }
        }
+        ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+        /*
+         * Here, we can only be doing a lookup (not a rename or remove).
+         * If a case-insensitive match was found earlier, re-read the
+         * appropriate data block if required and return it.
+         */
+        if (args->cmpresult == XFS_CMP_CASE) {
+                ASSERT(cidb != -1);
+                if (cidb != curdb) {
+                        xfs_da_brelse(tp, dbp);
+                        error = xfs_da_read_buf(tp, dp,
+                                                xfs_dir2_db_to_da(mp, cidb),
+                                                -1, &dbp, XFS_DATA_FORK);
+                        if (error) {
+                                xfs_da_brelse(tp, lbp);
+                                return error;
+                        }
+                }
+                *dbpp = dbp;
+                return 0;
+        }
        /*
         * No match found, return ENOENT.
         */
-        ASSERT(args->oknoent);
+        ASSERT(cidb == -1);
        if (dbp)
                xfs_da_brelse(tp, dbp);
        xfs_da_brelse(tp, lbp);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f099..fa6c3a5ddbc6 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
        ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
               be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
-        if (args->justcheck)
+        if (args->op_flags & XFS_DA_OP_JUSTCHECK)
                return 0;
        /*
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
 }
 /*
- * Look up a leaf entry in a node-format leaf block.
+ * Look up a leaf entry for space to add a name in a node-format leaf block.
- * If this is an addname then the extrablk in state is a freespace block,
+ * The extrablk in state is a freespace block.
- * otherwise it's a data block.
 */
-int
+STATIC int
-xfs_dir2_leafn_lookup_int(
+xfs_dir2_leafn_lookup_for_addname(
        xfs_dabuf_t             *bp,            /* leaf buffer */
        xfs_da_args_t           *args,          /* operation arguments */
        int                     *indexp,        /* out: leaf entry index */
        xfs_da_state_t          *state)         /* state to fill in */
 {
-        xfs_dabuf_t             *curbp;         /* current data/free buffer */
+        xfs_dabuf_t             *curbp = NULL;  /* current data/free buffer */
-        xfs_dir2_db_t           curdb;          /* current data block number */
+        xfs_dir2_db_t           curdb = -1;     /* current data block number */
-        xfs_dir2_db_t           curfdb;         /* current free block number */
+        xfs_dir2_db_t           curfdb = -1;    /* current free block number */
-        xfs_dir2_data_entry_t   *dep;           /* data block entry */
        xfs_inode_t             *dp;            /* incore directory inode */
        int                     error;          /* error return value */
        int                     fi;             /* free entry index */
-        xfs_dir2_free_t         *free=NULL;     /* free block structure */
+        xfs_dir2_free_t         *free = NULL;   /* free block structure */
        int                     index;          /* leaf entry index */
        xfs_dir2_leaf_t         *leaf;          /* leaf structure */
-        int                     length=0;       /* length of new data entry */
+        int                     length;         /* length of new data entry */
        xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
        xfs_mount_t             *mp;            /* filesystem mount point */
        xfs_dir2_db_t           newdb;          /* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
        /*
         * Do we have a buffer coming in?
         */
-        if (state->extravalid)
+        if (state->extravalid) {
+                /* If so, it's a free block buffer, get the block number. */
                curbp = state->extrablk.bp;
-        else
+                curfdb = state->extrablk.blkno;
-                curbp = NULL;
+                free = curbp->data;
-        /*
+                ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
-         * For addname, it's a free block buffer, get the block number.
-         */
-        if (args->addname) {
-                curfdb = curbp ? state->extrablk.blkno : -1;
-                curdb = -1;
-                length = xfs_dir2_data_entsize(args->namelen);
-                if ((free = (curbp ? curbp->data : NULL)))
-                        ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
-        }
-        /*
-         * For others, it's a data block buffer, get the block number.
-         */
-        else {
-                curfdb = -1;
-                curdb = curbp ? state->extrablk.blkno : -1;
        }
+        length = xfs_dir2_data_entsize(args->namelen);
        /*
         * Loop over leaf entries with the right hash value.
         */
-        for (lep = &leaf->ents[index];
+        for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
-             index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
+                                be32_to_cpu(lep->hashval) == args->hashval;
-             lep++, index++) {
+                                lep++, index++) {
                /*
                 * Skip stale leaf entries.
                 */
@@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int(
                 * For addname, we're looking for a place to put the new entry.
                 * We want to use a data block with an entry of equal
                 * hash value to ours if there is one with room.
+                 *
+                 * If this block isn't the data block we already have
+                 * in hand, take a look at it.
                 */
-                if (args->addname) {
+                if (newdb != curdb) {
+                        curdb = newdb;
                        /*
-                         * If this block isn't the data block we already have
+                         * Convert the data block to the free block
-                         * in hand, take a look at it.
+                         * holding its freespace information.
                         */
-                        if (newdb != curdb) {
+                        newfdb = xfs_dir2_db_to_fdb(mp, newdb);
-                                curdb = newdb;
-                                /*
-                                 * Convert the data block to the free block
-                                 * holding its freespace information.
-                                 */
-                                newfdb = xfs_dir2_db_to_fdb(mp, newdb);
-                                /*
-                                 * If it's not the one we have in hand,
-                                 * read it in.
-                                 */
-                                if (newfdb != curfdb) {
-                                        /*
-                                         * If we had one before, drop it.
-                                         */
-                                        if (curbp)
-                                                xfs_da_brelse(tp, curbp);
-                                        /*
-                                         * Read the free block.
-                                         */
-                                        if ((error = xfs_da_read_buf(tp, dp,
-                                                        xfs_dir2_db_to_da(mp,
-                                                                newfdb),
-                                                        -1, &curbp,
-                                                        XFS_DATA_FORK))) {
-                                                return error;
-                                        }
-                                        free = curbp->data;
-                                        ASSERT(be32_to_cpu(free->hdr.magic) ==
-                                               XFS_DIR2_FREE_MAGIC);
-                                        ASSERT((be32_to_cpu(free->hdr.firstdb) %
-                                                XFS_DIR2_MAX_FREE_BESTS(mp)) ==
-                                               0);
-                                        ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
-                                        ASSERT(curdb <
-                                               be32_to_cpu(free->hdr.firstdb) +
-                                               be32_to_cpu(free->hdr.nvalid));
-                                }
-                                /*
-                                 * Get the index for our entry.
-                                 */
-                                fi = xfs_dir2_db_to_fdindex(mp, curdb);
-                                /*
-                                 * If it has room, return it.
-                                 */
-                                if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
-                                        XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
-                                                         XFS_ERRLEVEL_LOW, mp);
-                                        if (curfdb != newfdb)
-                                                xfs_da_brelse(tp, curbp);
-                                        return XFS_ERROR(EFSCORRUPTED);
-                                }
-                                curfdb = newfdb;
-                                if (be16_to_cpu(free->bests[fi]) >= length) {
-                                        *indexp = index;
-                                        state->extravalid = 1;
-                                        state->extrablk.bp = curbp;
-                                        state->extrablk.blkno = curfdb;
-                                        state->extrablk.index = fi;
-                                        state->extrablk.magic =
-                                                XFS_DIR2_FREE_MAGIC;
-                                        ASSERT(args->oknoent);
-                                        return XFS_ERROR(ENOENT);
-                                }
-                        }
-                }
-                /*
-                 * Not adding a new entry, so we really want to find
-                 * the name given to us.
-                 */
-                else {
                        /*
-                         * If it's a different data block, go get it.
+                         * If it's not the one we have in hand, read it in.
                         */
-                        if (newdb != curdb) {
+                        if (newfdb != curfdb) {
                                /*
-                                 * If we had a block before, drop it.
+                                 * If we had one before, drop it.
                                 */
                                if (curbp)
                                        xfs_da_brelse(tp, curbp);
                                /*
-                                 * Read the data block.
+                                 * Read the free block.
                                 */
-                                if ((error =
+                                error = xfs_da_read_buf(tp, dp,
-                                    xfs_da_read_buf(tp, dp,
+                                                xfs_dir2_db_to_da(mp, newfdb),
-                                            xfs_dir2_db_to_da(mp, newdb), -1,
+                                                -1, &curbp, XFS_DATA_FORK);
-                                            &curbp, XFS_DATA_FORK))) {
+                                if (error)
                                        return error;
-                                }
+                                free = curbp->data;
-                                xfs_dir2_data_check(dp, curbp);
+                                ASSERT(be32_to_cpu(free->hdr.magic) ==
-                                curdb = newdb;
+                                        XFS_DIR2_FREE_MAGIC);
+                                ASSERT((be32_to_cpu(free->hdr.firstdb) %
+                                        XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+                                ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
+                                ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
+                                        be32_to_cpu(free->hdr.nvalid));
                        }
                        /*
-                         * Point to the data entry.
+                         * Get the index for our entry.
                         */
-                        dep = (xfs_dir2_data_entry_t *)
+                        fi = xfs_dir2_db_to_fdindex(mp, curdb);
-                              ((char *)curbp->data +
-                               xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
                        /*
-                         * Compare the entry, return it if it matches.
+                         * If it has room, return it.
                         */
-                        if (dep->namelen == args->namelen &&
+                        if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
-                            dep->name[0] == args->name[0] &&
+                                XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
-                            memcmp(dep->name, args->name, args->namelen) == 0) {
+                                                        XFS_ERRLEVEL_LOW, mp);
-                                args->inumber = be64_to_cpu(dep->inumber);
+                                if (curfdb != newfdb)
-                                *indexp = index;
+                                        xfs_da_brelse(tp, curbp);
-                                state->extravalid = 1;
+                                return XFS_ERROR(EFSCORRUPTED);
-                                state->extrablk.bp = curbp;
-                                state->extrablk.blkno = curdb;
-                                state->extrablk.index =
-                                        (int)((char *)dep -
-                                              (char *)curbp->data);
-                                state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
-                                return XFS_ERROR(EEXIST);
                        }
+                        curfdb = newfdb;
+                        if (be16_to_cpu(free->bests[fi]) >= length)
+                                goto out;
                }
        }
+        /* Didn't find any space */
+        fi = -1;
+out:
+        ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
+        if (curbp) {
+                /* Giving back a free block. */
+                state->extravalid = 1;
+                state->extrablk.bp = curbp;
+                state->extrablk.index = fi;
+                state->extrablk.blkno = curfdb;
+                state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+        } else {
+                state->extravalid = 0;
+        }
        /*
-         * Didn't find a match.
+         * Return the index, that will be the insertion point.
-         * If we are holding a buffer, give it back in case our caller
-         * finds it useful.
         */
-        if ((state->extravalid = (curbp != NULL))) {
+        *indexp = index;
-                state->extrablk.bp = curbp;
+        return XFS_ERROR(ENOENT);
-                state->extrablk.index = -1;
+}
+/*
+ * Look up a leaf entry in a node-format leaf block.
+ * The extrablk in state is a data block.
+ */
+STATIC int
+xfs_dir2_leafn_lookup_for_entry(
+        xfs_dabuf_t             *bp,            /* leaf buffer */
+        xfs_da_args_t           *args,          /* operation arguments */
+        int                     *indexp,        /* out: leaf entry index */
+        xfs_da_state_t          *state)         /* state to fill in */
+{
+        xfs_dabuf_t             *curbp = NULL;  /* current data/free buffer */
+        xfs_dir2_db_t           curdb = -1;     /* current data block number */
+        xfs_dir2_data_entry_t   *dep;           /* data block entry */
+        xfs_inode_t             *dp;            /* incore directory inode */
+        int                     error;          /* error return value */
+        int                     index;          /* leaf entry index */
+        xfs_dir2_leaf_t         *leaf;          /* leaf structure */
+        xfs_dir2_leaf_entry_t   *lep;           /* leaf entry */
+        xfs_mount_t             *mp;            /* filesystem mount point */
+        xfs_dir2_db_t           newdb;          /* new data block number */
+        xfs_trans_t             *tp;            /* transaction pointer */
+        enum xfs_dacmp          cmp;            /* comparison result */
+        dp = args->dp;
+        tp = args->trans;
+        mp = dp->i_mount;
+        leaf = bp->data;
+        ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+        ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
+#endif
+        xfs_dir2_leafn_check(dp, bp);
+        /*
+         * Look up the hash value in the leaf entries.
+         */
+        index = xfs_dir2_leaf_search_hash(args, bp);
+        /*
+         * Do we have a buffer coming in?
+         */
+        if (state->extravalid) {
+                curbp = state->extrablk.bp;
+                curdb = state->extrablk.blkno;
+        }
+        /*
+         * Loop over leaf entries with the right hash value.
+         */
+        for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
+                                be32_to_cpu(lep->hashval) == args->hashval;
+                                lep++, index++) {
                /*
-                 * For addname, giving back a free block.
+                 * Skip stale leaf entries.
                 */
-                if (args->addname) {
+                if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
-                        state->extrablk.blkno = curfdb;
+                        continue;
-                        state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+                /*
+                 * Pull the data block number from the entry.
+                 */
+                newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
+                /*
+                 * Not adding a new entry, so we really want to find
+                 * the name given to us.
+                 *
+                 * If it's a different data block, go get it.
+                 */
+                if (newdb != curdb) {
+                        /*
+                         * If we had a block before that we aren't saving
+                         * for a CI name, drop it
+                         */
+                        if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
+                                                curdb != state->extrablk.blkno))
+                                xfs_da_brelse(tp, curbp);
+                        /*
+                         * If we need the block that is saved with a CI match,
+                         * use it; otherwise read in the new data block.
+                         */
+                        if (args->cmpresult != XFS_CMP_DIFFERENT &&
+                                        newdb == state->extrablk.blkno) {
+                                ASSERT(state->extravalid);
+                                curbp = state->extrablk.bp;
+                        } else {
+                                error = xfs_da_read_buf(tp, dp,
+                                                xfs_dir2_db_to_da(mp, newdb),
+                                                -1, &curbp, XFS_DATA_FORK);
+                                if (error)
+                                        return error;
+                        }
+                        xfs_dir2_data_check(dp, curbp);
+                        curdb = newdb;
                }
                /*
-                 * For other callers, giving back a data block.
+                 * Point to the data entry.
                 */
-                else {
+                dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+                        xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+                /*
+                 * Compare the entry and if it's an exact match, return
+                 * EEXIST immediately. If it's the first case-insensitive
+                 * match, store the block & inode number and continue looking.
+                 */
+                cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+                if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+                        /* If there is a CI match block, drop it */
+                        if (args->cmpresult != XFS_CMP_DIFFERENT &&
+                                                curdb != state->extrablk.blkno)
+                                xfs_da_brelse(tp, state->extrablk.bp);
+                        args->cmpresult = cmp;
+                        args->inumber = be64_to_cpu(dep->inumber);
+                        *indexp = index;
+                        state->extravalid = 1;
+                        state->extrablk.bp = curbp;
                        state->extrablk.blkno = curdb;
+                        state->extrablk.index = (int)((char *)dep -
+                                                        (char *)curbp->data);
                        state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                        if (cmp == XFS_CMP_EXACT)
+                                return XFS_ERROR(EEXIST);
                }
        }
-        /*
+        ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
-         * Return the final index, that will be the insertion point.
+                                        (args->op_flags & XFS_DA_OP_OKNOENT));
-         */
+        if (curbp) {
+                if (args->cmpresult == XFS_CMP_DIFFERENT) {
+                        /* Giving back last used data block. */
+                        state->extravalid = 1;
+                        state->extrablk.bp = curbp;
+                        state->extrablk.index = -1;
+                        state->extrablk.blkno = curdb;
+                        state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+                } else {
+                        /* If the curbp is not the CI match block, drop it */
+                        if (state->extrablk.bp != curbp)
+                                xfs_da_brelse(tp, curbp);
+                }
+        } else {
+                state->extravalid = 0;
+        }
        *indexp = index;
-        ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
        return XFS_ERROR(ENOENT);
 }
 /*
+ * Look up a leaf entry in a node-format leaf block.
+ * If this is an addname then the extrablk in state is a freespace block,
+ * otherwise it's a data block.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+        xfs_dabuf_t             *bp,            /* leaf buffer */
+        xfs_da_args_t           *args,          /* operation arguments */
+        int                     *indexp,        /* out: leaf entry index */
+        xfs_da_state_t          *state)         /* state to fill in */
+{
+        if (args->op_flags & XFS_DA_OP_ADDNAME)
+                return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
+                                                        state);
+        return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
+}
+/*
 * Move count leaf entries from source to destination leaf.
 * Log entries and headers.  Stale entries are preserved.
 */
@@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance(
         */
        if (!state->inleaf)
                blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
-        
-        /* 
+        /*
-         * Finally sanity check just to make sure we are not returning a negative index 
+         * Finally sanity check just to make sure we are not returning a
+         * negative index
         */
        if(blk2->index < 0) {
                state->inleaf = 1;
@@ -1332,7 +1401,7 @@ xfs_dir2_node_addname(
                /*
                 * It worked, fix the hash values up the btree.
                 */
-                if (!args->justcheck)
+                if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
                        xfs_da_fixhashpath(state, &state->path);
        } else {
                /*
@@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int(
                /*
                 * Not allowed to allocate, return failure.
                 */
-                if (args->justcheck || args->total == 0) {
+                if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
+                                                        args->total == 0) {
                        /*
                         * Drop the freespace buffer unless it came from our
                         * caller.
@@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int(
                /*
                 * If just checking, we succeeded.
                 */
-                if (args->justcheck) {
+                if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
                        if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
                                xfs_da_buf_done(fbp);
                        return 0;
@@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup(
        error = xfs_da_node_lookup_int(state, &rval);
        if (error)
                rval = error;
+        else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
+                /* If a CI match, dup the actual name and return EEXIST */
+                xfs_dir2_data_entry_t   *dep;
+                dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
+                                                data + state->extrablk.index);
+                rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
+        }
        /*
         * Release the btree blocks and leaf block.
         */
@@ -1810,9 +1888,8 @@ xfs_dir2_node_removename(
         * Look up the entry we're deleting, set up the cursor.
         */
        error = xfs_da_node_lookup_int(state, &rval);
-        if (error) {
+        if (error)
                rval = error;
-        }
        /*
         * Didn't find it, upper layer screwed up.
         */
@@ -1829,9 +1906,8 @@ xfs_dir2_node_removename(
         */
        error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
                &state->extrablk, &rval);
-        if (error) {
+        if (error)
                return error;
-        }
        /*
         * Fix the hash values up the btree.
         */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1cef..b46af0013ec9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
        xfs_dir2_sf_check(args);
 out:
        xfs_trans_log_inode(args->trans, dp, logflags);
-        kmem_free(block, mp->m_dirblksize);
+        kmem_free(block);
        return error;
 }
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
                /*
                 * Just checking or no space reservation, it doesn't fit.
                 */
-                if (args->justcheck || args->total == 0)
+                if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
                        return XFS_ERROR(ENOSPC);
                /*
                 * Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
        /*
         * Just checking, it fits.
         */
-        if (args->justcheck)
+        if (args->op_flags & XFS_DA_OP_JUSTCHECK)
                return 0;
        /*
         * Do it the easy way - just add it at the end.
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
                sfep = xfs_dir2_sf_nextentry(sfp, sfep);
                memcpy(sfep, oldsfep, old_isize - nbytes);
        }
-        kmem_free(buf, old_isize);
+        kmem_free(buf);
        dp->i_d.di_size = new_isize;
        xfs_dir2_sf_check(args);
 }
@@ -812,8 +812,11 @@ xfs_dir2_sf_lookup(
 {
        xfs_inode_t             *dp;            /* incore directory inode */
        int                     i;              /* entry index */
+        int                     error;
        xfs_dir2_sf_entry_t     *sfep;          /* shortform directory entry */
        xfs_dir2_sf_t           *sfp;           /* shortform structure */
+        enum xfs_dacmp          cmp;            /* comparison result */
+        xfs_dir2_sf_entry_t     *ci_sfep;       /* case-insens. entry */
        xfs_dir2_trace_args("sf_lookup", args);
        xfs_dir2_sf_check(args);
@@ -836,6 +839,7 @@ xfs_dir2_sf_lookup(
         */
        if (args->namelen == 1 && args->name[0] == '.') {
                args->inumber = dp->i_ino;
+                args->cmpresult = XFS_CMP_EXACT;
                return XFS_ERROR(EEXIST);
        }
        /*
@@ -844,28 +848,41 @@ xfs_dir2_sf_lookup(
        if (args->namelen == 2 &&
            args->name[0] == '.' && args->name[1] == '.') {
                args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+                args->cmpresult = XFS_CMP_EXACT;
                return XFS_ERROR(EEXIST);
        }
        /*
         * Loop over all the entries trying to match ours.
         */
-        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
+        ci_sfep = NULL;
-             i < sfp->hdr.count;
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
-             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+                                i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-                if (sfep->namelen == args->namelen &&
+                /*
-                    sfep->name[0] == args->name[0] &&
+                 * Compare name and if it's an exact match, return the inode
-                    memcmp(args->name, sfep->name, args->namelen) == 0) {
+                 * number. If it's the first case-insensitive match, store the
-                        args->inumber =
+                 * inode number and continue looking for an exact match.
-                                xfs_dir2_sf_get_inumber(sfp,
+                 */
-                                        xfs_dir2_sf_inumberp(sfep));
+                cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
-                        return XFS_ERROR(EEXIST);
+                                                                sfep->namelen);
+                if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
+                        args->cmpresult = cmp;
+                        args->inumber = xfs_dir2_sf_get_inumber(sfp,
+                                                xfs_dir2_sf_inumberp(sfep));
+                        if (cmp == XFS_CMP_EXACT)
+                                return XFS_ERROR(EEXIST);
+                        ci_sfep = sfep;
                }
        }
+        ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
        /*
-         * Didn't find it.
+         * Here, we can only be doing a lookup (not a rename or replace).
+         * If a case-insensitive match was not found, return ENOENT.
         */
-        ASSERT(args->oknoent);
+        if (!ci_sfep)
-        return XFS_ERROR(ENOENT);
+                return XFS_ERROR(ENOENT);
+        /* otherwise process the CI match as required by the caller */
+        error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
+        return XFS_ERROR(error);
 }
 /*
@@ -904,24 +921,21 @@ xfs_dir2_sf_removename(
         * Loop over the old directory entries.
         * Find the one we're deleting.
         */
-        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
+        for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
-             i < sfp->hdr.count;
+                                i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-             i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+                if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
-                if (sfep->namelen == args->namelen &&
+                                                                XFS_CMP_EXACT) {
-                    sfep->name[0] == args->name[0] &&
-                    memcmp(sfep->name, args->name, args->namelen) == 0) {
                        ASSERT(xfs_dir2_sf_get_inumber(sfp,
-                                        xfs_dir2_sf_inumberp(sfep)) ==
+                                                xfs_dir2_sf_inumberp(sfep)) ==
-                                args->inumber);
+                                                                args->inumber);
                        break;
                }
        }
        /*
         * Didn't find it.
         */
-        if (i == sfp->hdr.count) {
+        if (i == sfp->hdr.count)
                return XFS_ERROR(ENOENT);
-        }
        /*
         * Calculate sizes.
         */
@@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace(
         */
        else {
                for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-                     i < sfp->hdr.count;
+                                i < sfp->hdr.count;
-                     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+                                i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
-                        if (sfep->namelen == args->namelen &&
+                        if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
-                            sfep->name[0] == args->name[0] &&
+                                                                XFS_CMP_EXACT) {
-                            memcmp(args->name, sfep->name, args->namelen) == 0) {
 #if XFS_BIG_INUMS || defined(DEBUG)
                                ino = xfs_dir2_sf_get_inumber(sfp,
                                        xfs_dir2_sf_inumberp(sfep));
@@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace(
                 * Didn't find it.
                 */
                if (i == sfp->hdr.count) {
-                        ASSERT(args->oknoent);
+                        ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
 #if XFS_BIG_INUMS
                        if (i8elevated)
                                xfs_dir2_sf_toino4(args);
@@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4(
        /*
         * Clean up the inode.
         */
-        kmem_free(buf, oldsize);
+        kmem_free(buf);
        dp->i_d.di_size = newsize;
        xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
 }
@@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8(
        /*
         * Clean up the inode.
         */
-        kmem_free(buf, oldsize);
+        kmem_free(buf);
        dp->i_d.di_size = newsize;
        xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
 }
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d2..deecc9d238f8 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
 * Only need 16 bits, this is the byte offset into the single block form.
 */
-typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t;
+typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
 /*
 * The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
        __uint8_t               count;          /* count of entries */
        __uint8_t               i8count;        /* count of 8-byte inode #s */
        xfs_dir2_inou_t         parent;         /* parent dir inode number */
-} xfs_dir2_sf_hdr_t;
+} __arch_pack xfs_dir2_sf_hdr_t;
 typedef struct xfs_dir2_sf_entry {
        __uint8_t               namelen;        /* actual name length */
        xfs_dir2_sf_off_t       offset;         /* saved offset */
        __uint8_t               name[1];        /* name, variable size */
        xfs_dir2_inou_t         inumber;        /* inode number, var. offset */
-} xfs_dir2_sf_entry_t;
+} __arch_pack xfs_dir2_sf_entry_t; 
 typedef struct xfs_dir2_sf {
        xfs_dir2_sf_hdr_t       hdr;            /* shortform header */
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5c..6cc7c0c681ac 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck, NULL, NULL);
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+                NULL, NULL);
 }
 void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck,
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
                (void *)(bp ? bp->bps[0] : NULL), NULL);
 }
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck,
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
                (void *)(lbp ? lbp->bps[0] : NULL),
                (void *)(dbp ? dbp->bps[0] : NULL));
 }
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck, (void *)(long)db,
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
-                (void *)dbp);
+                (void *)(long)db, (void *)dbp);
 }
 void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck,
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
                (void *)((unsigned long)(i >> 32)),
                (void *)((unsigned long)(i & 0xFFFFFFFF)));
 }
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL);
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
+                (void *)(long)s, NULL);
 }
 void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
                (void *)((unsigned long)(args->inumber >> 32)),
                (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
                (void *)args->dp, (void *)args->trans,
-                (void *)(unsigned long)args->justcheck, (void *)(long)s,
+                (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
-                (void *)dbp);
+                (void *)(long)s, (void *)dbp);
 }
 #endif  /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a60..cdc2d3464a1a 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,6 +166,6 @@ typedef enum {
 #define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
                        DM_FLAGS_NDELAY : 0)
-#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
+#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
 #endif  /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c31..f66756cfb5e8 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,14 +66,6 @@ int	xfs_etest[XFS_NUM_INJECT_ERROR];
 int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
 char *  xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-void
-xfs_error_test_init(void)
-{
-        memset(xfs_etest, 0, sizeof(xfs_etest));
-        memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
-        memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
-}
 int
 xfs_error_test(int error_tag, int *fsidp, char *expression,
               int line, char *file, unsigned long randfactor)
@@ -150,8 +142,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
                                xfs_etest[i]);
                        xfs_etest[i] = 0;
                        xfs_etest_fsid[i] = 0LL;
-                        kmem_free(xfs_etest_fsname[i],
+                        kmem_free(xfs_etest_fsname[i]);
-                                  strlen(xfs_etest_fsname[i]) + 1);
                        xfs_etest_fsname[i] = NULL;
                }
        }
@@ -175,7 +166,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
                newfmt = kmem_alloc(len, KM_SLEEP);
                sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
                icmn_err(level, newfmt, ap);
-                kmem_free(newfmt, len);
+                kmem_free(newfmt);
        } else {
                icmn_err(level, fmt, ap);
        }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e1..d8559d132efa 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,7 +127,6 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
 #if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
 extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
-extern void xfs_error_test_init(void);
 #define XFS_NUM_INJECT_ERROR                            10
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb8..8aa28f751b2a 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
        int nexts = efip->efi_format.efi_nextents;
        if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
-                kmem_free(efip, sizeof(xfs_efi_log_item_t) +
+                kmem_free(efip);
-                                (nexts - 1) * sizeof(xfs_extent_t));
        } else {
                kmem_zone_free(xfs_efi_zone, efip);
        }
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
        int nexts = efdp->efd_format.efd_nextents;
        if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
-                kmem_free(efdp, sizeof(xfs_efd_log_item_t) +
+                kmem_free(efdp);
-                                (nexts - 1) * sizeof(xfs_extent_t));
        } else {
                kmem_zone_free(xfs_efd_zone, efdp);
        }
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b10804..c38fd14fca29 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
 xfs_filestream_init(void)
 {
        item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
+        if (!item_zone)
+                return -ENOMEM;
 #ifdef XFS_FILESTREAMS_TRACE
        xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
 #endif
-        return item_zone ? 0 : -ENOMEM;
+        return 0;
 }
 /*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d050..01c0cc88d3f3 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_LOGV2       0x0100  /* log format version 2 */
 #define XFS_FSOP_GEOM_FLAGS_SECTOR      0x0200  /* sector sizes >1BB    */
 #define XFS_FSOP_GEOM_FLAGS_ATTR2       0x0400  /* inline attributes rework */
+#define XFS_FSOP_GEOM_FLAGS_DIRV2CI     0x1000  /* ASCII only CI names */
 #define XFS_FSOP_GEOM_FLAGS_LAZYSB      0x4000  /* lazy superblock counters */
@@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
 typedef struct xfs_attr_multiop {
        __u32           am_opcode;
+#define ATTR_OP_GET     1       /* return the indicated attr's value */
+#define ATTR_OP_SET     2       /* set/create the indicated attr/value pair */
+#define ATTR_OP_REMOVE  3       /* remove the indicated attr */
        __s32           am_error;
        void            __user *am_attrname;
        void            __user *am_attrvalue;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7bc..84583cf73db3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
                                XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
                        (xfs_sb_version_hassector(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
+                        (xfs_sb_version_hasasciici(&mp->m_sb) ?
+                                XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
                        (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
                        (xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
                        xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
                        thaw_bdev(sb->s_bdev, sb);
                }
-        
                break;
        }
        case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf0..bedc66163176 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1763,67 +1763,6 @@ xfs_itruncate_finish(
        return 0;
 }
-/*
- * xfs_igrow_start
- *
- * Do the first part of growing a file: zero any data in the last
- * block that is beyond the old EOF.  We need to do this before
- * the inode is joined to the transaction to modify the i_size.
- * That way we can drop the inode lock and call into the buffer
- * cache to get the buffer mapping the EOF.
- */
-int
-xfs_igrow_start(
-        xfs_inode_t     *ip,
-        xfs_fsize_t     new_size,
-        cred_t          *credp)
-{
-        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT(new_size > ip->i_size);
-        /*
-         * Zero any pages that may have been created by
-         * xfs_write_file() beyond the end of the file
-         * and any blocks between the old and new file sizes.
-         */
-        return xfs_zero_eof(ip, new_size, ip->i_size);
-}
-/*
- * xfs_igrow_finish
- *
- * This routine is called to extend the size of a file.
- * The inode must have both the iolock and the ilock locked
- * for update and it must be a part of the current transaction.
- * The xfs_igrow_start() function must have been called previously.
- * If the change_flag is not zero, the inode change timestamp will
- * be updated.
- */
-void
-xfs_igrow_finish(
-        xfs_trans_t     *tp,
-        xfs_inode_t     *ip,
-        xfs_fsize_t     new_size,
-        int             change_flag)
-{
-        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-        ASSERT(ip->i_transp == tp);
-        ASSERT(new_size > ip->i_size);
-        /*
-         * Update the file size.  Update the inode change timestamp
-         * if change_flag set.
-         */
-        ip->i_d.di_size = new_size;
-        ip->i_size = new_size;
-        if (change_flag)
-                xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
-        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-}
 /*
 * This is called when the inode's link count goes to 0.
 * We place the on-disk inode on a list in the AGI.  It
@@ -2258,7 +2197,7 @@ xfs_ifree_cluster(
                xfs_trans_binval(tp, bp);
        }
-        kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
+        kmem_free(ip_found);
        xfs_put_perag(mp, pag);
 }
@@ -2470,7 +2409,7 @@ xfs_iroot_realloc(
                                                     (int)new_size);
                memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
        }
-        kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+        kmem_free(ifp->if_broot);
        ifp->if_broot = new_broot;
        ifp->if_broot_bytes = (int)new_size;
        ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2453,7 @@ xfs_idata_realloc(
        if (new_size == 0) {
                if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-                        kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+                        kmem_free(ifp->if_u1.if_data);
                }
                ifp->if_u1.if_data = NULL;
                real_size = 0;
@@ -2529,7 +2468,7 @@ xfs_idata_realloc(
                        ASSERT(ifp->if_real_bytes != 0);
                        memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
                              new_size);
-                        kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+                        kmem_free(ifp->if_u1.if_data);
                        ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
                }
                real_size = 0;
@@ -2636,7 +2575,7 @@ xfs_idestroy_fork(
        ifp = XFS_IFORK_PTR(ip, whichfork);
        if (ifp->if_broot != NULL) {
-                kmem_free(ifp->if_broot, ifp->if_broot_bytes);
+                kmem_free(ifp->if_broot);
                ifp->if_broot = NULL;
        }
@@ -2650,7 +2589,7 @@ xfs_idestroy_fork(
                if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
                    (ifp->if_u1.if_data != NULL)) {
                        ASSERT(ifp->if_real_bytes != 0);
-                        kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes);
+                        kmem_free(ifp->if_u1.if_data);
                        ifp->if_u1.if_data = NULL;
                        ifp->if_real_bytes = 0;
                }
@@ -3058,7 +2997,7 @@ xfs_iflush_cluster(
 out_free:
        read_unlock(&pag->pag_ici_lock);
-        kmem_free(ilist, ilist_size);
+        kmem_free(ilist);
        return 0;
@@ -3102,7 +3041,7 @@ cluster_corrupt_out:
         * Unlocks the flush lock
         */
        xfs_iflush_abort(iq);
-        kmem_free(ilist, ilist_size);
+        kmem_free(ilist);
        return XFS_ERROR(EFSCORRUPTED);
 }
@@ -3143,8 +3082,6 @@ xfs_iflush(
         * flush lock and do nothing.
         */
        if (xfs_inode_clean(ip)) {
-                ASSERT((iip != NULL) ?
-                         !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
                xfs_ifunlock(ip);
                return 0;
        }
@@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi(
                        erp = xfs_iext_irec_new(ifp, erp_idx);
                }
                memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
-                kmem_free(nex2_ep, byte_diff);
+                kmem_free(nex2_ep);
                erp->er_extcount += nex2;
                xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
        }
@@ -4112,7 +4049,7 @@ xfs_iext_direct_to_inline(
         */
        memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
                nextents * sizeof(xfs_bmbt_rec_t));
-        kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+        kmem_free(ifp->if_u1.if_extents);
        ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
        ifp->if_real_bytes = 0;
 }
@@ -4186,7 +4123,7 @@ xfs_iext_indirect_to_direct(
        ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
        ep = ifp->if_u1.if_ext_irec->er_extbuf;
-        kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t));
+        kmem_free(ifp->if_u1.if_ext_irec);
        ifp->if_flags &= ~XFS_IFEXTIREC;
        ifp->if_u1.if_extents = ep;
        ifp->if_bytes = size;
@@ -4212,7 +4149,7 @@ xfs_iext_destroy(
                }
                ifp->if_flags &= ~XFS_IFEXTIREC;
        } else if (ifp->if_real_bytes) {
-                kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes);
+                kmem_free(ifp->if_u1.if_extents);
        } else if (ifp->if_bytes) {
                memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
                        sizeof(xfs_bmbt_rec_t));
@@ -4483,7 +4420,7 @@ xfs_iext_irec_remove(
        if (erp->er_extbuf) {
                xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
                        -erp->er_extcount);
-                kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ);
+                kmem_free(erp->er_extbuf);
        }
        /* Compact extent records */
        erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4438,7 @@ xfs_iext_irec_remove(
                xfs_iext_realloc_indirect(ifp,
                        nlists * sizeof(xfs_ext_irec_t));
        } else {
-                kmem_free(ifp->if_u1.if_ext_irec,
+                kmem_free(ifp->if_u1.if_ext_irec);
-                        sizeof(xfs_ext_irec_t));
        }
        ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
 }
@@ -4571,7 +4507,7 @@ xfs_iext_irec_compact_pages(
                         * so er_extoffs don't get modified in
                         * xfs_iext_irec_remove.
                         */
-                        kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ);
+                        kmem_free(erp_next->er_extbuf);
                        erp_next->er_extbuf = NULL;
                        xfs_iext_irec_remove(ifp, erp_idx + 1);
                        nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4596,40 +4532,63 @@ xfs_iext_irec_compact_full(
        int             nlists;                 /* number of irec's (ex lists) */
        ASSERT(ifp->if_flags & XFS_IFEXTIREC);
        nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
        erp = ifp->if_u1.if_ext_irec;
        ep = &erp->er_extbuf[erp->er_extcount];
        erp_next = erp + 1;
        ep_next = erp_next->er_extbuf;
        while (erp_idx < nlists - 1) {
+                /*
+                 * Check how many extent records are available in this irec.
+                 * If there are none, skip the whole exercise.
+                 */
                ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-                ext_diff = MIN(ext_avail, erp_next->er_extcount);
+                if (ext_avail) {
-                memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
-                erp->er_extcount += ext_diff;
-                erp_next->er_extcount -= ext_diff;
-                /* Remove next page */
-                if (erp_next->er_extcount == 0) {
                        /*
-                         * Free page before removing extent record
+                         * Copy over as many extent records as possible into
-                         * so er_extoffs don't get modified in
+                         * the previous page.
-                         * xfs_iext_irec_remove.
                         */
-                        kmem_free(erp_next->er_extbuf,
+                        ext_diff = MIN(ext_avail, erp_next->er_extcount);
-                                erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
+                        memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
-                        erp_next->er_extbuf = NULL;
+                        erp->er_extcount += ext_diff;
-                        xfs_iext_irec_remove(ifp, erp_idx + 1);
+                        erp_next->er_extcount -= ext_diff;
-                        erp = &ifp->if_u1.if_ext_irec[erp_idx];
-                        nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+                        /*
-                /* Update next page */
+                         * If the next irec is empty now we can simply
-                } else {
+                         * remove it.
-                        /* Move rest of page up to become next new page */
+                         */
-                        memmove(erp_next->er_extbuf, ep_next,
+                        if (erp_next->er_extcount == 0) {
-                                erp_next->er_extcount * sizeof(xfs_bmbt_rec_t));
+                                /*
-                        ep_next = erp_next->er_extbuf;
+                                 * Free page before removing extent record
-                        memset(&ep_next[erp_next->er_extcount], 0,
+                                 * so er_extoffs don't get modified in
-                                (XFS_LINEAR_EXTS - erp_next->er_extcount) *
+                                 * xfs_iext_irec_remove.
-                                sizeof(xfs_bmbt_rec_t));
+                                 */
+                                kmem_free(erp_next->er_extbuf);
+                                erp_next->er_extbuf = NULL;
+                                xfs_iext_irec_remove(ifp, erp_idx + 1);
+                                erp = &ifp->if_u1.if_ext_irec[erp_idx];
+                                nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+                        /*
+                         * If the next irec is not empty move up the content
+                         * that has not been copied to the previous page to
+                         * the beginning of this one.
+                         */
+                        } else {
+                                memmove(erp_next->er_extbuf, &ep_next[ext_diff],
+                                        erp_next->er_extcount *
+                                        sizeof(xfs_bmbt_rec_t));
+                                ep_next = erp_next->er_extbuf;
+                                memset(&ep_next[erp_next->er_extcount], 0,
+                                        (XFS_LINEAR_EXTS -
+                                                erp_next->er_extcount) *
+                                        sizeof(xfs_bmbt_rec_t));
+                        }
                }
                if (erp->er_extcount == XFS_LINEAR_EXTS) {
                        erp_idx++;
                        if (erp_idx < nlists)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f03..17a04b6321ed 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -507,9 +507,6 @@ int		xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
 int             xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
                                     xfs_fsize_t, int, int);
 int             xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-int             xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
-void            xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
-                                 xfs_fsize_t, int);
 void            xfs_idestroy_fork(xfs_inode_t *, int);
 void            xfs_idestroy(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f15772..0eee08a32c26 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
                ASSERT(ip->i_d.di_nextents > 0);
                ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
                ASSERT(ip->i_df.if_bytes > 0);
-                kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
+                kmem_free(iip->ili_extents_buf);
                iip->ili_extents_buf = NULL;
        }
        if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
                ASSERT(ip->i_d.di_anextents > 0);
                ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
                ASSERT(ip->i_afp->if_bytes > 0);
-                kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
+                kmem_free(iip->ili_aextents_buf);
                iip->ili_aextents_buf = NULL;
        }
@@ -957,8 +957,7 @@ xfs_inode_item_destroy(
 {
 #ifdef XFS_TRANS_DEBUG
        if (ip->i_itemp->ili_root_size != 0) {
-                kmem_free(ip->i_itemp->ili_orig_root,
+                kmem_free(ip->i_itemp->ili_orig_root);
-                          ip->i_itemp->ili_root_size);
        }
 #endif
        kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1a..67f22b2b44b3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
        count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
+        /*
+         * Reserve enough blocks in this transaction for two complete extent
+         * btree splits.  We may be converting the middle part of an unwritten
+         * extent and in this case we will insert two new extents in the btree
+         * each of which could cause a full split.
+         *
+         * This reservation amount will be used in the first call to
+         * xfs_bmbt_split() to select an AG with enough space to satisfy the
+         * rest of the operation.
+         */
        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
        do {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb43..9a3ef9dcaeb9 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -257,7 +257,7 @@ xfs_bulkstat_one(
                *ubused = error;
 out_free:
-        kmem_free(buf, sizeof(*buf));
+        kmem_free(buf);
        return error;
 }
@@ -708,7 +708,7 @@ xfs_bulkstat(
        /*
         * Done, we're either out of filesystem or space to put the data.
         */
-        kmem_free(irbuf, irbsize);
+        kmem_free(irbuf);
        *ubcountp = ubelem;
        /*
         * Found some inodes, return them now and return the error next time.
@@ -914,7 +914,7 @@ xfs_inumbers(
                }
                *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
        }
-        kmem_free(buffer, bcount * sizeof(*buffer));
+        kmem_free(buffer);
        if (cur)
                xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
                                           XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe31..91b00a5686cd 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
 static void
 xlog_grant_add_space_write(struct log *log, int bytes)
 {
-        log->l_grant_write_bytes += bytes;
+        int tmp = log->l_logsize - log->l_grant_write_bytes;
-        if (log->l_grant_write_bytes > log->l_logsize) {
+        if (tmp > bytes)
-                log->l_grant_write_bytes -= log->l_logsize;
+                log->l_grant_write_bytes += bytes;
+        else {
                log->l_grant_write_cycle++;
+                log->l_grant_write_bytes = bytes - tmp;
        }
 }
 static void
 xlog_grant_add_space_reserve(struct log *log, int bytes)
 {
-        log->l_grant_reserve_bytes += bytes;
+        int tmp = log->l_logsize - log->l_grant_reserve_bytes;
-        if (log->l_grant_reserve_bytes > log->l_logsize) {
+        if (tmp > bytes)
-                log->l_grant_reserve_bytes -= log->l_logsize;
+                log->l_grant_reserve_bytes += bytes;
+        else {
                log->l_grant_reserve_cycle++;
+                log->l_grant_reserve_bytes = bytes - tmp;
        }
 }
@@ -1228,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
        spin_lock_init(&log->l_icloglock);
        spin_lock_init(&log->l_grant_lock);
-        initnsema(&log->l_flushsema, 0, "ic-flush");
+        sv_init(&log->l_flush_wait, 0, "flush_wait");
        /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
        ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1570,10 +1574,9 @@ xlog_dealloc_log(xlog_t *log)
                }
 #endif
                next_iclog = iclog->ic_next;
-                kmem_free(iclog, sizeof(xlog_in_core_t));
+                kmem_free(iclog);
                iclog = next_iclog;
        }
-        freesema(&log->l_flushsema);
        spinlock_destroy(&log->l_icloglock);
        spinlock_destroy(&log->l_grant_lock);
@@ -1587,7 +1590,7 @@ xlog_dealloc_log(xlog_t *log)
        }
 #endif
        log->l_mp->m_log = NULL;
-        kmem_free(log, sizeof(xlog_t));
+        kmem_free(log);
 }       /* xlog_dealloc_log */
 /*
@@ -2097,6 +2100,7 @@ xlog_state_do_callback(
        int                funcdidcallbacks; /* flag: function did callbacks */
        int                repeats;     /* for issuing console warnings if
                                         * looping too many times */
+        int                wake = 0;
        spin_lock(&log->l_icloglock);
        first_iclog = iclog = log->l_iclog;
@@ -2278,15 +2282,13 @@ xlog_state_do_callback(
        }
 #endif
-        flushcnt = 0;
+        if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
-        if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
+                wake = 1;
-                flushcnt = log->l_flushcnt;
-                log->l_flushcnt = 0;
-        }
        spin_unlock(&log->l_icloglock);
-        while (flushcnt--)
-                vsema(&log->l_flushsema);
+        if (wake)
-}       /* xlog_state_do_callback */
+                sv_broadcast(&log->l_flush_wait);
+}
 /*
@@ -2384,16 +2386,15 @@ restart:
        }
        iclog = log->l_iclog;
-        if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
+        if (iclog->ic_state != XLOG_STATE_ACTIVE) {
-                log->l_flushcnt++;
-                spin_unlock(&log->l_icloglock);
                xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
                XFS_STATS_INC(xs_log_noiclogs);
-                /* Ensure that log writes happen */
-                psema(&log->l_flushsema, PINOD);
+                /* Wait for log writes to have flushed */
+                sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
                goto restart;
        }
-        ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
        head = &iclog->ic_header;
        atomic_inc(&iclog->ic_refcnt);  /* prevents sync */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f3..6245913196b4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -423,10 +423,8 @@ typedef struct log {
        int                     l_logBBsize;    /* size of log in BB chunks */
        /* The following block of fields are changed while holding icloglock */
-        sema_t                  l_flushsema ____cacheline_aligned_in_smp;
+        sv_t                    l_flush_wait ____cacheline_aligned_in_smp;
-                                                /* iclog flushing semaphore */
+                                                /* waiting for iclog flush */
-        int                     l_flushcnt;     /* # of procs waiting on this
-                                                 * sema */
        int                     l_covered_state;/* state of "covering disk
                                                 * log entries" */
        xlog_in_core_t          *l_iclog;       /* head log queue       */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af0955..9eb722ec744e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
                                        } else {
                                                prevp->bc_next = bcp->bc_next;
                                        }
-                                        kmem_free(bcp,
+                                        kmem_free(bcp);
-                                                  sizeof(xfs_buf_cancel_t));
                                }
                        }
                        return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
 error:
        if (need_free)
-                kmem_free(in_f, sizeof(*in_f));
+                kmem_free(in_f);
        return XFS_ERROR(error);
 }
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
                item = item->ri_next;
                 /* Free the regions in the item. */
                for (i = 0; i < free_item->ri_cnt; i++) {
-                        kmem_free(free_item->ri_buf[i].i_addr,
+                        kmem_free(free_item->ri_buf[i].i_addr);
-                                  free_item->ri_buf[i].i_len);
                }
                /* Free the item itself */
-                kmem_free(free_item->ri_buf,
+                kmem_free(free_item->ri_buf);
-                          (free_item->ri_total * sizeof(xfs_log_iovec_t)));
+                kmem_free(free_item);
-                kmem_free(free_item, sizeof(xlog_recover_item_t));
        } while (first_item != item);
        /* Free the transaction recover structure */
-        kmem_free(trans, sizeof(xlog_recover_t));
+        kmem_free(trans);
 }
 STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
        error = xlog_do_recovery_pass(log, head_blk, tail_blk,
                                      XLOG_RECOVER_PASS1);
        if (error != 0) {
-                kmem_free(log->l_buf_cancel_table,
+                kmem_free(log->l_buf_cancel_table);
-                          XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
                log->l_buf_cancel_table = NULL;
                return error;
        }
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
        }
 #endif  /* DEBUG */
-        kmem_free(log->l_buf_cancel_table,
+        kmem_free(log->l_buf_cancel_table);
-                  XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
        log->l_buf_cancel_table = NULL;
        return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b71..6c5d1325e7f6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,12 +47,10 @@
 STATIC int      xfs_mount_log_sb(xfs_mount_t *, __int64_t);
 STATIC int      xfs_uuid_mount(xfs_mount_t *);
-STATIC void     xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void     xfs_unmountfs_wait(xfs_mount_t *);
 #ifdef HAVE_PERCPU_SB
-STATIC void     xfs_icsb_destroy_counters(xfs_mount_t *);
 STATIC void     xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
                                                int);
 STATIC void     xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -63,7 +61,6 @@ STATIC void	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 #else
-#define xfs_icsb_destroy_counters(mp)                   do { } while (0)
 #define xfs_icsb_balance_counter(mp, a, b)              do { } while (0)
 #define xfs_icsb_balance_counter_locked(mp, a, b)       do { } while (0)
 #define xfs_icsb_modify_counters(mp, a, b, c)           do { } while (0)
@@ -126,33 +123,11 @@ static const struct {
 };
 /*
- * Return a pointer to an initialized xfs_mount structure.
- */
-xfs_mount_t *
-xfs_mount_init(void)
-{
-        xfs_mount_t *mp;
-        mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
-        if (xfs_icsb_init_counters(mp)) {
-                mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
-        }
-        spin_lock_init(&mp->m_sb_lock);
-        mutex_init(&mp->m_ilock);
-        mutex_init(&mp->m_growlock);
-        atomic_set(&mp->m_active_trans, 0);
-        return mp;
-}
-/*
 * Free up the resources associated with a mount structure.  Assume that
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
 */
-void
+STATIC void
 xfs_mount_free(
        xfs_mount_t     *mp)
 {
@@ -161,11 +136,8 @@ xfs_mount_free(
                for (agno = 0; agno < mp->m_maxagi; agno++)
                        if (mp->m_perag[agno].pagb_list)
-                                kmem_free(mp->m_perag[agno].pagb_list,
+                                kmem_free(mp->m_perag[agno].pagb_list);
-                                                sizeof(xfs_perag_busy_t) *
+                kmem_free(mp->m_perag);
-                                                        XFS_PAGB_NUM_SLOTS);
-                kmem_free(mp->m_perag,
-                          sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
        }
        spinlock_destroy(&mp->m_ail_lock);
@@ -176,13 +148,11 @@ xfs_mount_free(
                XFS_QM_DONE(mp);
        if (mp->m_fsname != NULL)
-                kmem_free(mp->m_fsname, mp->m_fsname_len);
+                kmem_free(mp->m_fsname);
        if (mp->m_rtname != NULL)
-                kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
+                kmem_free(mp->m_rtname);
        if (mp->m_logname != NULL)
-                kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);
+                kmem_free(mp->m_logname);
-        xfs_icsb_destroy_counters(mp);
 }
 /*
@@ -288,6 +258,19 @@ xfs_mount_validate_sb(
                return XFS_ERROR(EFSCORRUPTED);
        }
+        /*
+         * Until this is fixed only page-sized or smaller data blocks work.
+         */
+        if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
+                xfs_fs_mount_cmn_err(flags,
+                        "file system with blocksize %d bytes",
+                        sbp->sb_blocksize);
+                xfs_fs_mount_cmn_err(flags,
+                        "only pagesize (%ld) or less will currently work.",
+                        PAGE_SIZE);
+                return XFS_ERROR(ENOSYS);
+        }
        if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
            xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
                xfs_fs_mount_cmn_err(flags,
@@ -309,19 +292,6 @@ xfs_mount_validate_sb(
                return XFS_ERROR(ENOSYS);
        }
-        /*
-         * Until this is fixed only page-sized or smaller data blocks work.
-         */
-        if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
-                xfs_fs_mount_cmn_err(flags,
-                        "file system with blocksize %d bytes",
-                        sbp->sb_blocksize);
-                xfs_fs_mount_cmn_err(flags,
-                        "only pagesize (%ld) or less will currently work.",
-                        PAGE_SIZE);
-                return XFS_ERROR(ENOSYS);
-        }
        return 0;
 }
@@ -994,9 +964,19 @@ xfs_mountfs(
                 * Re-check for ATTR2 in case it was found in bad_features2
                 * slot.
                 */
-                if (xfs_sb_version_hasattr2(&mp->m_sb))
+                if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+                   !(mp->m_flags & XFS_MOUNT_NOATTR2))
                        mp->m_flags |= XFS_MOUNT_ATTR2;
+        }
+        if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+           (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+                xfs_sb_version_removeattr2(&mp->m_sb);
+                update_flags |= XFS_SB_FEATURES2;
+                /* update sb_versionnum for the clearing of the morebits */
+                if (!sbp->sb_features2)
+                        update_flags |= XFS_SB_VERSIONNUM;
        }
        /*
@@ -1255,15 +1235,13 @@ xfs_mountfs(
 error2:
        for (agno = 0; agno < sbp->sb_agcount; agno++)
                if (mp->m_perag[agno].pagb_list)
-                        kmem_free(mp->m_perag[agno].pagb_list,
+                        kmem_free(mp->m_perag[agno].pagb_list);
-                          sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
+        kmem_free(mp->m_perag);
-        kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
        mp->m_perag = NULL;
        /* FALLTHROUGH */
 error1:
        if (uuid_mounted)
-                xfs_uuid_unmount(mp);
+                uuid_table_remove(&mp->m_sb.sb_uuid);
-        xfs_freesb(mp);
        return error;
 }
@@ -1274,7 +1252,7 @@ xfs_mountfs(
 * log and makes sure that incore structures are freed.
 */
 int
-xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
+xfs_unmountfs(xfs_mount_t *mp)
 {
        __uint64_t      resblks;
        int             error = 0;
@@ -1341,9 +1319,8 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
         */
        ASSERT(mp->m_inodes == NULL);
-        xfs_unmountfs_close(mp, cr);
        if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
-                xfs_uuid_unmount(mp);
+                uuid_table_remove(&mp->m_sb.sb_uuid);
 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
        xfs_errortag_clearall(mp, 0);
@@ -1352,16 +1329,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
        return 0;
 }
-void
-xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
-{
-        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
-                xfs_free_buftarg(mp->m_logdev_targp, 1);
-        if (mp->m_rtdev_targp)
-                xfs_free_buftarg(mp->m_rtdev_targp, 1);
-        xfs_free_buftarg(mp->m_ddev_targp, 0);
-}
 STATIC void
 xfs_unmountfs_wait(xfs_mount_t *mp)
 {
@@ -1905,16 +1872,6 @@ xfs_uuid_mount(
 }
 /*
- * Remove filesystem from the UUID table.
- */
-STATIC void
-xfs_uuid_unmount(
-        xfs_mount_t     *mp)
-{
-        uuid_table_remove(&mp->m_sb.sb_uuid);
-}
-/*
 * Used to log changes to the superblock unit and width fields which could
 * be altered by the mount options, as well as any potential sb_features2
 * fixup. Only the first superblock is updated.
@@ -1928,7 +1885,8 @@ xfs_mount_log_sb(
        int             error;
        ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
-                         XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
+                         XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
+                         XFS_SB_VERSIONNUM));
        tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
        error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
@@ -2109,7 +2067,7 @@ xfs_icsb_reinit_counters(
        xfs_icsb_unlock(mp);
 }
-STATIC void
+void
 xfs_icsb_destroy_counters(
        xfs_mount_t     *mp)
 {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358a..5269bd6e3df0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
 struct xfs_extdelta;
 struct xfs_swapext;
 struct xfs_mru_cache;
+struct xfs_nameops;
 /*
 * Prototypes and functions for the Data Migration subsystem.
@@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts {
 extern int      xfs_icsb_init_counters(struct xfs_mount *);
 extern void     xfs_icsb_reinit_counters(struct xfs_mount *);
+extern void     xfs_icsb_destroy_counters(struct xfs_mount *);
 extern void     xfs_icsb_sync_counters(struct xfs_mount *, int);
 extern void     xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
 #else
-#define xfs_icsb_init_counters(mp)      (0)
+#define xfs_icsb_init_counters(mp)              (0)
-#define xfs_icsb_reinit_counters(mp)    do { } while (0)
+#define xfs_icsb_destroy_counters(mp)           do { } while (0)
+#define xfs_icsb_reinit_counters(mp)            do { } while (0)
 #define xfs_icsb_sync_counters(mp, flags)       do { } while (0)
 #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
 #endif
@@ -313,6 +316,7 @@ typedef struct xfs_mount {
        __uint8_t               m_inode_quiesce;/* call quiesce on new inodes.
                                                   field governed by m_ilock */
        __uint8_t               m_sectbb_log;   /* sectlog - BBSHIFT */
+        const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
        int                     m_dirblksize;   /* directory block sz--bytes */
        int                     m_dirblkfsbs;   /* directory block sz--fsbs */
        xfs_dablk_t             m_dirdatablk;   /* blockno of dir data v2 */
@@ -378,6 +382,7 @@ typedef struct xfs_mount {
                                                   counters */
 #define XFS_MOUNT_FILESTREAMS   (1ULL << 24)    /* enable the filestreams
                                                   allocator */
+#define XFS_MOUNT_NOATTR2       (1ULL << 25)    /* disable use of attr2 format */
 /*
@@ -510,15 +515,12 @@ typedef struct xfs_mod_sb {
 #define XFS_MOUNT_ILOCK(mp)     mutex_lock(&((mp)->m_ilock))
 #define XFS_MOUNT_IUNLOCK(mp)   mutex_unlock(&((mp)->m_ilock))
-extern xfs_mount_t *xfs_mount_init(void);
 extern void     xfs_mod_sb(xfs_trans_t *, __int64_t);
 extern int      xfs_log_sbcount(xfs_mount_t *, uint);
-extern void     xfs_mount_free(xfs_mount_t *mp);
 extern int      xfs_mountfs(xfs_mount_t *mp, int);
 extern void     xfs_mountfs_check_barriers(xfs_mount_t *mp);
-extern int      xfs_unmountfs(xfs_mount_t *, struct cred *);
+extern int      xfs_unmountfs(xfs_mount_t *);
-extern void     xfs_unmountfs_close(xfs_mount_t *, struct cred *);
 extern int      xfs_unmountfs_writesb(xfs_mount_t *);
 extern int      xfs_unmount_flush(xfs_mount_t *, int);
 extern int      xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -544,9 +546,6 @@ extern void	xfs_qmops_put(struct xfs_mount *);
 extern struct xfs_dmops xfs_dmcore_xfs;
-extern int      xfs_init(void);
-extern void     xfs_cleanup(void);
 #endif  /* __KERNEL__ */
 #endif  /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589a..afee7eb24323 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
        xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
                                         "xfs_mru_cache_elem");
        if (!xfs_mru_elem_zone)
-                return ENOMEM;
+                goto out;
        xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
-        if (!xfs_mru_reap_wq) {
+        if (!xfs_mru_reap_wq)
-                kmem_zone_destroy(xfs_mru_elem_zone);
+                goto out_destroy_mru_elem_zone;
-                return ENOMEM;
-        }
        return 0;
+ out_destroy_mru_elem_zone:
+        kmem_zone_destroy(xfs_mru_elem_zone);
+ out:
+        return -ENOMEM;
 }
 void
@@ -382,9 +385,9 @@ xfs_mru_cache_create(
 exit:
        if (err && mru && mru->lists)
-                kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+                kmem_free(mru->lists);
        if (err && mru)
-                kmem_free(mru, sizeof(*mru));
+                kmem_free(mru);
        return err;
 }
@@ -424,8 +427,8 @@ xfs_mru_cache_destroy(
        xfs_mru_cache_flush(mru);
-        kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
+        kmem_free(mru->lists);
-        kmem_free(mru, sizeof(*mru));
+        kmem_free(mru);
 }
 /*
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad298..d700dacdb10e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,22 +336,18 @@ xfs_rename(
                ASSERT(error != EEXIST);
                if (error)
                        goto abort_return;
-                xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-        } else {
-                /*
-                 * We always want to hit the ctime on the source inode.
-                 * We do it in the if clause above for the 'new_parent &&
-                 * src_is_directory' case, and here we get all the other
-                 * cases.  This isn't strictly required by the standards
-                 * since the source inode isn't really being changed,
-                 * but old unix file systems did it and some incremental
-                 * backup programs won't work without it.
-                 */
-                xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
        }
        /*
+         * We always want to hit the ctime on the source inode.
+         *
+         * This isn't strictly required by the standards since the source
+         * inode isn't really being changed, but old unix file systems did
+         * it and some incremental backup programs won't work without it.
+         */
+        xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+        /*
         * Adjust the link count on src_dp.  This is necessary when
         * renaming a directory, either within one parent when
         * the target existed, or across two parent directories.
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b9..bf87a5913504 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2062,7 +2062,7 @@ xfs_growfs_rt(
        /*
         * Free the fake mp structure.
         */
-        kmem_free(nmp, sizeof(*nmp));
+        kmem_free(nmp);
        return error;
 }
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f871..3f8cf1587f4c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
 #define XFS_SB_VERSION_SECTORBIT        0x0800
 #define XFS_SB_VERSION_EXTFLGBIT        0x1000
 #define XFS_SB_VERSION_DIRV2BIT         0x2000
+#define XFS_SB_VERSION_BORGBIT          0x4000  /* ASCII only case-insens. */
 #define XFS_SB_VERSION_MOREBITSBIT      0x8000
 #define XFS_SB_VERSION_OKSASHFBITS      \
        (XFS_SB_VERSION_EXTFLGBIT | \
-         XFS_SB_VERSION_DIRV2BIT)
+         XFS_SB_VERSION_DIRV2BIT | \
+         XFS_SB_VERSION_BORGBIT)
 #define XFS_SB_VERSION_OKREALFBITS      \
        (XFS_SB_VERSION_ATTRBIT | \
         XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
                ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
 }
+/*
+ * Test for the ASCII-only case-insensitive feature: true only when the
+ * superblock is version 4 and XFS_SB_VERSION_BORGBIT is set in
+ * sb_versionnum.  Returns 1 if the feature is present, 0 otherwise.
+ */
+static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
+{
+        /* The feature bit is only meaningful on version 4 superblocks. */
+        if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4)
+                return 0;
+        return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT) != 0;
+}
 static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
 {
        return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
                ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
 }
+/*
+ * Clear XFS_SB_VERSION2_ATTR2BIT from sb_features2.  When that leaves
+ * sb_features2 with no bits set, XFS_SB_VERSION_MOREBITSBIT is cleared
+ * from sb_versionnum as well.
+ */
+static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
+{
+        sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
+        /* Other features2 bits remain: leave the version word alone. */
+        if (sbp->sb_features2 != 0)
+                return;
+        sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
+}
 /*
 * end of superblock version macros
 */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa3..e4ebddd3c500 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -889,7 +889,7 @@ shut_us_down:
        tp->t_commit_lsn = commit_lsn;
        if (nvec > XFS_TRANS_LOGVEC_COUNT) {
-                kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t));
+                kmem_free(log_vector);
        }
        /*
@@ -1265,7 +1265,7 @@ xfs_trans_committed(
                ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
                xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
                next_licp = licp->lic_next;
-                kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+                kmem_free(licp);
                licp = next_licp;
        }
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e9985..2a1c0f071f91 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
        iip = ip->i_itemp;
        if (iip->ili_root_size != 0) {
                ASSERT(iip->ili_orig_root != NULL);
-                kmem_free(iip->ili_orig_root, iip->ili_root_size);
+                kmem_free(iip->ili_orig_root);
                iip->ili_root_size = 0;
                iip->ili_orig_root = NULL;
        }
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894b..db5c83595526 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -161,7 +161,7 @@ xfs_trans_free_item(xfs_trans_t	*tp, xfs_log_item_desc_t *lidp)
                        licpp = &((*licpp)->lic_next);
                }
                *licpp = licp->lic_next;
-                kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+                kmem_free(licp);
                tp->t_items_free -= XFS_LIC_NUM_SLOTS;
        }
 }
@@ -314,7 +314,7 @@ xfs_trans_free_items(
                ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
                (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
                next_licp = licp->lic_next;
-                kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+                kmem_free(licp);
                licp = next_licp;
        }
@@ -363,7 +363,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
                next_licp = licp->lic_next;
                if (XFS_LIC_ARE_ALL_FREE(licp)) {
                        *licpp = next_licp;
-                        kmem_free(licp, sizeof(xfs_log_item_chunk_t));
+                        kmem_free(licp);
                        freed -= XFS_LIC_NUM_SLOTS;
                } else {
                        licpp = &(licp->lic_next);
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
        lbcp = tp->t_busy.lbc_next;
        while (lbcp != NULL) {
                lbcq = lbcp->lbc_next;
-                kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t));
+                kmem_free(lbcp);
                lbcp = lbcq;
        }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e5..4a9a43315a86 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,586 +58,6 @@
 #include "xfs_utils.h"
-int __init
-xfs_init(void)
-{
-#ifdef XFS_DABUF_DEBUG
-        extern spinlock_t        xfs_dabuf_global_lock;
-        spin_lock_init(&xfs_dabuf_global_lock);
-#endif
-        /*
-         * Initialize all of the zone allocators we use.
-         */
-        xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
-                                                "xfs_log_ticket");
-        xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-                                                "xfs_bmap_free_item");
-        xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-                                                "xfs_btree_cur");
-        xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-                                                "xfs_da_state");
-        xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-        xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-        xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
-        xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
-        xfs_mru_cache_init();
-        xfs_filestream_init();
-        /*
-         * The size of the zone allocated buf log item is the maximum
-         * size possible under XFS.  This wastes a little bit of memory,
-         * but it is much faster.
-         */
-        xfs_buf_item_zone =
-                kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-                                (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
-                                  NBWORD) * sizeof(int))),
-                               "xfs_buf_item");
-        xfs_efd_zone =
-                kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-                               ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
-                                 sizeof(xfs_extent_t))),
-                                      "xfs_efd_item");
-        xfs_efi_zone =
-                kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-                               ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
-                                 sizeof(xfs_extent_t))),
-                                      "xfs_efi_item");
-        /*
-         * These zones warrant special memory allocator hints
-         */
-        xfs_inode_zone =
-                kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-                                        KM_ZONE_SPREAD, NULL);
-        xfs_ili_zone =
-                kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
-                                        KM_ZONE_SPREAD, NULL);
-        /*
-         * Allocate global trace buffers.
-         */
-#ifdef XFS_ALLOC_TRACE
-        xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMAP_TRACE
-        xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_BMBT_TRACE
-        xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_ATTR_TRACE
-        xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
-#endif
-#ifdef XFS_DIR2_TRACE
-        xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
-#endif
-        xfs_dir_startup();
-#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
-        xfs_error_test_init();
-#endif /* DEBUG || INDUCE_IO_ERROR */
-        xfs_init_procfs();
-        xfs_sysctl_register();
-        return 0;
-}
-void __exit
-xfs_cleanup(void)
-{
-        extern kmem_zone_t      *xfs_inode_zone;
-        extern kmem_zone_t      *xfs_efd_zone;
-        extern kmem_zone_t      *xfs_efi_zone;
-        xfs_cleanup_procfs();
-        xfs_sysctl_unregister();
-        xfs_filestream_uninit();
-        xfs_mru_cache_uninit();
-        xfs_acl_zone_destroy(xfs_acl_zone);
-#ifdef XFS_DIR2_TRACE
-        ktrace_free(xfs_dir2_trace_buf);
-#endif
-#ifdef XFS_ATTR_TRACE
-        ktrace_free(xfs_attr_trace_buf);
-#endif
-#ifdef XFS_BMBT_TRACE
-        ktrace_free(xfs_bmbt_trace_buf);
-#endif
-#ifdef XFS_BMAP_TRACE
-        ktrace_free(xfs_bmap_trace_buf);
-#endif
-#ifdef XFS_ALLOC_TRACE
-        ktrace_free(xfs_alloc_trace_buf);
-#endif
-        kmem_zone_destroy(xfs_bmap_free_item_zone);
-        kmem_zone_destroy(xfs_btree_cur_zone);
-        kmem_zone_destroy(xfs_inode_zone);
-        kmem_zone_destroy(xfs_trans_zone);
-        kmem_zone_destroy(xfs_da_state_zone);
-        kmem_zone_destroy(xfs_dabuf_zone);
-        kmem_zone_destroy(xfs_buf_item_zone);
-        kmem_zone_destroy(xfs_efd_zone);
-        kmem_zone_destroy(xfs_efi_zone);
-        kmem_zone_destroy(xfs_ifork_zone);
-        kmem_zone_destroy(xfs_ili_zone);
-        kmem_zone_destroy(xfs_log_ticket_zone);
-}
-/*
- * xfs_start_flags
- *
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
-        struct xfs_mount_args   *ap,
-        struct xfs_mount        *mp)
-{
-        /* Values are in BBs */
-        if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
-                /*
-                 * At this point the superblock has not been read
-                 * in, therefore we do not know the block size.
-                 * Before the mount call ends we will convert
-                 * these to FSBs.
-                 */
-                mp->m_dalign = ap->sunit;
-                mp->m_swidth = ap->swidth;
-        }
-        if (ap->logbufs != -1 &&
-            ap->logbufs != 0 &&
-            (ap->logbufs < XLOG_MIN_ICLOGS ||
-             ap->logbufs > XLOG_MAX_ICLOGS)) {
-                cmn_err(CE_WARN,
-                        "XFS: invalid logbufs value: %d [not %d-%d]",
-                        ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-                return XFS_ERROR(EINVAL);
-        }
-        mp->m_logbufs = ap->logbufs;
-        if (ap->logbufsize != -1 &&
-            ap->logbufsize !=  0 &&
-            (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
-             ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
-             !is_power_of_2(ap->logbufsize))) {
-                cmn_err(CE_WARN,
-        "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-                        ap->logbufsize);
-                return XFS_ERROR(EINVAL);
-        }
-        mp->m_logbsize = ap->logbufsize;
-        mp->m_fsname_len = strlen(ap->fsname) + 1;
-        mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
-        strcpy(mp->m_fsname, ap->fsname);
-        if (ap->rtname[0]) {
-                mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
-                strcpy(mp->m_rtname, ap->rtname);
-        }
-        if (ap->logname[0]) {
-                mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
-                strcpy(mp->m_logname, ap->logname);
-        }
-        if (ap->flags & XFSMNT_WSYNC)
-                mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
-        if (ap->flags & XFSMNT_INO64) {
-                mp->m_flags |= XFS_MOUNT_INO64;
-                mp->m_inoadd = XFS_INO64_OFFSET;
-        }
-#endif
-        if (ap->flags & XFSMNT_RETERR)
-                mp->m_flags |= XFS_MOUNT_RETERR;
-        if (ap->flags & XFSMNT_NOALIGN)
-                mp->m_flags |= XFS_MOUNT_NOALIGN;
-        if (ap->flags & XFSMNT_SWALLOC)
-                mp->m_flags |= XFS_MOUNT_SWALLOC;
-        if (ap->flags & XFSMNT_OSYNCISOSYNC)
-                mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
-        if (ap->flags & XFSMNT_32BITINODES)
-                mp->m_flags |= XFS_MOUNT_32BITINODES;
-        if (ap->flags & XFSMNT_IOSIZE) {
-                if (ap->iosizelog > XFS_MAX_IO_LOG ||
-                    ap->iosizelog < XFS_MIN_IO_LOG) {
-                        cmn_err(CE_WARN,
-                "XFS: invalid log iosize: %d [not %d-%d]",
-                                ap->iosizelog, XFS_MIN_IO_LOG,
-                                XFS_MAX_IO_LOG);
-                        return XFS_ERROR(EINVAL);
-                }
-                mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-                mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
-        }
-        if (ap->flags & XFSMNT_IKEEP)
-                mp->m_flags |= XFS_MOUNT_IKEEP;
-        if (ap->flags & XFSMNT_DIRSYNC)
-                mp->m_flags |= XFS_MOUNT_DIRSYNC;
-        if (ap->flags & XFSMNT_ATTR2)
-                mp->m_flags |= XFS_MOUNT_ATTR2;
-        if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
-                mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-        /*
-         * no recovery flag requires a read-only mount
-         */
-        if (ap->flags & XFSMNT_NORECOVERY) {
-                if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-                        cmn_err(CE_WARN,
-        "XFS: tried to mount a FS read-write without recovery!");
-                        return XFS_ERROR(EINVAL);
-                }
-                mp->m_flags |= XFS_MOUNT_NORECOVERY;
-        }
-        if (ap->flags & XFSMNT_NOUUID)
-                mp->m_flags |= XFS_MOUNT_NOUUID;
-        if (ap->flags & XFSMNT_BARRIER)
-                mp->m_flags |= XFS_MOUNT_BARRIER;
-        else
-                mp->m_flags &= ~XFS_MOUNT_BARRIER;
-        if (ap->flags2 & XFSMNT2_FILESTREAMS)
-                mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-        if (ap->flags & XFSMNT_DMAPI)
-                mp->m_flags |= XFS_MOUNT_DMAPI;
-        return 0;
-}
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
-        struct xfs_mount_args   *ap,
-        struct xfs_mount        *mp)
-{
-        int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-        /* Fail a mount where the logbuf is smaller then the log stripe */
-        if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-                if ((ap->logbufsize <= 0) &&
-                    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
-                        mp->m_logbsize = mp->m_sb.sb_logsunit;
-                } else if (ap->logbufsize > 0 &&
-                           ap->logbufsize < mp->m_sb.sb_logsunit) {
-                        cmn_err(CE_WARN,
-        "XFS: logbuf size must be greater than or equal to log stripe size");
-                        return XFS_ERROR(EINVAL);
-                }
-        } else {
-                /* Fail a mount if the logbuf is larger than 32K */
-                if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
-                        cmn_err(CE_WARN,
-        "XFS: logbuf size for version 1 logs must be 16K or 32K");
-                        return XFS_ERROR(EINVAL);
-                }
-        }
-        if (xfs_sb_version_hasattr2(&mp->m_sb))
-                mp->m_flags |= XFS_MOUNT_ATTR2;
-        /*
-         * prohibit r/w mounts of read-only filesystems
-         */
-        if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
-                cmn_err(CE_WARN,
-        "XFS: cannot mount a read-only filesystem as read-write");
-                return XFS_ERROR(EROFS);
-        }
-        /*
-         * check for shared mount.
-         */
-        if (ap->flags & XFSMNT_SHARED) {
-                if (!xfs_sb_version_hasshared(&mp->m_sb))
-                        return XFS_ERROR(EINVAL);
-                /*
-                 * For IRIX 6.5, shared mounts must have the shared
-                 * version bit set, have the persistent readonly
-                 * field set, must be version 0 and can only be mounted
-                 * read-only.
-                 */
-                if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
-                     (mp->m_sb.sb_shared_vn != 0))
-                        return XFS_ERROR(EINVAL);
-                mp->m_flags |= XFS_MOUNT_SHARED;
-                /*
-                 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
-                 */
-                if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
-                        return XFS_ERROR(EINVAL);
-        }
-        if (ap->flags & XFSMNT_UQUOTA) {
-                mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-                if (ap->flags & XFSMNT_UQUOTAENF)
-                        mp->m_qflags |= XFS_UQUOTA_ENFD;
-        }
-        if (ap->flags & XFSMNT_GQUOTA) {
-                mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-                if (ap->flags & XFSMNT_GQUOTAENF)
-                        mp->m_qflags |= XFS_OQUOTA_ENFD;
-        } else if (ap->flags & XFSMNT_PQUOTA) {
-                mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-                if (ap->flags & XFSMNT_PQUOTAENF)
-                        mp->m_qflags |= XFS_OQUOTA_ENFD;
-        }
-        return 0;
-}
-/*
- * xfs_mount
- *
- * The file system configurations are:
- *      (1) device (partition) with data and internal log
- *      (2) logical volume with data and log subvolumes.
- *      (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
- */
-int
-xfs_mount(
-        struct xfs_mount        *mp,
-        struct xfs_mount_args   *args,
-        cred_t                  *credp)
-{
-        struct block_device     *ddev, *logdev, *rtdev;
-        int                     flags = 0, error;
-        ddev = mp->m_super->s_bdev;
-        logdev = rtdev = NULL;
-        error = xfs_dmops_get(mp, args);
-        if (error)
-                return error;
-        error = xfs_qmops_get(mp, args);
-        if (error)
-                return error;
-        if (args->flags & XFSMNT_QUIET)
-                flags |= XFS_MFSI_QUIET;
-        /*
-         * Open real time and log devices - order is important.
-         */
-        if (args->logname[0]) {
-                error = xfs_blkdev_get(mp, args->logname, &logdev);
-                if (error)
-                        return error;
-        }
-        if (args->rtname[0]) {
-                error = xfs_blkdev_get(mp, args->rtname, &rtdev);
-                if (error) {
-                        xfs_blkdev_put(logdev);
-                        return error;
-                }
-                if (rtdev == ddev || rtdev == logdev) {
-                        cmn_err(CE_WARN,
-        "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
-                        xfs_blkdev_put(logdev);
-                        xfs_blkdev_put(rtdev);
-                        return EINVAL;
-                }
-        }
-        /*
-         * Setup xfs_mount buffer target pointers
-         */
-        error = ENOMEM;
-        mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
-        if (!mp->m_ddev_targp) {
-                xfs_blkdev_put(logdev);
-                xfs_blkdev_put(rtdev);
-                return error;
-        }
-        if (rtdev) {
-                mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
-                if (!mp->m_rtdev_targp) {
-                        xfs_blkdev_put(logdev);
-                        xfs_blkdev_put(rtdev);
-                        goto error0;
-                }
-        }
-        mp->m_logdev_targp = (logdev && logdev != ddev) ?
-                                xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
-        if (!mp->m_logdev_targp) {
-                xfs_blkdev_put(logdev);
-                xfs_blkdev_put(rtdev);
-                goto error0;
-        }
-        /*
-         * Setup flags based on mount(2) options and then the superblock
-         */
-        error = xfs_start_flags(args, mp);
-        if (error)
-                goto error1;
-        error = xfs_readsb(mp, flags);
-        if (error)
-                goto error1;
-        error = xfs_finish_flags(args, mp);
-        if (error)
-                goto error2;
-        /*
-         * Setup xfs_mount buffer target pointers based on superblock
-         */
-        error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-                                    mp->m_sb.sb_sectsize);
-        if (!error && logdev && logdev != ddev) {
-                unsigned int    log_sector_size = BBSIZE;
-                if (xfs_sb_version_hassector(&mp->m_sb))
-                        log_sector_size = mp->m_sb.sb_logsectsize;
-                error = xfs_setsize_buftarg(mp->m_logdev_targp,
-                                            mp->m_sb.sb_blocksize,
-                                            log_sector_size);
-        }
-        if (!error && rtdev)
-                error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-                                            mp->m_sb.sb_blocksize,
-                                            mp->m_sb.sb_sectsize);
-        if (error)
-                goto error2;
-        if (mp->m_flags & XFS_MOUNT_BARRIER)
-                xfs_mountfs_check_barriers(mp);
-        if ((error = xfs_filestream_mount(mp)))
-                goto error2;
-        error = xfs_mountfs(mp, flags);
-        if (error)
-                goto error2;
-        XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
-        return 0;
-error2:
-        if (mp->m_sb_bp)
-                xfs_freesb(mp);
-error1:
-        xfs_binval(mp->m_ddev_targp);
-        if (logdev && logdev != ddev)
-                xfs_binval(mp->m_logdev_targp);
-        if (rtdev)
-                xfs_binval(mp->m_rtdev_targp);
-error0:
-        xfs_unmountfs_close(mp, credp);
-        xfs_qmops_put(mp);
-        xfs_dmops_put(mp);
-        return error;
-}
-int
-xfs_unmount(
-        xfs_mount_t     *mp,
-        int             flags,
-        cred_t          *credp)
-{
-        xfs_inode_t     *rip;
-        bhv_vnode_t     *rvp;
-        int             unmount_event_wanted = 0;
-        int             unmount_event_flags = 0;
-        int             xfs_unmountfs_needed = 0;
-        int             error;
-        rip = mp->m_rootip;
-        rvp = XFS_ITOV(rip);
-#ifdef HAVE_DMAPI
-        if (mp->m_flags & XFS_MOUNT_DMAPI) {
-                error = XFS_SEND_PREUNMOUNT(mp,
-                                rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
-                                NULL, NULL, 0, 0,
-                                (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
-                                        0:DM_FLAGS_UNWANTED);
-                        if (error)
-                                return XFS_ERROR(error);
-                unmount_event_wanted = 1;
-                unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
-                                        0 : DM_FLAGS_UNWANTED;
-        }
-#endif
-        /*
-         * Blow away any referenced inode in the filestreams cache.
-         * This can and will cause log traffic as inodes go inactive
-         * here.
-         */
-        xfs_filestream_unmount(mp);
-        XFS_bflush(mp->m_ddev_targp);
-        error = xfs_unmount_flush(mp, 0);
-        if (error)
-                goto out;
-        ASSERT(vn_count(rvp) == 1);
-        /*
-         * Drop the reference count
-         */
-        IRELE(rip);
-        /*
-         * If we're forcing a shutdown, typically because of a media error,
-         * we want to make sure we invalidate dirty pages that belong to
-         * referenced vnodes as well.
-         */
-        if (XFS_FORCED_SHUTDOWN(mp)) {
-                error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
-                ASSERT(error != EFSCORRUPTED);
-        }
-        xfs_unmountfs_needed = 1;
-out:
-        /*      Send DMAPI event, if required.
-         *      Then do xfs_unmountfs() if needed.
-         *      Then return error (or zero).
-         */
-        if (unmount_event_wanted) {
-                /* Note: mp structure must still exist for
-                 * XFS_SEND_UNMOUNT() call.
-                 */
-                XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
-                        DM_RIGHT_NULL, 0, error, unmount_event_flags);
-        }
-        if (xfs_unmountfs_needed) {
-                /*
-                 * Call common unmount function to flush to disk
-                 * and free the super block buffer & mount structures.
-                 */
-                xfs_unmountfs(mp, credp);
-                xfs_qmops_put(mp);
-                xfs_dmops_put(mp);
-                kmem_free(mp, sizeof(xfs_mount_t));
-        }
-        return XFS_ERROR(error);
-}
 STATIC void
 xfs_quiesce_fs(
        xfs_mount_t             *mp)
@@ -694,30 +114,6 @@ xfs_attr_quiesce(
        xfs_unmountfs_writesb(mp);
 }
-int
-xfs_mntupdate(
-        struct xfs_mount                *mp,
-        int                             *flags,
-        struct xfs_mount_args           *args)
-{
-        if (!(*flags & MS_RDONLY)) {                    /* rw/ro -> rw */
-                if (mp->m_flags & XFS_MOUNT_RDONLY)
-                        mp->m_flags &= ~XFS_MOUNT_RDONLY;
-                if (args->flags & XFSMNT_BARRIER) {
-                        mp->m_flags |= XFS_MOUNT_BARRIER;
-                        xfs_mountfs_check_barriers(mp);
-                } else {
-                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                }
-        } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
-                xfs_filestream_flush(mp);
-                xfs_sync(mp, SYNC_DATA_QUIESCE);
-                xfs_attr_quiesce(mp);
-                mp->m_flags |= XFS_MOUNT_RDONLY;
-        }
-        return 0;
-}
 /*
 * xfs_unmount_flush implements a set of flush operation on special
 * inodes, which are needed as a separate set of operations so that
@@ -1048,7 +444,7 @@ xfs_sync_inodes(
                if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
                        XFS_MOUNT_IUNLOCK(mp);
-                        kmem_free(ipointer, sizeof(xfs_iptr_t));
+                        kmem_free(ipointer);
                        return 0;
                }
@@ -1194,7 +590,7 @@ xfs_sync_inodes(
                        }
                        XFS_MOUNT_IUNLOCK(mp);
                        ASSERT(ipointer_in == B_FALSE);
-                        kmem_free(ipointer, sizeof(xfs_iptr_t));
+                        kmem_free(ipointer);
                        return XFS_ERROR(error);
                }
@@ -1224,7 +620,7 @@ xfs_sync_inodes(
        ASSERT(ipointer_in == B_FALSE);
-        kmem_free(ipointer, sizeof(xfs_iptr_t));
+        kmem_free(ipointer);
        return XFS_ERROR(last_error);
 }
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55ed..a74b05087da4 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,11 +8,6 @@ struct kstatfs;
 struct xfs_mount;
 struct xfs_mount_args;
-int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
-                struct cred *credp);
-int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
-int xfs_mntupdate(struct xfs_mount *mp, int *flags,
-                struct xfs_mount_args *args);
 int xfs_sync(struct xfs_mount *mp, int flags);
 void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
                int lnnum);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb3..76a1166af822 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,26 +75,23 @@ xfs_open(
        return 0;
 }
-/*
- * xfs_setattr
- */
 int
 xfs_setattr(
-        xfs_inode_t             *ip,
+        struct xfs_inode        *ip,
-        bhv_vattr_t             *vap,
+        struct iattr            *iattr,
        int                     flags,
        cred_t                  *credp)
 {
        xfs_mount_t             *mp = ip->i_mount;
+        struct inode            *inode = XFS_ITOV(ip);
+        int                     mask = iattr->ia_valid;
        xfs_trans_t             *tp;
-        int                     mask;
        int                     code;
        uint                    lock_flags;
        uint                    commit_flags=0;
        uid_t                   uid=0, iuid=0;
        gid_t                   gid=0, igid=0;
        int                     timeflags = 0;
-        xfs_prid_t              projid=0, iprojid=0;
        struct xfs_dquot        *udqp, *gdqp, *olddquot1, *olddquot2;
        int                     file_owner;
        int                     need_iolock = 1;
@@ -104,30 +101,9 @@ xfs_setattr(
        if (mp->m_flags & XFS_MOUNT_RDONLY)
                return XFS_ERROR(EROFS);
-        /*
-         * Cannot set certain attributes.
-         */
-        mask = vap->va_mask;
-        if (mask & XFS_AT_NOSET) {
-                return XFS_ERROR(EINVAL);
-        }
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
-        /*
-         * Timestamps do not need to be logged and hence do not
-         * need to be done within a transaction.
-         */
-        if (mask & XFS_AT_UPDTIMES) {
-                ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
-                timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
-                            ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
-                            ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
-                xfs_ichgtime(ip, timeflags);
-                return 0;
-        }
        olddquot1 = olddquot2 = NULL;
        udqp = gdqp = NULL;
@@ -139,28 +115,22 @@ xfs_setattr(
         * If the IDs do change before we take the ilock, we're covered
         * because the i_*dquot fields will get updated anyway.
         */
-        if (XFS_IS_QUOTA_ON(mp) &&
+        if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
-            (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
                uint    qflags = 0;
-                if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) {
+                if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
-                        uid = vap->va_uid;
+                        uid = iattr->ia_uid;
                        qflags |= XFS_QMOPT_UQUOTA;
                } else {
                        uid = ip->i_d.di_uid;
                }
-                if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) {
+                if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
-                        gid = vap->va_gid;
+                        gid = iattr->ia_gid;
                        qflags |= XFS_QMOPT_GQUOTA;
                }  else {
                        gid = ip->i_d.di_gid;
                }
-                if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) {
-                        projid = vap->va_projid;
-                        qflags |= XFS_QMOPT_PQUOTA;
-                }  else {
-                        projid = ip->i_d.di_projid;
-                }
                /*
                 * We take a reference when we initialize udqp and gdqp,
                 * so it is important that we never blindly double trip on
@@ -168,8 +138,8 @@ xfs_setattr(
                 */
                ASSERT(udqp == NULL);
                ASSERT(gdqp == NULL);
-                code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags,
+                code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
-                                         &udqp, &gdqp);
+                                         qflags, &udqp, &gdqp);
                if (code)
                        return code;
        }
@@ -180,10 +150,10 @@ xfs_setattr(
         */
        tp = NULL;
        lock_flags = XFS_ILOCK_EXCL;
-        if (flags & ATTR_NOLOCK)
+        if (flags & XFS_ATTR_NOLOCK)
                need_iolock = 0;
-        if (!(mask & XFS_AT_SIZE)) {
+        if (!(mask & ATTR_SIZE)) {
-                if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+                if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
                    (mp->m_flags & XFS_MOUNT_WSYNC)) {
                        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
                        commit_flags = 0;
@@ -196,10 +166,10 @@ xfs_setattr(
                }
        } else {
                if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
-                    !(flags & ATTR_DMI)) {
+                    !(flags & XFS_ATTR_DMI)) {
                        int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
                        code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
-                                vap->va_size, 0, dmflags, NULL);
+                                iattr->ia_size, 0, dmflags, NULL);
                        if (code) {
                                lock_flags = 0;
                                goto error_return;
@@ -219,9 +189,7 @@ xfs_setattr(
         * Only the owner or users with CAP_FOWNER
         * capability may do these things.
         */
-        if (mask &
+        if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
-            (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
-             XFS_AT_GID|XFS_AT_PROJID)) {
                /*
                 * CAP_FOWNER overrides the following restrictions:
                 *
@@ -245,21 +213,21 @@ xfs_setattr(
                 * IDs of the calling process shall match the group owner of
                 * the file when setting the set-group-ID bit on that file
                 */
-                if (mask & XFS_AT_MODE) {
+                if (mask & ATTR_MODE) {
                        mode_t m = 0;
-                        if ((vap->va_mode & S_ISUID) && !file_owner)
+                        if ((iattr->ia_mode & S_ISUID) && !file_owner)
                                m |= S_ISUID;
-                        if ((vap->va_mode & S_ISGID) &&
+                        if ((iattr->ia_mode & S_ISGID) &&
                            !in_group_p((gid_t)ip->i_d.di_gid))
                                m |= S_ISGID;
 #if 0
                        /* Linux allows this, Irix doesn't. */
-                        if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
+                        if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
                                m |= S_ISVTX;
 #endif
                        if (m && !capable(CAP_FSETID))
-                                vap->va_mode &= ~m;
+                                iattr->ia_mode &= ~m;
                }
        }
@@ -270,7 +238,7 @@ xfs_setattr(
         * and can change the group id only to a group of which he
         * or she is a member.
         */
-        if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+        if (mask & (ATTR_UID|ATTR_GID)) {
                /*
                 * These IDs could have changed since we last looked at them.
                 * But, we're assured that if the ownership did change
@@ -278,12 +246,9 @@ xfs_setattr(
                 * would have changed also.
                 */
                iuid = ip->i_d.di_uid;
-                iprojid = ip->i_d.di_projid;
                igid = ip->i_d.di_gid;
-                gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
+                gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
-                uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
+                uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-                projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
-                         iprojid;
                /*
                 * CAP_CHOWN overrides the following restrictions:
@@ -303,11 +268,10 @@ xfs_setattr(
                        goto error_return;
                }
                /*
-                 * Do a quota reservation only if uid/projid/gid is actually
+                 * Do a quota reservation only if uid/gid is actually
                 * going to change.
                 */
                if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-                    (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
                    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
                        ASSERT(tp);
                        code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -321,13 +285,13 @@ xfs_setattr(
        /*
         * Truncate file.  Must have write permission and not be a directory.
         */
-        if (mask & XFS_AT_SIZE) {
+        if (mask & ATTR_SIZE) {
                /* Short circuit the truncate case for zero length files */
-                if ((vap->va_size == 0) &&
+                if (iattr->ia_size == 0 &&
-                   (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
+                    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                        lock_flags &= ~XFS_ILOCK_EXCL;
-                        if (mask & XFS_AT_CTIME)
+                        if (mask & ATTR_CTIME)
                                xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
                        code = 0;
                        goto error_return;
@@ -350,9 +314,9 @@ xfs_setattr(
        /*
         * Change file access or modified times.
         */
-        if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+        if (mask & (ATTR_ATIME|ATTR_MTIME)) {
                if (!file_owner) {
-                        if ((flags & ATTR_UTIME) &&
+                        if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
                            !capable(CAP_FOWNER)) {
                                code = XFS_ERROR(EPERM);
                                goto error_return;
@@ -361,90 +325,23 @@ xfs_setattr(
        }
        /*
-         * Change extent size or realtime flag.
-         */
-        if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
-                /*
-                 * Can't change extent size if any extents are allocated.
-                 */
-                if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
-                    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                     vap->va_extsize) ) {
-                        code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                        goto error_return;
-                }
-                /*
-                 * Can't change realtime flag if any extents are allocated.
-                 */
-                if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-                    (mask & XFS_AT_XFLAGS) &&
-                    (XFS_IS_REALTIME_INODE(ip)) !=
-                    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
-                        code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                        goto error_return;
-                }
-                /*
-                 * Extent size must be a multiple of the appropriate block
-                 * size, if set at all.
-                 */
-                if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
-                        xfs_extlen_t    size;
-                        if (XFS_IS_REALTIME_INODE(ip) ||
-                            ((mask & XFS_AT_XFLAGS) &&
-                            (vap->va_xflags & XFS_XFLAG_REALTIME))) {
-                                size = mp->m_sb.sb_rextsize <<
-                                       mp->m_sb.sb_blocklog;
-                        } else {
-                                size = mp->m_sb.sb_blocksize;
-                        }
-                        if (vap->va_extsize % size) {
-                                code = XFS_ERROR(EINVAL);
-                                goto error_return;
-                        }
-                }
-                /*
-                 * If realtime flag is set then must have realtime data.
-                 */
-                if ((mask & XFS_AT_XFLAGS) &&
-                    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
-                        if ((mp->m_sb.sb_rblocks == 0) ||
-                            (mp->m_sb.sb_rextsize == 0) ||
-                            (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
-                                code = XFS_ERROR(EINVAL);
-                                goto error_return;
-                        }
-                }
-                /*
-                 * Can't modify an immutable/append-only file unless
-                 * we have appropriate permission.
-                 */
-                if ((mask & XFS_AT_XFLAGS) &&
-                    (ip->i_d.di_flags &
-                                (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
-                     (vap->va_xflags &
-                                (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
-                    !capable(CAP_LINUX_IMMUTABLE)) {
-                        code = XFS_ERROR(EPERM);
-                        goto error_return;
-                }
-        }
-        /*
         * Now we can make the changes.  Before we join the inode
-         * to the transaction, if XFS_AT_SIZE is set then take care of
+         * to the transaction, if ATTR_SIZE is set then take care of
         * the part of the truncation that must be done without the
         * inode lock.  This needs to be done before joining the inode
         * to the transaction, because the inode cannot be unlocked
         * once it is a part of the transaction.
         */
-        if (mask & XFS_AT_SIZE) {
+        if (mask & ATTR_SIZE) {
                code = 0;
-                if ((vap->va_size > ip->i_size) &&
+                if (iattr->ia_size > ip->i_size) {
-                    (flags & ATTR_NOSIZETOK) == 0) {
+                        /*
-                        code = xfs_igrow_start(ip, vap->va_size, credp);
+                         * Do the first part of growing a file: zero any data
+                         * in the last block that is beyond the old EOF.  We
+                         * need to do this before the inode is joined to the
+                         * transaction to modify the i_size.
+                         */
+                        code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
                }
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -461,10 +358,10 @@ xfs_setattr(
                 * not within the range we care about here.
                 */
                if (!code &&
-                    (ip->i_size != ip->i_d.di_size) &&
+                    ip->i_size != ip->i_d.di_size &&
-                    (vap->va_size > ip->i_d.di_size)) {
+                    iattr->ia_size > ip->i_d.di_size) {
                        code = xfs_flush_pages(ip,
-                                        ip->i_d.di_size, vap->va_size,
+                                        ip->i_d.di_size, iattr->ia_size,
                                        XFS_B_ASYNC, FI_NONE);
                }
@@ -472,7 +369,7 @@ xfs_setattr(
                vn_iowait(ip);
                if (!code)
-                        code = xfs_itruncate_data(ip, vap->va_size);
+                        code = xfs_itruncate_data(ip, iattr->ia_size);
                if (code) {
                        ASSERT(tp == NULL);
                        lock_flags &= ~XFS_ILOCK_EXCL;
@@ -501,28 +398,30 @@ xfs_setattr(
        /*
         * Truncate file.  Must have write permission and not be a directory.
         */
-        if (mask & XFS_AT_SIZE) {
+        if (mask & ATTR_SIZE) {
                /*
                 * Only change the c/mtime if we are changing the size
                 * or we are explicitly asked to change it. This handles
                 * the semantic difference between truncate() and ftruncate()
                 * as implemented in the VFS.
                 */
-                if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+                if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
                        timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
-                if (vap->va_size > ip->i_size) {
+                if (iattr->ia_size > ip->i_size) {
-                        xfs_igrow_finish(tp, ip, vap->va_size,
+                        ip->i_d.di_size = iattr->ia_size;
-                            !(flags & ATTR_DMI));
+                        ip->i_size = iattr->ia_size;
-                } else if ((vap->va_size <= ip->i_size) ||
+                        if (!(flags & XFS_ATTR_DMI))
-                           ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+                                xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+                } else if (iattr->ia_size <= ip->i_size ||
+                           (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
                        /*
                         * signal a sync transaction unless
                         * we're truncating an already unlinked
                         * file on a wsync filesystem
                         */
-                        code = xfs_itruncate_finish(&tp, ip,
+                        code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
-                                            (xfs_fsize_t)vap->va_size,
                                            XFS_DATA_FORK,
                                            ((ip->i_d.di_nlink != 0 ||
                                              !(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -544,9 +443,12 @@ xfs_setattr(
        /*
         * Change file access modes.
         */
-        if (mask & XFS_AT_MODE) {
+        if (mask & ATTR_MODE) {
                ip->i_d.di_mode &= S_IFMT;
-                ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+                ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
+                inode->i_mode &= S_IFMT;
+                inode->i_mode |= iattr->ia_mode & ~S_IFMT;
                xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
                timeflags |= XFS_ICHGTIME_CHG;
@@ -559,7 +461,7 @@ xfs_setattr(
         * and can change the group id only to a group of which he
         * or she is a member.
         */
-        if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+        if (mask & (ATTR_UID|ATTR_GID)) {
                /*
                 * CAP_FSETID overrides the following restrictions:
                 *
@@ -577,39 +479,24 @@ xfs_setattr(
                 */
                if (iuid != uid) {
                        if (XFS_IS_UQUOTA_ON(mp)) {
-                                ASSERT(mask & XFS_AT_UID);
+                                ASSERT(mask & ATTR_UID);
                                ASSERT(udqp);
                                olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
                                                        &ip->i_udquot, udqp);
                        }
                        ip->i_d.di_uid = uid;
+                        inode->i_uid = uid;
                }
                if (igid != gid) {
                        if (XFS_IS_GQUOTA_ON(mp)) {
                                ASSERT(!XFS_IS_PQUOTA_ON(mp));
-                                ASSERT(mask & XFS_AT_GID);
+                                ASSERT(mask & ATTR_GID);
                                ASSERT(gdqp);
                                olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
                                                        &ip->i_gdquot, gdqp);
                        }
                        ip->i_d.di_gid = gid;
-                }
+                        inode->i_gid = gid;
-                if (iprojid != projid) {
-                        if (XFS_IS_PQUOTA_ON(mp)) {
-                                ASSERT(!XFS_IS_GQUOTA_ON(mp));
-                                ASSERT(mask & XFS_AT_PROJID);
-                                ASSERT(gdqp);
-                                olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
-                                                        &ip->i_gdquot, gdqp);
-                        }
-                        ip->i_d.di_projid = projid;
-                        /*
-                         * We may have to rev the inode as well as
-                         * the superblock version number since projids didn't
-                         * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-                         */
-                        if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
-                                xfs_bump_ino_vers2(tp, ip);
                }
                xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -620,82 +507,34 @@ xfs_setattr(
        /*
         * Change file access or modified times.
         */
-        if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+        if (mask & (ATTR_ATIME|ATTR_MTIME)) {
-                if (mask & XFS_AT_ATIME) {
+                if (mask & ATTR_ATIME) {
-                        ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
+                        inode->i_atime = iattr->ia_atime;
-                        ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+                        ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+                        ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
                        ip->i_update_core = 1;
                        timeflags &= ~XFS_ICHGTIME_ACC;
                }
-                if (mask & XFS_AT_MTIME) {
+                if (mask & ATTR_MTIME) {
-                        ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
+                        inode->i_mtime = iattr->ia_mtime;
-                        ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+                        ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+                        ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
                        timeflags &= ~XFS_ICHGTIME_MOD;
                        timeflags |= XFS_ICHGTIME_CHG;
                }
-                if (tp && (flags & ATTR_UTIME))
+                if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
                        xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
        }
        /*
-         * Change XFS-added attributes.
+         * Change file inode change time only if ATTR_CTIME set
-         */
-        if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
-                if (mask & XFS_AT_EXTSIZE) {
-                        /*
-                         * Converting bytes to fs blocks.
-                         */
-                        ip->i_d.di_extsize = vap->va_extsize >>
-                                mp->m_sb.sb_blocklog;
-                }
-                if (mask & XFS_AT_XFLAGS) {
-                        uint    di_flags;
-                        /* can't set PREALLOC this way, just preserve it */
-                        di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-                        if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
-                                di_flags |= XFS_DIFLAG_IMMUTABLE;
-                        if (vap->va_xflags & XFS_XFLAG_APPEND)
-                                di_flags |= XFS_DIFLAG_APPEND;
-                        if (vap->va_xflags & XFS_XFLAG_SYNC)
-                                di_flags |= XFS_DIFLAG_SYNC;
-                        if (vap->va_xflags & XFS_XFLAG_NOATIME)
-                                di_flags |= XFS_DIFLAG_NOATIME;
-                        if (vap->va_xflags & XFS_XFLAG_NODUMP)
-                                di_flags |= XFS_DIFLAG_NODUMP;
-                        if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
-                                di_flags |= XFS_DIFLAG_PROJINHERIT;
-                        if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
-                                di_flags |= XFS_DIFLAG_NODEFRAG;
-                        if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
-                                di_flags |= XFS_DIFLAG_FILESTREAM;
-                        if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
-                                if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
-                                        di_flags |= XFS_DIFLAG_RTINHERIT;
-                                if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
-                                        di_flags |= XFS_DIFLAG_NOSYMLINKS;
-                                if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
-                                        di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-                        } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
-                                if (vap->va_xflags & XFS_XFLAG_REALTIME)
-                                        di_flags |= XFS_DIFLAG_REALTIME;
-                                if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
-                                        di_flags |= XFS_DIFLAG_EXTSIZE;
-                        }
-                        ip->i_d.di_flags = di_flags;
-                }
-                xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-                timeflags |= XFS_ICHGTIME_CHG;
-        }
-        /*
-         * Change file inode change time only if XFS_AT_CTIME set
         * AND we have been called by a DMI function.
         */
-        if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
+        if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
-                ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
+                inode->i_ctime = iattr->ia_ctime;
-                ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+                ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
                ip->i_update_core = 1;
                timeflags &= ~XFS_ICHGTIME_CHG;
        }
@@ -704,7 +543,7 @@ xfs_setattr(
         * Send out timestamp changes that need to be set to the
         * current time.  Not done when called by a DMI function.
         */
-        if (timeflags && !(flags & ATTR_DMI))
+        if (timeflags && !(flags & XFS_ATTR_DMI))
                xfs_ichgtime(ip, timeflags);
        XFS_STATS_INC(xs_ig_attrchg);
@@ -742,7 +581,7 @@ xfs_setattr(
        }
        if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
-            !(flags & ATTR_DMI)) {
+            !(flags & XFS_ATTR_DMI)) {
                (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
                                        NULL, DM_RIGHT_NULL, NULL, NULL,
                                        0, 0, AT_DELAY_FLAG(flags));
@@ -1601,12 +1440,18 @@ xfs_inactive(
        return VN_INACTIVE_CACHE;
 }
+/*
+ * Looks up an inode from "name". If ci_name is not NULL, then a CI match
+ * is allowed, otherwise it has to be an exact match. If a CI match is found,
+ * ci_name->name will point to the actual name (caller must free) or
+ * will be set to NULL if an exact match is found.
+ */
 int
 xfs_lookup(
        xfs_inode_t             *dp,
        struct xfs_name         *name,
-        xfs_inode_t             **ipp)
+        xfs_inode_t             **ipp,
+        struct xfs_name         *ci_name)
 {
        xfs_ino_t               inum;
        int                     error;
@@ -1618,7 +1463,7 @@ xfs_lookup(
                return XFS_ERROR(EIO);
        lock_mode = xfs_ilock_map_shared(dp);
-        error = xfs_dir_lookup(NULL, dp, name, &inum);
+        error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
        xfs_iunlock_map_shared(dp, lock_mode);
        if (error)
@@ -1626,12 +1471,15 @@ xfs_lookup(
        error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
        if (error)
-                goto out;
+                goto out_free_name;
        xfs_itrace_ref(*ipp);
        return 0;
- out:
+out_free_name:
+        if (ci_name)
+                kmem_free(ci_name->name);
+out:
        *ipp = NULL;
        return error;
 }
@@ -2098,13 +1946,6 @@ again:
 #endif
 }
-#ifdef  DEBUG
-#define REMOVE_DEBUG_TRACE(x)   {remove_which_error_return = (x);}
-int remove_which_error_return = 0;
-#else /* ! DEBUG */
-#define REMOVE_DEBUG_TRACE(x)
-#endif  /* ! DEBUG */
 int
 xfs_remove(
        xfs_inode_t             *dp,
@@ -2113,6 +1954,7 @@ xfs_remove(
 {
        xfs_mount_t             *mp = dp->i_mount;
        xfs_trans_t             *tp = NULL;
+        int                     is_dir = S_ISDIR(ip->i_d.di_mode);
        int                     error = 0;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
@@ -2120,8 +1962,10 @@ xfs_remove(
        int                     committed;
        int                     link_zero;
        uint                    resblks;
+        uint                    log_count;
        xfs_itrace_entry(dp);
+        xfs_itrace_entry(ip);
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
@@ -2134,19 +1978,23 @@ xfs_remove(
                        return error;
        }
-        xfs_itrace_entry(ip);
-        xfs_itrace_ref(ip);
        error = XFS_QM_DQATTACH(mp, dp, 0);
-        if (!error)
+        if (error)
-                error = XFS_QM_DQATTACH(mp, ip, 0);
+                goto std_return;
-        if (error) {
-                REMOVE_DEBUG_TRACE(__LINE__);
+        error = XFS_QM_DQATTACH(mp, ip, 0);
+        if (error)
                goto std_return;
-        }
-        tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+        if (is_dir) {
+                tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
+                log_count = XFS_DEFAULT_LOG_COUNT;
+        } else {
+                tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+                log_count = XFS_REMOVE_LOG_COUNT;
+        }
        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        /*
         * We try to get the real space reservation first,
         * allowing for directory btree deletion(s) implying
@@ -2158,25 +2006,21 @@ xfs_remove(
         */
        resblks = XFS_REMOVE_SPACE_RES(mp);
        error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
-                        XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+                                  XFS_TRANS_PERM_LOG_RES, log_count);
        if (error == ENOSPC) {
                resblks = 0;
                error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
-                                XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
+                                          XFS_TRANS_PERM_LOG_RES, log_count);
        }
        if (error) {
                ASSERT(error != ENOSPC);
-                REMOVE_DEBUG_TRACE(__LINE__);
+                cancel_flags = 0;
-                xfs_trans_cancel(tp, 0);
+                goto out_trans_cancel;
-                return error;
        }
        error = xfs_lock_dir_and_entry(dp, ip);
-        if (error) {
+        if (error)
-                REMOVE_DEBUG_TRACE(__LINE__);
+                goto out_trans_cancel;
-                xfs_trans_cancel(tp, cancel_flags);
-                goto std_return;
-        }
        /*
         * At this point, we've gotten both the directory and the entry
@@ -2189,6 +2033,21 @@ xfs_remove(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        /*
+         * If we're removing a directory perform some additional validation.
+         */
+        if (is_dir) {
+                ASSERT(ip->i_d.di_nlink >= 2);
+                if (ip->i_d.di_nlink != 2) {
+                        error = XFS_ERROR(ENOTEMPTY);
+                        goto out_trans_cancel;
+                }
+                if (!xfs_dir_isempty(ip)) {
+                        error = XFS_ERROR(ENOTEMPTY);
+                        goto out_trans_cancel;
+                }
+        }
+        /*
         * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
         */
        XFS_BMAP_INIT(&free_list, &first_block);
@@ -2196,39 +2055,64 @@ xfs_remove(
                                        &first_block, &free_list, resblks);
        if (error) {
                ASSERT(error != ENOENT);
-                REMOVE_DEBUG_TRACE(__LINE__);
+                goto out_bmap_cancel;
-                goto error1;
        }
        xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+        /*
+         * Bump the in-memory generation count on the parent
+         * directory so that others can know that it has changed.
+         */
        dp->i_gen++;
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-        error = xfs_droplink(tp, ip);
+        if (is_dir) {
-        if (error) {
+                /*
-                REMOVE_DEBUG_TRACE(__LINE__);
+                 * Drop the link from ip's "..".
-                goto error1;
+                 */
+                error = xfs_droplink(tp, dp);
+                if (error)
+                        goto out_bmap_cancel;
+                /*
+                 * Drop the link from dp to ip.
+                 */
+                error = xfs_droplink(tp, ip);
+                if (error)
+                        goto out_bmap_cancel;
+        } else {
+                /*
+                 * When removing a non-directory we need to log the parent
+                 * inode here for the i_gen update.  For a directory this is
+                 * done implicitly by the xfs_droplink call for the ".." entry.
+                 */
+                xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
        }
-        /* Determine if this is the last link while
+        /*
+         * Drop the "." link from ip to self.
+         */
+        error = xfs_droplink(tp, ip);
+        if (error)
+                goto out_bmap_cancel;
+        /*
+         * Determine if this is the last link while
         * we are in the transaction.
         */
-        link_zero = (ip)->i_d.di_nlink==0;
+        link_zero = (ip->i_d.di_nlink == 0);
        /*
         * If this is a synchronous mount, make sure that the
         * remove transaction goes to disk before returning to
         * the user.
         */
-        if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+        if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
                xfs_trans_set_sync(tp);
-        }
        error = xfs_bmap_finish(&tp, &free_list, &committed);
-        if (error) {
+        if (error)
-                REMOVE_DEBUG_TRACE(__LINE__);
+                goto out_bmap_cancel;
-                goto error_rele;
-        }
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
        if (error)
@@ -2240,38 +2124,26 @@ xfs_remove(
         * will get killed on last close in xfs_close() so we don't
         * have to worry about that.
         */
-        if (link_zero && xfs_inode_is_filestream(ip))
+        if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
                xfs_filestream_deassociate(ip);
        xfs_itrace_exit(ip);
+        xfs_itrace_exit(dp);
-/*      Fall through to std_return with error = 0 */
 std_return:
        if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
-                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
+                XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
-                                dp, DM_RIGHT_NULL,
+                                NULL, DM_RIGHT_NULL, name->name, NULL,
-                                NULL, DM_RIGHT_NULL,
+                                ip->i_d.di_mode, error, 0);
-                                name->name, NULL, ip->i_d.di_mode, error, 0);
        }
-        return error;
- error1:
+        return error;
-        xfs_bmap_cancel(&free_list);
-        cancel_flags |= XFS_TRANS_ABORT;
-        xfs_trans_cancel(tp, cancel_flags);
-        goto std_return;
- error_rele:
+ out_bmap_cancel:
-        /*
-         * In this case make sure to not release the inode until after
-         * the current transaction is aborted.  Releasing it beforehand
-         * can cause us to go to xfs_inactive and start a recursive
-         * transaction which can easily deadlock with the current one.
-         */
        xfs_bmap_cancel(&free_list);
        cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
        xfs_trans_cancel(tp, cancel_flags);
        goto std_return;
 }
@@ -2638,186 +2510,6 @@ std_return:
 }
 int
-xfs_rmdir(
-        xfs_inode_t             *dp,
-        struct xfs_name         *name,
-        xfs_inode_t             *cdp)
-{
-        xfs_mount_t             *mp = dp->i_mount;
-        xfs_trans_t             *tp;
-        int                     error;
-        xfs_bmap_free_t         free_list;
-        xfs_fsblock_t           first_block;
-        int                     cancel_flags;
-        int                     committed;
-        int                     last_cdp_link;
-        uint                    resblks;
-        xfs_itrace_entry(dp);
-        if (XFS_FORCED_SHUTDOWN(mp))
-                return XFS_ERROR(EIO);
-        if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
-                error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
-                                        dp, DM_RIGHT_NULL,
-                                        NULL, DM_RIGHT_NULL, name->name,
-                                        NULL, cdp->i_d.di_mode, 0, 0);
-                if (error)
-                        return XFS_ERROR(error);
-        }
-        /*
-         * Get the dquots for the inodes.
-         */
-        error = XFS_QM_DQATTACH(mp, dp, 0);
-        if (!error)
-                error = XFS_QM_DQATTACH(mp, cdp, 0);
-        if (error) {
-                REMOVE_DEBUG_TRACE(__LINE__);
-                goto std_return;
-        }
-        tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
-        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-        /*
-         * We try to get the real space reservation first,
-         * allowing for directory btree deletion(s) implying
-         * possible bmap insert(s).  If we can't get the space
-         * reservation then we use 0 instead, and avoid the bmap
-         * btree insert(s) in the directory code by, if the bmap
-         * insert tries to happen, instead trimming the LAST
-         * block from the directory.
-         */
-        resblks = XFS_REMOVE_SPACE_RES(mp);
-        error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
-                        XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
-        if (error == ENOSPC) {
-                resblks = 0;
-                error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
-                                XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
-        }
-        if (error) {
-                ASSERT(error != ENOSPC);
-                cancel_flags = 0;
-                goto error_return;
-        }
-        XFS_BMAP_INIT(&free_list, &first_block);
-        /*
-         * Now lock the child directory inode and the parent directory
-         * inode in the proper order.  This will take care of validating
-         * that the directory entry for the child directory inode has
-         * not changed while we were obtaining a log reservation.
-         */
-        error = xfs_lock_dir_and_entry(dp, cdp);
-        if (error) {
-                xfs_trans_cancel(tp, cancel_flags);
-                goto std_return;
-        }
-        IHOLD(dp);
-        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-        IHOLD(cdp);
-        xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
-        ASSERT(cdp->i_d.di_nlink >= 2);
-        if (cdp->i_d.di_nlink != 2) {
-                error = XFS_ERROR(ENOTEMPTY);
-                goto error_return;
-        }
-        if (!xfs_dir_isempty(cdp)) {
-                error = XFS_ERROR(ENOTEMPTY);
-                goto error_return;
-        }
-        error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
-                                        &first_block, &free_list, resblks);
-        if (error)
-                goto error1;
-        xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-        /*
-         * Bump the in memory generation count on the parent
-         * directory so that other can know that it has changed.
-         */
-        dp->i_gen++;
-        /*
-         * Drop the link from cdp's "..".
-         */
-        error = xfs_droplink(tp, dp);
-        if (error) {
-                goto error1;
-        }
-        /*
-         * Drop the link from dp to cdp.
-         */
-        error = xfs_droplink(tp, cdp);
-        if (error) {
-                goto error1;
-        }
-        /*
-         * Drop the "." link from cdp to self.
-         */
-        error = xfs_droplink(tp, cdp);
-        if (error) {
-                goto error1;
-        }
-        /* Determine these before committing transaction */
-        last_cdp_link = (cdp)->i_d.di_nlink==0;
-        /*
-         * If this is a synchronous mount, make sure that the
-         * rmdir transaction goes to disk before returning to
-         * the user.
-         */
-        if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-                xfs_trans_set_sync(tp);
-        }
-        error = xfs_bmap_finish (&tp, &free_list, &committed);
-        if (error) {
-                xfs_bmap_cancel(&free_list);
-                xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
-                                 XFS_TRANS_ABORT));
-                goto std_return;
-        }
-        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-        if (error) {
-                goto std_return;
-        }
-        /* Fall through to std_return with error = 0 or the errno
-         * from xfs_trans_commit. */
- std_return:
-        if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
-                (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-                                        dp, DM_RIGHT_NULL,
-                                        NULL, DM_RIGHT_NULL,
-                                        name->name, NULL, cdp->i_d.di_mode,
-                                        error, 0);
-        }
-        return error;
- error1:
-        xfs_bmap_cancel(&free_list);
-        cancel_flags |= XFS_TRANS_ABORT;
-        /* FALLTHROUGH */
- error_return:
-        xfs_trans_cancel(tp, cancel_flags);
-        goto std_return;
-}
-int
 xfs_symlink(
        xfs_inode_t             *dp,
        struct xfs_name         *link_name,
@@ -3242,7 +2934,6 @@ xfs_finish_reclaim(
 {
        xfs_perag_t     *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
        bhv_vnode_t     *vp = XFS_ITOV_NULL(ip);
-        int             error;
        if (vp && VN_BAD(vp))
                goto reclaim;
@@ -3285,29 +2976,16 @@ xfs_finish_reclaim(
                xfs_iflock(ip);
        }
-        if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+        /*
-                if (ip->i_update_core ||
+         * In the case of a forced shutdown we rely on xfs_iflush() to
-                    ((ip->i_itemp != NULL) &&
+         * wait for the inode to be unpinned before returning an error.
-                     (ip->i_itemp->ili_format.ilf_fields != 0))) {
+         */
-                        error = xfs_iflush(ip, sync_mode);
+        if (xfs_iflush(ip, sync_mode) == 0) {
-                        /*
+                /* synchronize with xfs_iflush_done */
-                         * If we hit an error, typically because of filesystem
+                xfs_iflock(ip);
-                         * shutdown, we don't need to let vn_reclaim to know
+                xfs_ifunlock(ip);
-                         * because we're gonna reclaim the inode anyway.
-                         */
-                        if (error) {
-                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                                goto reclaim;
-                        }
-                        xfs_iflock(ip); /* synchronize with xfs_iflush_done */
-                }
-                ASSERT(ip->i_update_core == 0);
-                ASSERT(ip->i_itemp == NULL ||
-                       ip->i_itemp->ili_format.ilf_fields == 0);
        }
-        xfs_ifunlock(ip);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 reclaim:
@@ -3418,7 +3096,7 @@ xfs_alloc_file_space(
        /*      Generate a DMAPI event if needed.       */
        if (alloc_type != 0 && offset < ip->i_size &&
-                        (attr_flags&ATTR_DMI) == 0  &&
+                        (attr_flags & XFS_ATTR_DMI) == 0  &&
                        DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
                xfs_off_t           end_dmi_offset;
@@ -3532,7 +3210,7 @@ retry:
                allocatesize_fsb -= allocated_fsb;
        }
 dmapi_enospc_check:
-        if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
+        if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
            DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
                                ip, DM_RIGHT_NULL,
@@ -3679,7 +3357,7 @@ xfs_free_file_space(
        end_dmi_offset = offset + len;
        endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
-        if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 &&
+        if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
            DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
                if (end_dmi_offset > ip->i_size)
                        end_dmi_offset = ip->i_size;
@@ -3690,7 +3368,7 @@ xfs_free_file_space(
                        return error;
        }
-        if (attr_flags & ATTR_NOLOCK)
+        if (attr_flags & XFS_ATTR_NOLOCK)
                need_iolock = 0;
        if (need_iolock) {
                xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3867,7 +3545,7 @@ xfs_change_file_space(
        xfs_off_t       startoffset;
        xfs_off_t       llen;
        xfs_trans_t     *tp;
-        bhv_vattr_t     va;
+        struct iattr    iattr;
        xfs_itrace_entry(ip);
@@ -3941,10 +3619,10 @@ xfs_change_file_space(
                                break;
                }
-                va.va_mask = XFS_AT_SIZE;
+                iattr.ia_valid = ATTR_SIZE;
-                va.va_size = startoffset;
+                iattr.ia_size = startoffset;
-                error = xfs_setattr(ip, &va, attr_flags, credp);
+                error = xfs_setattr(ip, &iattr, attr_flags, credp);
                if (error)
                        return error;
@@ -3974,7 +3652,7 @@ xfs_change_file_space(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_ihold(tp, ip);
-        if ((attr_flags & ATTR_DMI) == 0) {
+        if ((attr_flags & XFS_ATTR_DMI) == 0) {
                ip->i_d.di_mode &= ~S_ISUID;
                /*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce53..e932a96bec54 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
 #define _XFS_VNODEOPS_H 1
 struct attrlist_cursor_kern;
-struct bhv_vattr;
 struct cred;
 struct file;
+struct iattr;
 struct inode;
 struct iovec;
 struct kiocb;
@@ -15,14 +15,18 @@ struct xfs_iomap;
 int xfs_open(struct xfs_inode *ip);
-int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
                struct cred *credp);
+#define XFS_ATTR_DMI            0x01    /* invocation from a DMI function */
+#define XFS_ATTR_NONBLOCK       0x02    /* return EAGAIN if operation would block */
+#define XFS_ATTR_NOLOCK         0x04    /* Don't grab any conflicting locks */
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_fsync(struct xfs_inode *ip);
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
-                struct xfs_inode **ipp);
+                struct xfs_inode **ipp, struct xfs_name *ci_name);
 int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
                xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
                struct xfs_name *target_name);
 int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
                mode_t mode, struct xfs_inode **ipp, struct cred *credp);
-int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
-                struct xfs_inode *cdp);
 int xfs_readdir(struct xfs_inode        *dp, void *dirent, size_t bufsize,
                       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
author	Ingo Molnar <mingo@elte.hu>	2008-08-13 07:08:47 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-13 07:08:47 -0400
commit	a12e61df4fa1cfae7a6b76976fa65a6fcb048e3f (patch)
tree	957c915298b299902aa80afa7c78e94c71b3dc25 /fs
parent	7b27718bdb1b70166383dec91391df5534d449ee (diff)
parent	30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)