Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_dir.c                 |   1
-rw-r--r--  fs/Kconfig                      |  21
-rw-r--r--  fs/adfs/dir.c                   |   1
-rw-r--r--  fs/affs/dir.c                   |   1
-rw-r--r--  fs/autofs4/root.c               |   2
-rw-r--r--  fs/befs/linuxvfs.c              |   1
-rw-r--r--  fs/binfmt_flat.c                |   4
-rw-r--r--  fs/binfmt_misc.c                |   4
-rw-r--r--  fs/bio.c                        |  48
-rw-r--r--  fs/buffer.c                     |  13
-rw-r--r--  fs/cifs/CHANGES                 |  10
-rw-r--r--  fs/cifs/README                  |  44
-rw-r--r--  fs/cifs/asn1.c                  |  11
-rw-r--r--  fs/cifs/cifs_spnego.c           |   4
-rw-r--r--  fs/cifs/cifs_spnego.h           |   2
-rw-r--r--  fs/cifs/cifsencrypt.c           |   1
-rw-r--r--  fs/cifs/cifsglob.h              |   3
-rw-r--r--  fs/cifs/connect.c               |  33
-rw-r--r--  fs/cifs/dns_resolve.c           |   7
-rw-r--r--  fs/cifs/file.c                  |   4
-rw-r--r--  fs/cifs/inode.c                 |   3
-rw-r--r--  fs/cifs/sess.c                  |  13
-rw-r--r--  fs/compat.c                     |   8
-rw-r--r--  fs/configfs/dir.c               |  17
-rw-r--r--  fs/cramfs/inode.c               |  84
-rw-r--r--  fs/dcache.c                     |   2
-rw-r--r--  fs/efs/namei.c                  |   3
-rw-r--r--  fs/ext4/balloc.c                |   3
-rw-r--r--  fs/ext4/dir.c                   |  20
-rw-r--r--  fs/ext4/ext4.h                  |   4
-rw-r--r--  fs/ext4/ext4_extents.h          |   4
-rw-r--r--  fs/ext4/ext4_jbd2.h             |   8
-rw-r--r--  fs/ext4/extents.c               | 113
-rw-r--r--  fs/ext4/ialloc.c                |   2
-rw-r--r--  fs/ext4/inode.c                 | 478
-rw-r--r--  fs/ext4/mballoc.c               |  53
-rw-r--r--  fs/ext4/migrate.c               |   3
-rw-r--r--  fs/ext4/resize.c                |   3
-rw-r--r--  fs/ext4/super.c                 |   1
-rw-r--r--  fs/fat/inode.c                  |  10
-rw-r--r--  fs/ioprio.c                     |   8
-rw-r--r--  fs/jffs2/jffs2_fs_i.h           |   1
-rw-r--r--  fs/nfs/super.c                  |   6
-rw-r--r--  fs/nfsd/nfs4acl.c               |   2
-rw-r--r--  fs/nfsd/nfs4proc.c              |  12
-rw-r--r--  fs/ntfs/namei.c                 |  89
-rw-r--r--  fs/ntfs/usnjrnl.h               |   4
-rw-r--r--  fs/ocfs2/cluster/netdebug.c     |  26
-rw-r--r--  fs/ocfs2/cluster/tcp.c          |  44
-rw-r--r--  fs/ocfs2/cluster/tcp_internal.h |  32
-rw-r--r--  fs/ocfs2/dir.c                  |  11
-rw-r--r--  fs/ocfs2/journal.c              |  23
-rw-r--r--  fs/ocfs2/stackglue.c            |   7
-rw-r--r--  fs/proc/array.c                 |  59
-rw-r--r--  fs/proc/generic.c               |   1
-rw-r--r--  fs/proc/nommu.c                 |   4
-rw-r--r--  fs/proc/proc_misc.c             |   7
-rw-r--r--  fs/proc/task_mmu.c              |   4
-rw-r--r--  fs/readdir.c                    |   8
-rw-r--r--  fs/seq_file.c                   |  11
-rw-r--r--  fs/ubifs/budget.c               | 114
-rw-r--r--  fs/ubifs/dir.c                  |   1
-rw-r--r--  fs/ubifs/file.c                 |  20
-rw-r--r--  fs/ubifs/find.c                 |  18
-rw-r--r--  fs/ubifs/gc.c                   |   6
-rw-r--r--  fs/ubifs/misc.h                 |  49
-rw-r--r--  fs/ubifs/super.c                |  22
-rw-r--r--  fs/ubifs/tnc.c                  | 116
-rw-r--r--  fs/ubifs/ubifs-media.h          |   2
-rw-r--r--  fs/ubifs/ubifs.h                |  14
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c     |   1
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c     |   2
-rw-r--r--  fs/xfs/xfs_dmapi.h              |   1
73 files changed, 1021 insertions, 751 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 88e3787c6ea9..e298fe194093 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 
 const struct file_operations v9fs_dir_operations = {
 	.read = generic_read_dir,
+	.llseek = generic_file_llseek,
 	.readdir = v9fs_dir_readdir,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
diff --git a/fs/Kconfig b/fs/Kconfig
index d3873583360b..abccb5dab9a8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH
 
 	  If unsure, say N.
 
+config CIFS_UPCALL
+	bool "Kerberos/SPNEGO advanced session setup"
+	depends on CIFS && KEYS
+	help
+	  Enables an upcall mechanism for CIFS which accesses
+	  userspace helper utilities to provide SPNEGO packaged (RFC 4178)
+	  Kerberos tickets which are needed to mount to certain secure servers
+	  (for which more secure Kerberos authentication is required). If
+	  unsure, say N.
+
 config CIFS_XATTR
 	bool "CIFS extended attributes"
 	depends on CIFS
@@ -1982,17 +1992,6 @@ config CIFS_EXPERIMENTAL
 	  (which is disabled by default). See the file fs/cifs/README
 	  for more details. If unsure, say N.
 
-config CIFS_UPCALL
-	bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
-	depends on CIFS_EXPERIMENTAL
-	depends on KEYS
-	help
-	  Enables an upcall mechanism for CIFS which accesses
-	  userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	  Kerberos tickets which are needed to mount to certain secure servers
-	  (for which more secure Kerberos authentication is required). If
-	  unsure, say N.
-
 config CIFS_DFS_UPCALL
 	bool "DFS feature support (EXPERIMENTAL)"
 	depends on CIFS_EXPERIMENTAL
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index fc1a8dc64d78..85a30e929800 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,6 +197,7 @@ out:
 
 const struct file_operations adfs_dir_operations = {
 	.read		= generic_read_dir,
+	.llseek		= generic_file_llseek,
 	.readdir	= adfs_readdir,
 	.fsync		= file_fsync,
 };
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 6e3f282424b0..7b36904dbeac 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t);
 
 const struct file_operations affs_dir_operations = {
 	.read		= generic_read_dir,
+	.llseek		= generic_file_llseek,
 	.readdir	= affs_readdir,
 	.fsync		= file_fsync,
 };
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index bcfb2dc0a61b..2a41c2a7fc52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = {
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
 	.ioctl		= autofs4_root_ioctl,
 };
 
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = {
 	.release	= dcache_dir_close,
 	.read		= generic_read_dir,
 	.readdir	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
 };
 
 const struct inode_operations autofs4_indirect_root_inode_operations = {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 02c6e62b72f8..740f53672a8a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep;
 static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= befs_readdir,
+	.llseek		= generic_file_llseek,
 };
 
 static const struct inode_operations befs_dir_inode_operations = {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 56372ecf1690..dfc0197905ca 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* Stash our initial stack pointer into the mm structure */
 	current->mm->start_stack = (unsigned long )sp;
 
-
+#ifdef FLAT_PLAT_INIT
+	FLAT_PLAT_INIT(regs);
+#endif
 	DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n",
 		(int)regs, (int)start_addr, (int)current->mm->start_stack);
 
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 756205314c24..8d7e88e02e0f 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (bprm->misc_bang)
 		goto _ret;
 
-	bprm->misc_bang = 1;
-
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
 	fmt = check_file(bprm);
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval < 0)
 		goto _error;
 
+	bprm->misc_bang = 1;
+
 	retval = search_binary_handler (bprm, regs);
 	if (retval < 0)
 		goto _error;
diff --git a/fs/bio.c b/fs/bio.c
index 8000e2fa16cb..3cba7ae34d75 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -469,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 	kfree(bmd);
 }
 
-static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
+static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
+					       gfp_t gfp_mask)
 {
-	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL);
+	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
 
 	if (!bmd)
 		return NULL;
 
-	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL);
+	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
 	if (!bmd->iovecs) {
 		kfree(bmd);
 		return NULL;
 	}
 
-	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL);
+	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
 	if (bmd->sgvecs)
 		return bmd;
 
@@ -491,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count)
 	return NULL;
 }
 
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
-			  int uncopy)
+static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
+			  struct sg_iovec *iov, int iov_count, int uncopy)
 {
 	int ret = 0, i;
 	struct bio_vec *bvec;
@@ -502,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *bv_addr = page_address(bvec->bv_page);
-		unsigned int bv_len = bvec->bv_len;
+		unsigned int bv_len = iovecs[i].bv_len;
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
@@ -554,7 +555,7 @@ int bio_uncopy_user(struct bio *bio)
 	struct bio_map_data *bmd = bio->bi_private;
 	int ret;
 
-	ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1);
+	ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
 
 	bio_free_map_data(bmd);
 	bio_put(bio);
@@ -596,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 		len += iov[i].iov_len;
 	}
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count);
+	bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
@@ -633,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
 	 * success
 	 */
 	if (!write_to_vm) {
-		ret = __bio_copy_iov(bio, iov, iov_count, 0);
+		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
 		if (ret)
 			goto cleanup;
 	}
@@ -942,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
 {
 	struct bio_vec *bvec;
 	const int read = bio_data_dir(bio) == READ;
-	char *p = bio->bi_private;
+	struct bio_map_data *bmd = bio->bi_private;
 	int i;
+	char *p = bmd->sgvecs[0].iov_base;
 
 	__bio_for_each_segment(bvec, bio, i, 0) {
 		char *addr = page_address(bvec->bv_page);
+		int len = bmd->iovecs[i].bv_len;
 
 		if (read && !err)
-			memcpy(p, addr, bvec->bv_len);
+			memcpy(p, addr, len);
 
 		__free_page(bvec->bv_page);
-		p += bvec->bv_len;
+		p += len;
 	}
 
+	bio_free_map_data(bmd);
 	bio_put(bio);
 }
 
@@ -978,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 	const int nr_pages = end - start;
 	struct bio *bio;
 	struct bio_vec *bvec;
+	struct bio_map_data *bmd;
 	int i, ret;
+	struct sg_iovec iov;
+
+	iov.iov_base = data;
+	iov.iov_len = len;
+
+	bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
+	if (!bmd)
+		return ERR_PTR(-ENOMEM);
 
+	ret = -ENOMEM;
 	bio = bio_alloc(gfp_mask, nr_pages);
 	if (!bio)
-		return ERR_PTR(-ENOMEM);
+		goto out_bmd;
 
 	while (len) {
 		struct page *page;
@@ -1016,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
 		}
 	}
 
-	bio->bi_private = data;
+	bio->bi_private = bmd;
 	bio->bi_end_io = bio_copy_kern_endio;
+
+	bio_set_map_data(bmd, bio, &iov, 1);
 	return bio;
 cleanup:
 	bio_for_each_segment(bvec, bio, i)
 		__free_page(bvec->bv_page);
 
 	bio_put(bio);
+out_bmd:
+	bio_free_map_data(bmd);
 
 	return ERR_PTR(ret);
 }
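
The bio.c changes above all serve one idea: bio_alloc_map_data() now takes the caller's gfp_mask, and the original per-segment lengths are saved in the bio_map_data so that the completion and uncopy paths copy back using the lengths recorded at setup time, not whatever bv_len holds after the driver has processed the request. A minimal user-space sketch of that save-at-setup, trust-at-completion pattern (the struct and names are illustrative stand-ins, not kernel API):

	#include <stdio.h>
	#include <string.h>

	/* Illustrative stand-in for a bio_vec; not the kernel type. */
	struct seg {
		char data[16];
		unsigned int len;	/* like bv_len: may shrink in flight */
	};

	int main(void)
	{
		struct seg s = { "abcdef", 6 };
		unsigned int saved_len = s.len;	/* recorded at setup, like bmd->iovecs[i].bv_len */
		char dst[16] = { 0 };

		s.len = 3;	/* a driver may shrink the in-flight length */

		/* The completion path trusts the saved value, not the live one. */
		memcpy(dst, s.data, saved_len);
		printf("%s\n", dst);	/* prints "abcdef", not a truncated copy */
		return 0;
	}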
diff --git a/fs/buffer.c b/fs/buffer.c
index 38653e36e225..ac78d4c19b3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
 
-	if (buffer_ordered(bh) && (rw == WRITE))
-		rw = WRITE_BARRIER;
+	/*
+	 * Mask in barrier bit for a write (could be either a WRITE or a
+	 * WRITE_SYNC
+	 */
+	if (buffer_ordered(bh) && (rw & WRITE))
+		rw |= WRITE_BARRIER;
 
 	/*
-	 * Only clear out a write error when rewriting, should this
-	 * include WRITE_SYNC as well?
+	 * Only clear out a write error when rewriting
 	 */
-	if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER))
+	if (test_set_buffer_req(bh) && (rw & WRITE))
 		clear_buffer_write_io_error(bh);
 
 	/*
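
The submit_bh() fix works because WRITE_SYNC is WRITE with extra flag bits set, so the old equality test never matched it while the new mask test matches any write variant. A tiny user-space demonstration with made-up flag values (the real definitions live in the block layer headers):

	#include <stdio.h>

	/* Illustrative values only; not the kernel's actual flag bits. */
	#define DEMO_WRITE	0x01
	#define DEMO_SYNC	0x08
	#define DEMO_WRITE_SYNC	(DEMO_WRITE | DEMO_SYNC)

	int main(void)
	{
		int rw = DEMO_WRITE_SYNC;

		/* Old test misses WRITE_SYNC entirely. */
		printf("rw == WRITE: %d\n", rw == DEMO_WRITE);		/* 0 */
		/* New test catches any request with the write bit set. */
		printf("rw & WRITE:  %d\n", !!(rw & DEMO_WRITE));	/* 1 */
		return 0;
	}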
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index f5d0083e09fa..06e521a945c3 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,15 @@ Fix premature write failure on congested networks (we would give up
 on EAGAIN from the socket too quickly on large writes).
 Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
 Fix endian problems in acl (mode from/to cifs acl) on bigendian
-architectures.
+architectures. Fix problems with preserving timestamps on copying open
+files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
+on parent directory when server supports Unix Extensions but not POSIX
+create. Update cifs.upcall version to handle new Kerberos sec flags
+(this requires update of cifs.upcall program from Samba). Fix memory leak
+on dns_upcall (resolving DFS referrals). Fix plain text password
+authentication (requires setting SecurityFlags to 0x30030 to enable
+lanman and plain text though). Fix writes to be at correct offset when
+file is open with O_APPEND and file is on a directio (forcedirectio) mount.
 
 Version 1.53
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 2bd6fe556f88..bd2343d4c6a6 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and
 		hashing mechanisms (as "must use") on the other hand
 		does not make much sense. Default flags are
 		0x07007
-		(NTLM, NTLMv2 and packet signing allowed). Maximum
+		(NTLM, NTLMv2 and packet signing allowed). The maximum
 		allowable flags if you want to allow mounts to servers
 		using weaker password hashes is 0x37037 (lanman,
-		plaintext, ntlm, ntlmv2, signing allowed):
+		plaintext, ntlm, ntlmv2, signing allowed). Some
+		SecurityFlags require the corresponding menuconfig
+		options to be enabled (lanman and plaintext require
+		CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
+		plaintext authentication currently requires also
+		enabling lanman authentication in the security flags
+		because the cifs module only supports sending
+		plaintext passwords using the older lanman dialect
+		form of the session setup SMB. (e.g. for authentication
+		using plain text passwords, set the SecurityFlags
+		to 0x30030):
 
 		may use packet signing			0x00001
 		must use packet signing			0x01001
@@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in
 that they represent all for that share, not just those for which the server
 returned success.
 
 Also note that "cat /proc/fs/cifs/DebugData" will display information about
 the active sessions and the shares that are mounted.
-Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is
-on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and
-LANMAN support do not require this helper.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
+project (http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+Enabling DFS support (used to access shares transparently in an MS-DFS
+global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In
+addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to IP addresses, and the user space helper must also
+be configured in the file /etc/request-key.conf
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file:
+
+create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 5fabd2caf93c..1b09f1670061 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 	unsigned int cls, con, tag, oidlen, rc;
 	bool use_ntlmssp = false;
 	bool use_kerberos = false;
+	bool use_mskerberos = false;
 
 	*secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
 
@@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			 *(oid + 1), *(oid + 2), *(oid + 3)));
 
 			if (compare_oid(oid, oidlen, MSKRB5_OID,
-					MSKRB5_OID_LEN))
-				use_kerberos = true;
+					MSKRB5_OID_LEN) &&
+			    !use_kerberos)
+				use_mskerberos = true;
 			else if (compare_oid(oid, oidlen, KRB5_OID,
-					     KRB5_OID_LEN))
+					     KRB5_OID_LEN) &&
+				 !use_mskerberos)
 				use_kerberos = true;
 			else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 					     NTLMSSP_OID_LEN))
@@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 
 	if (use_kerberos)
 		*secType = Kerberos;
+	else if (use_mskerberos)
+		*secType = MSKerberos;
 	else if (use_ntlmssp)
 		*secType = NTLMSSP;
 
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 2434ab0e8791..117ef4bba68e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -114,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
 
 	dp = description + strlen(description);
 
-	/* for now, only sec=krb5 is valid */
+	/* for now, only sec=krb5 and sec=mskrb5 are valid */
 	if (server->secType == Kerberos)
 		sprintf(dp, ";sec=krb5");
+	else if (server->secType == MSKerberos)
+		sprintf(dp, ";sec=mskrb5");
 	else
 		goto out;
 
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h
index 05a34b17a1ab..e4041ec4d712 100644
--- a/fs/cifs/cifs_spnego.h
+++ b/fs/cifs/cifs_spnego.h
@@ -23,7 +23,7 @@
 #ifndef _CIFS_SPNEGO_H
 #define _CIFS_SPNEGO_H
 
-#define CIFS_SPNEGO_UPCALL_VERSION 1
+#define CIFS_SPNEGO_UPCALL_VERSION 2
 
 /*
  * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION.
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 83fd40dc1ef0..bd5f13d38450 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
 
 	if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
 		if (extended_security & CIFSSEC_MAY_PLNTXT) {
+			memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
 			memcpy(lnm_session_key, password_with_pad,
 				CIFS_ENCPWD_SIZE);
 			return;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 7e1cf262effe..8dfd6f24d488 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -80,7 +80,8 @@ enum securityEnum {
 	NTLMv2,		/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,	/* NTLMSSP without SPNEGO */
 	NTLMSSP,	/* NTLMSSP via SPNEGO */
-	Kerberos	/* Kerberos via SPNEGO */
+	Kerberos,	/* Kerberos via SPNEGO */
+	MSKerberos,	/* MS Kerberos via SPNEGO */
 };
 
 enum protocolEnum {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0711db65afe8..4c13bcdb92a5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 	char ntlm_session_key[CIFS_SESS_KEY_SIZE];
 	bool ntlmv2_flag = false;
 	int first_time = 0;
+	struct TCP_Server_Info *server = pSesInfo->server;
 
 	/* what if server changes its buffer size after dropping the session? */
-	if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ {
+	if (server->maxBuf == 0) /* no need to send on reconnect */ {
 		rc = CIFSSMBNegotiate(xid, pSesInfo);
-		if (rc == -EAGAIN) /* retry only once on 1st time connection */ {
+		if (rc == -EAGAIN) {
+			/* retry only once on 1st time connection */
 			rc = CIFSSMBNegotiate(xid, pSesInfo);
 			if (rc == -EAGAIN)
 				rc = -EHOSTDOWN;
 		}
 		if (rc == 0) {
 			spin_lock(&GlobalMid_Lock);
-			if (pSesInfo->server->tcpStatus != CifsExiting)
-				pSesInfo->server->tcpStatus = CifsGood;
+			if (server->tcpStatus != CifsExiting)
+				server->tcpStatus = CifsGood;
 			else
 				rc = -EHOSTDOWN;
 			spin_unlock(&GlobalMid_Lock);
@@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 		goto ss_err_exit;
 
 	pSesInfo->flags = 0;
-	pSesInfo->capabilities = pSesInfo->server->capabilities;
+	pSesInfo->capabilities = server->capabilities;
 	if (linuxExtEnabled == 0)
 		pSesInfo->capabilities &= (~CAP_UNIX);
 	/* pSesInfo->sequence_number = 0;*/
 	cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
-		pSesInfo->server->secMode,
-		pSesInfo->server->capabilities,
-		pSesInfo->server->timeAdj));
+		server->secMode, server->capabilities, server->timeAdj));
+
 	if (experimEnabled < 2)
 		rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
 	else if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			&& (pSesInfo->server->secType == NTLMSSP)) {
+			&& (server->secType == NTLMSSP)) {
 		rc = -EOPNOTSUPP;
 	} else if (extended_security
 			&& (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
-			&& (pSesInfo->server->secType == RawNTLMSSP)) {
+			&& (server->secType == RawNTLMSSP)) {
 		cFYI(1, ("NTLMSSP sesssetup"));
 		rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
 						nls_info);
@@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 
 	} else {
 		SMBNTencrypt(pSesInfo->password,
-			pSesInfo->server->cryptKey,
+			server->cryptKey,
 			ntlm_session_key);
 
 		if (first_time)
 			cifs_calculate_mac_key(
-				&pSesInfo->server->mac_signing_key,
+				&server->mac_signing_key,
 				ntlm_session_key,
 				pSesInfo->password);
 	}
@@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
 						nls_info);
 		}
 	} else { /* old style NTLM 0.12 session setup */
-		SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
+		SMBNTencrypt(pSesInfo->password, server->cryptKey,
 			ntlm_session_key);
 
 		if (first_time)
-			cifs_calculate_mac_key(
-				&pSesInfo->server->mac_signing_key,
-				ntlm_session_key, pSesInfo->password);
+			cifs_calculate_mac_key(&server->mac_signing_key,
+					       ntlm_session_key,
+					       pSesInfo->password);
 
 		rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
 	}
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index f730ef35499e..a2e0673e1b08 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data,
 	return rc;
 }
 
+static void
+dns_resolver_destroy(struct key *key)
+{
+	kfree(key->payload.data);
+}
+
 struct key_type key_type_dns_resolver = {
 	.name        = "dns_resolver",
 	.def_datalen = sizeof(struct in_addr),
 	.describe    = user_describe,
 	.instantiate = dns_resolver_instantiate,
+	.destroy     = dns_resolver_destroy,
 	.match       = user_match,
 };
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ff14d14903a0..cbefe1f1f9fe 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 		return -EBADF;
 	open_file = (struct cifsFileInfo *) file->private_data;
 
+	rc = generic_write_checks(file, poffset, &write_size, 0);
+	if (rc)
+		return rc;
+
 	xid = GetXid();
 
 	if (*poffset > file->f_path.dentry->d_inode->i_size)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 848286861c31..9c548f110102 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode,
 		if ((inode->i_mode & S_IWUGO) == 0 &&
 		    (attr & ATTR_READONLY) == 0)
 			inode->i_mode |= (S_IWUGO & default_mode);
+
 		inode->i_mode &= ~S_IFMT;
 	}
 	/* clear write bits if ATTR_READONLY is set */
 	if (attr & ATTR_READONLY)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index ed150efbe27c..252fdc0567f1 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 		char lnm_session_key[CIFS_SESS_KEY_SIZE];
 
+		pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
+
 		/* no capabilities flags in old lanman negotiation */
 
 		pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
@@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
 		} else
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
-	} else if (type == Kerberos) {
+	} else if (type == Kerberos || type == MSKerberos) {
 #ifdef CONFIG_CIFS_UPCALL
 		struct cifs_spnego_msg *msg;
 		spnego_key = cifs_get_spnego_key(ses);
@@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 		}
 
 		msg = spnego_key->payload.data;
+		/* check version field to make sure that cifs.upcall is
+		   sending us a response in an expected form */
+		if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+			cERROR(1, ("incorrect version of cifs.upcall (expected"
+				   " %d but got %d)",
+				   CIFS_SPNEGO_UPCALL_VERSION, msg->version));
+			rc = -EKEYREJECTED;
+			goto ssetup_exit;
+		}
 		/* bail out if key is too long */
 		if (msg->sesskey_len >
 		    sizeof(ses->server->mac_signing_key.data.krb5)) {
diff --git a/fs/compat.c b/fs/compat.c
index c9d1472e65c5..075d0509970d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen,
 	if (buf->result)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->result = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	buf->result++;
 	dirent = buf->dirent;
 	if (!access_ok(VERIFY_WRITE, dirent,
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen,
 	if (reclen > buf->count)
 		return -EINVAL;
 	d_ino = ino;
-	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino)
+	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
+		buf->error = -EOVERFLOW;
 		return -EOVERFLOW;
+	}
 	dirent = buf->previous;
 	if (dirent) {
 		if (__put_user(offset, &dirent->d_off))
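
In both hunks the callback now stashes -EOVERFLOW in the callback buffer as well as returning it: the directory-walk core treats a nonzero callback return only as "stop iterating", so the syscall layer needs the stashed value to report the error to userspace. A simplified user-space model of that contract (the types and error constant are illustrative):

	#include <stdio.h>

	/* Simplified model of the filldir callback contract; not kernel API. */
	struct getdents_callback { int result; };

	static int fillonedir(struct getdents_callback *buf, unsigned long long ino)
	{
		unsigned long d_ino = (unsigned long)ino;	/* may narrow on 32-bit */

		if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
			buf->result = -75;	/* stash -EOVERFLOW for the caller to report */
			return -75;		/* nonzero merely stops the walk */
		}
		buf->result++;
		return 0;
	}

	int main(void)
	{
		struct getdents_callback buf = { 0 };

		fillonedir(&buf, 1ULL << 40);	/* inode that cannot fit in 32 bits */
		/* On 64-bit hosts the check compiles out (prints 1); on 32-bit it reports -75. */
		printf("result = %d\n", buf.result);
		return 0;
	}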
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7a8db78a91d2..8e93341f3e82 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 	 * Ensure that no racing symlink() will make detach_prep() fail while
 	 * the new link is temporarily attached
 	 */
-	mutex_lock(&configfs_symlink_mutex);
-	spin_lock(&configfs_dirent_lock);
 	do {
 		struct mutex *wait_mutex;
 
+		mutex_lock(&configfs_symlink_mutex);
+		spin_lock(&configfs_dirent_lock);
 		ret = configfs_detach_prep(dentry, &wait_mutex);
-		if (ret) {
+		if (ret)
 			configfs_detach_rollback(dentry);
 		spin_unlock(&configfs_dirent_lock);
 		mutex_unlock(&configfs_symlink_mutex);
+
+		if (ret) {
 			if (ret != -EAGAIN) {
 				config_item_put(parent_item);
 				return ret;
@@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
 			/* Wait until the racing operation terminates */
 			mutex_lock(wait_mutex);
 			mutex_unlock(wait_mutex);
-
-			mutex_lock(&configfs_symlink_mutex);
-			spin_lock(&configfs_dirent_lock);
 		}
 	} while (ret == -EAGAIN);
-	spin_unlock(&configfs_dirent_lock);
-	mutex_unlock(&configfs_symlink_mutex);
 
 	/* Get a working ref for the duration of this function */
 	item = configfs_get_config_item(dentry);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 0c3b618c15b3..f40423eb1a14 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex);
 static int cramfs_iget5_test(struct inode *inode, void *opaque)
 {
 	struct cramfs_inode *cramfs_inode = opaque;
-
-	if (inode->i_ino != CRAMINO(cramfs_inode))
-		return 0; /* does not match */
-
-	if (inode->i_ino != 1)
-		return 1;
-
-	/* all empty directories, char, block, pipe, and sock, share inode #1 */
-
-	if ((inode->i_mode != cramfs_inode->mode) ||
-	    (inode->i_gid != cramfs_inode->gid) ||
-	    (inode->i_uid != cramfs_inode->uid))
-		return 0; /* does not match */
-
-	if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) &&
-	    (inode->i_rdev != old_decode_dev(cramfs_inode->size)))
-		return 0; /* does not match */
-
-	return 1; /* matches */
+	return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
 }
 
 static int cramfs_iget5_set(struct inode *inode, void *opaque)
 {
-	static struct timespec zerotime;
 	struct cramfs_inode *cramfs_inode = opaque;
-	inode->i_mode = cramfs_inode->mode;
-	inode->i_uid = cramfs_inode->uid;
-	inode->i_size = cramfs_inode->size;
-	inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
-	inode->i_gid = cramfs_inode->gid;
-	/* Struct copy intentional */
-	inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
 	inode->i_ino = CRAMINO(cramfs_inode);
-	/* inode->i_nlink is left 1 - arguably wrong for directories,
-	   but it's the best we can do without reading the directory
-	   contents. 1 yields the right result in GNU find, even
-	   without -noleaf option. */
-	if (S_ISREG(inode->i_mode)) {
-		inode->i_fop = &generic_ro_fops;
-		inode->i_data.a_ops = &cramfs_aops;
-	} else if (S_ISDIR(inode->i_mode)) {
-		inode->i_op = &cramfs_dir_inode_operations;
-		inode->i_fop = &cramfs_directory_operations;
-	} else if (S_ISLNK(inode->i_mode)) {
-		inode->i_op = &page_symlink_inode_operations;
-		inode->i_data.a_ops = &cramfs_aops;
-	} else {
-		inode->i_size = 0;
-		inode->i_blocks = 0;
-		init_special_inode(inode, inode->i_mode,
-			old_decode_dev(cramfs_inode->size));
-	}
 	return 0;
 }
 
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 	struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
 					    cramfs_iget5_test, cramfs_iget5_set,
 					    cramfs_inode);
+	static struct timespec zerotime;
+
 	if (inode && (inode->i_state & I_NEW)) {
+		inode->i_mode = cramfs_inode->mode;
+		inode->i_uid = cramfs_inode->uid;
+		inode->i_size = cramfs_inode->size;
+		inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+		inode->i_gid = cramfs_inode->gid;
+		/* Struct copy intentional */
+		inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+		/* inode->i_nlink is left 1 - arguably wrong for directories,
+		   but it's the best we can do without reading the directory
+		   contents. 1 yields the right result in GNU find, even
+		   without -noleaf option. */
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_fop = &generic_ro_fops;
+			inode->i_data.a_ops = &cramfs_aops;
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = &cramfs_dir_inode_operations;
+			inode->i_fop = &cramfs_directory_operations;
+		} else if (S_ISLNK(inode->i_mode)) {
+			inode->i_op = &page_symlink_inode_operations;
+			inode->i_data.a_ops = &cramfs_aops;
+		} else {
+			inode->i_size = 0;
+			inode->i_blocks = 0;
+			init_special_inode(inode, inode->i_mode,
+				old_decode_dev(cramfs_inode->size));
+		}
 		unlock_new_inode(inode);
 	}
 	return inode;
 }
 
+static void cramfs_drop_inode(struct inode *inode)
+{
+	if (inode->i_ino == 1)
+		generic_delete_inode(inode);
+	else
+		generic_drop_inode(inode);
+}
+
 /*
  * We have our own block cache: don't fill up the buffer cache
  * with the rom-image, because the way the filesystem is set
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = {
 	.put_super	= cramfs_put_super,
 	.remount_fs	= cramfs_remount,
 	.statfs		= cramfs_statfs,
+	.drop_inode	= cramfs_drop_inode,
 };
 
 static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index 101663d15e9f..80e93956aced 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * If no entry exists with the exact case name, allocate new dentry with
  * the exact case, and return the spliced entry.
  */
-struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
+struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 			struct qstr *name)
 {
 	int error;
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 3a404e7fad53..291abb11e20e 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei
 	}
 	unlock_kernel();
 
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino,
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1ae5004e93fc..e9fa960ba6da 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
 	free_blocks =
 		percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
 #endif
+	if (free_blocks <= root_blocks)
+		/* we don't have free space */
+		return 0;
 	if (free_blocks - root_blocks < nblocks)
 		return free_blocks - root_blocks;
 	return nblocks;
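
The added guard matters because ext4_has_free_blocks() works with unsigned block counts: when free_blocks <= root_blocks, the old code's free_blocks - root_blocks would wrap around to a huge value rather than going negative. A small standalone demonstration:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long free_blocks = 5, root_blocks = 10, nblocks = 3;

		/* Without the guard, the subtraction wraps to a huge unsigned value. */
		printf("unguarded: %llu\n", free_blocks - root_blocks);

		/* The added check returns 0 before the subtraction can wrap. */
		if (free_blocks <= root_blocks)
			printf("guarded:   0 (no free space)\n");
		else if (free_blocks - root_blocks < nblocks)
			printf("guarded:   %llu\n", free_blocks - root_blocks);
		else
			printf("guarded:   %llu\n", nblocks);
		return 0;
	}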
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d3d23d73c08b..ec8e33b45219 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent,
 				get_dtype(sb, fname->file_type));
 		if (error) {
 			filp->f_pos = curr_pos;
-			info->extra_fname = fname->next;
+			info->extra_fname = fname;
 			return error;
 		}
 		fname = fname->next;
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp,
 	 * If there are any leftover names on the hash collision
 	 * chain, return them first.
 	 */
-	if (info->extra_fname &&
-	    call_filldir(filp, dirent, filldir, info->extra_fname))
-		goto finished;
+	if (info->extra_fname) {
+		if (call_filldir(filp, dirent, filldir, info->extra_fname))
+			goto finished;
 
-	if (!info->curr_node)
+		info->extra_fname = NULL;
+		info->curr_node = rb_next(info->curr_node);
+		if (!info->curr_node) {
+			if (info->next_hash == ~0) {
+				filp->f_pos = EXT4_HTREE_EOF;
+				goto finished;
+			}
+			info->curr_hash = info->next_hash;
+			info->curr_minor_hash = 0;
+		}
+	} else if (!info->curr_node)
 		info->curr_node = rb_first(&info->root);
 
 	while (1) {
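
The fix stores the fname whose filldir call failed (rather than its successor) so the next readdir invocation retries the same entry, and on the resume path it advances curr_node itself instead of re-walking. A user-space sketch of that retry-same-entry cursor pattern (the types are illustrative, not ext4's):

	#include <stdio.h>

	/* Resumable iteration: on a full buffer the cursor must stay on the
	 * entry that failed, so the next call retries it. */
	struct fname { const char *name; struct fname *next; };

	static struct fname *emit(struct fname *cur, int *budget)
	{
		while (cur) {
			if (*budget == 0)
				return cur;	/* resume point: this entry, not cur->next */
			printf("%s\n", cur->name);
			(*budget)--;
			cur = cur->next;
		}
		return NULL;
	}

	int main(void)
	{
		struct fname c = { "c", NULL }, b = { "b", &c }, a = { "a", &b };
		int budget = 2;
		struct fname *resume = emit(&a, &budget);	/* prints a, b */

		budget = 2;
		emit(resume, &budget);	/* prints c; nothing is skipped */
		return 0;
	}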
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c7924d9e358..295003241d3d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1072,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
+extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1227,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
 extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
+				       int chunk);
 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 			       ext4_lblk_t iblock,
 			       unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 6c166c0a54b7..d33dc56d6986 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
 extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
 extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
-extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
+						   int num,
+						   struct ext4_ext_path *path);
 extern int ext4_ext_try_to_merge(struct inode *inode,
 				 struct ext4_ext_path *path,
 				 struct ext4_extent *);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
 		EXT4_XATTR_TRANS_BLOCKS - 2 + \
 		2*EXT4_QUOTA_TRANS_BLOCKS(sb))
 
+/*
+ * Define the number of metadata blocks we need to account to modify data.
+ *
+ * This include super block, inode block, quota blocks and xattr blocks
+ */
+#define EXT4_META_TRANS_BLOCKS(sb)	(EXT4_XATTR_TRANS_BLOCKS + \
+					2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
  * generous. We can grow the delete transaction later if necessary. */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 612c3d2c3824..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 }
 
 /*
- * ext4_ext_calc_credits_for_insert:
- * This routine returns max. credits that the extent tree can consume.
- * It should be OK for low-performance paths like ->writepage()
- * To allow many writing processes to fit into a single transaction,
- * the caller should calculate credits under i_data_sem and
- * pass the actual path.
+ * ext4_ext_calc_credits_for_single_extent:
+ * This routine returns max. credits that needed to insert an extent
+ * to the extent tree.
+ * When pass the actual path, the caller should calculate credits
+ * under i_data_sem.
  */
-int ext4_ext_calc_credits_for_insert(struct inode *inode,
-				     struct ext4_ext_path *path)
+int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
+					    struct ext4_ext_path *path)
 {
-	int depth, needed;
-
 	if (path) {
+		int depth = ext_depth(inode);
+		int ret = 0;
+
 		/* probably there is space in leaf? */
-		depth = ext_depth(inode);
 		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
-				< le16_to_cpu(path[depth].p_hdr->eh_max))
-			return 1;
-	}
+				< le16_to_cpu(path[depth].p_hdr->eh_max)) {
 
-	/*
-	 * given 32-bit logical block (4294967296 blocks), max. tree
-	 * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
-	 * Let's also add one more level for imbalance.
-	 */
-	depth = 5;
-
-	/* allocation of new data block(s) */
-	needed = 2;
+			/*
+			 * There are some space in the leaf tree, no
+			 * need to account for leaf block credit
+			 *
+			 * bitmaps and block group descriptor blocks
+			 * and other metadat blocks still need to be
+			 * accounted.
+			 */
+			/* 1 bitmap, 1 block group descriptor */
+			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+		}
+	}
 
-	/*
-	 * tree can be full, so it would need to grow in depth:
-	 * we need one credit to modify old root, credits for
-	 * new root will be added in split accounting
-	 */
-	needed += 1;
+	return ext4_chunk_trans_blocks(inode, nrblocks);
+}
 
-	/*
-	 * Index split can happen, we would need:
-	 * allocate intermediate indexes (bitmap + group)
-	 * + change two blocks at each level, but root (already included)
-	 */
-	needed += (depth * 2) + (depth * 2);
+/*
+ * How many index/leaf blocks need to change/allocate to modify nrblocks?
+ *
+ * if nrblocks are fit in a single extent (chunk flag is 1), then
+ * in the worse case, each tree level index/leaf need to be changed
+ * if the tree split due to insert a new extent, then the old tree
+ * index/leaf need to be updated too
+ *
+ * If the nrblocks are discontiguous, they could cause
+ * the whole tree split more than once, but this is really rare.
+ */
+int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+	int index;
+	int depth = ext_depth(inode);
 
-	/* any allocation modifies superblock */
-	needed += 1;
+	if (chunk)
+		index = depth * 2;
+	else
+		index = depth * 3;
 
-	return needed;
+	return index;
 }
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
@@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			correct_index = 1;
 			credits += (ext_depth(inode)) + 1;
 		}
-#ifdef CONFIG_QUOTA
 		credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
 
 		err = ext4_ext_journal_restart(handle, credits);
 		if (err)
@@ -2805,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
 	/*
 	 * probably first extent we're gonna free will be last in block
 	 */
-	err = ext4_writepage_trans_blocks(inode) + 3;
+	err = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, err);
 	if (IS_ERR(handle))
 		return;
@@ -2819,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode)
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_ext_invalidate_cache(inode);
 
-	ext4_mb_discard_inode_preallocations(inode);
+	ext4_discard_reservation(inode);
 
 	/*
 	 * TODO: optimization is possible here.
@@ -2858,27 +2863,6 @@ out_stop:
 	ext4_journal_stop(handle);
 }
 
-/*
- * ext4_ext_writepage_trans_blocks:
- * calculate max number of blocks we could modify
- * in order to allocate new block for an inode
- */
-int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
-{
-	int needed;
-
-	needed = ext4_ext_calc_credits_for_insert(inode, NULL);
-
-	/* caller wants to allocate num blocks, but note it includes sb */
-	needed = needed * num - (num - 1);
-
-#ifdef CONFIG_QUOTA
-	needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
-#endif
-
-	return needed;
-}
-
 static void ext4_falloc_update_inode(struct inode *inode,
 				int mode, loff_t new_size, int update_ctime)
 {
@@ -2939,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
 							- block;
 	/*
-	 * credits to insert 1 extent into extent tree + buffers to be able to
-	 * modify 1 super block, 1 block bitmap and 1 group descriptor.
+	 * credits to insert 1 extent into extent tree
 	 */
-	credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
+	credits = ext4_chunk_trans_blocks(inode, max_blocks);
 	mutex_lock(&inode->i_mutex);
 retry:
 	while (ret >= 0 && ret < max_blocks) {
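
As a worked example of the new index-credit accounting in ext4_ext_index_trans_blocks(): for an extent tree of depth 3, a single contiguous chunk is charged two index/leaf blocks per level (6 total), while discontiguous blocks are charged three per level (9 total). A standalone sketch mirroring the formula:

	#include <stdio.h>

	/* Mirror of the new credit formula for illustration: two blocks per
	 * tree level for a contiguous chunk, three per level otherwise. */
	static int index_trans_blocks(int depth, int chunk)
	{
		return chunk ? depth * 2 : depth * 3;
	}

	int main(void)
	{
		printf("depth 3, contiguous chunk: %d\n", index_trans_blocks(3, 1)); /* 6 */
		printf("depth 3, discontiguous:    %d\n", index_trans_blocks(3, 0)); /* 9 */
		return 0;
	}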
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 655e760212b8..f344834bbf58 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -351,7 +351,7 @@ find_close_to_parent:
351 goto found_flexbg; 351 goto found_flexbg;
352 } 352 }
353 353
354 if (best_flex < 0 || 354 if (flex_group[best_flex].free_inodes == 0 ||
355 (flex_group[i].free_blocks > 355 (flex_group[i].free_blocks >
356 flex_group[best_flex].free_blocks && 356 flex_group[best_flex].free_blocks &&
357 flex_group[i].free_inodes)) 357 flex_group[i].free_inodes))
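The ialloc.c fix above changes when the current best flex group gets replaced; a standalone sketch of the resulting rule (the struct and function names here are hypothetical):

struct flex_groups_sketch {
	unsigned int free_inodes;
	unsigned int free_blocks;
};

/* a candidate group i beats the current best if the best has run out
 * of free inodes, or if i has more free blocks and at least one
 * free inode */
static int flex_candidate_is_better(const struct flex_groups_sketch *g,
				    int best, int i)
{
	return g[best].free_inodes == 0 ||
	       (g[i].free_blocks > g[best].free_blocks && g[i].free_inodes);
}
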
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 59fbbe899acc..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -41,6 +41,8 @@
41#include "acl.h" 41#include "acl.h"
42#include "ext4_extents.h" 42#include "ext4_extents.h"
43 43
44#define MPAGE_DA_EXTENT_TAIL 0x01
45
44static inline int ext4_begin_ordered_truncate(struct inode *inode, 46static inline int ext4_begin_ordered_truncate(struct inode *inode,
45 loff_t new_size) 47 loff_t new_size)
46{ 48{
@@ -1005,6 +1007,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1005 */ 1007 */
1006static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1008static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1007{ 1009{
1010 if (!blocks)
1011 return 0;
1012
1008 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1013 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1009 return ext4_ext_calc_metadata_amount(inode, blocks); 1014 return ext4_ext_calc_metadata_amount(inode, blocks);
1010 1015
@@ -1041,18 +1046,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1041 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1046 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1042} 1047}
1043 1048
1044/* Maximum number of blocks we map for direct IO at once. */
1045#define DIO_MAX_BLOCKS 4096
1046/*
1047 * Number of credits we need for writing DIO_MAX_BLOCKS:
1048 * We need sb + group descriptor + bitmap + inode -> 4
1049 * For B blocks with A block pointers per block we need:
1050 * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
1051 * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
1052 */
1053#define DIO_CREDITS 25
1054
1055
1056/* 1049/*
1057 * The ext4_get_blocks_wrap() function tries to look up the requested blocks, 1050 * The ext4_get_blocks_wrap() function tries to look up the requested blocks,
1058 * and returns if the blocks are already mapped. 1051 * and returns if the blocks are already mapped.
@@ -1164,19 +1157,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1164 return retval; 1157 return retval;
1165} 1158}
1166 1159
1160/* Maximum number of blocks we map for direct IO at once. */
1161#define DIO_MAX_BLOCKS 4096
1162
1167static int ext4_get_block(struct inode *inode, sector_t iblock, 1163static int ext4_get_block(struct inode *inode, sector_t iblock,
1168 struct buffer_head *bh_result, int create) 1164 struct buffer_head *bh_result, int create)
1169{ 1165{
1170 handle_t *handle = ext4_journal_current_handle(); 1166 handle_t *handle = ext4_journal_current_handle();
1171 int ret = 0, started = 0; 1167 int ret = 0, started = 0;
1172 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; 1168 unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
1169 int dio_credits;
1173 1170
1174 if (create && !handle) { 1171 if (create && !handle) {
1175 /* Direct IO write... */ 1172 /* Direct IO write... */
1176 if (max_blocks > DIO_MAX_BLOCKS) 1173 if (max_blocks > DIO_MAX_BLOCKS)
1177 max_blocks = DIO_MAX_BLOCKS; 1174 max_blocks = DIO_MAX_BLOCKS;
1178 handle = ext4_journal_start(inode, DIO_CREDITS + 1175 dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
1179 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); 1176 handle = ext4_journal_start(inode, dio_credits);
1180 if (IS_ERR(handle)) { 1177 if (IS_ERR(handle)) {
1181 ret = PTR_ERR(handle); 1178 ret = PTR_ERR(handle);
1182 goto out; 1179 goto out;
@@ -1559,7 +1556,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1559 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1556 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1560 int total, mdb, mdb_free, release; 1557 int total, mdb, mdb_free, release;
1561 1558
1559 if (!to_free)
1560 return; /* Nothing to release, exit */
1561
1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1563
1564 if (!EXT4_I(inode)->i_reserved_data_blocks) {
1565 /*
1566 * if there are no reserved blocks but we try to free some,
1567 * then the counter is messed up somewhere.
1568 * But since this function is called from invalidate
1569 * page, it's harmless to return without any action
1570 */
1571 printk(KERN_INFO "ext4 delalloc try to release %d reserved "
1572 "blocks for inode %lu, but there is no reserved "
1573 "data blocks\n", to_free, inode->i_ino);
1574 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1575 return;
1576 }
1577
1563 /* recalculate the number of metablocks still need to be reserved */ 1578 /* recalculate the number of metablocks still need to be reserved */
1564 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1579 total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
1565 mdb = ext4_calc_metadata_amount(inode, total); 1580 mdb = ext4_calc_metadata_amount(inode, total);
@@ -1613,11 +1628,13 @@ struct mpage_da_data {
1613 unsigned long first_page, next_page; /* extent of pages */ 1628 unsigned long first_page, next_page; /* extent of pages */
1614 get_block_t *get_block; 1629 get_block_t *get_block;
1615 struct writeback_control *wbc; 1630 struct writeback_control *wbc;
1631 int io_done;
1632 long pages_written;
1616}; 1633};
1617 1634
1618/* 1635/*
1619 * mpage_da_submit_io - walks through extent of pages and tries to write 1636 * mpage_da_submit_io - walks through extent of pages and tries to write
1620 * them with __mpage_writepage() 1637 * them with the writepage() callback
1621 * 1638 *
1622 * @mpd->inode: inode 1639 * @mpd->inode: inode
1623 * @mpd->first_page: first page of the extent 1640 * @mpd->first_page: first page of the extent
@@ -1632,18 +1649,11 @@ struct mpage_da_data {
1632static int mpage_da_submit_io(struct mpage_da_data *mpd) 1649static int mpage_da_submit_io(struct mpage_da_data *mpd)
1633{ 1650{
1634 struct address_space *mapping = mpd->inode->i_mapping; 1651 struct address_space *mapping = mpd->inode->i_mapping;
1635 struct mpage_data mpd_pp = {
1636 .bio = NULL,
1637 .last_block_in_bio = 0,
1638 .get_block = mpd->get_block,
1639 .use_writepage = 1,
1640 };
1641 int ret = 0, err, nr_pages, i; 1652 int ret = 0, err, nr_pages, i;
1642 unsigned long index, end; 1653 unsigned long index, end;
1643 struct pagevec pvec; 1654 struct pagevec pvec;
1644 1655
1645 BUG_ON(mpd->next_page <= mpd->first_page); 1656 BUG_ON(mpd->next_page <= mpd->first_page);
1646
1647 pagevec_init(&pvec, 0); 1657 pagevec_init(&pvec, 0);
1648 index = mpd->first_page; 1658 index = mpd->first_page;
1649 end = mpd->next_page - 1; 1659 end = mpd->next_page - 1;
@@ -1661,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1661 break; 1671 break;
1662 index++; 1672 index++;
1663 1673
1664 err = __mpage_writepage(page, mpd->wbc, &mpd_pp); 1674 err = mapping->a_ops->writepage(page, mpd->wbc);
1665 1675 if (!err)
1676 mpd->pages_written++;
1666 /* 1677 /*
1667 * In error case, we have to continue because 1678 * In error case, we have to continue because
1668 * remaining pages are still locked 1679 * remaining pages are still locked
@@ -1673,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1673 } 1684 }
1674 pagevec_release(&pvec); 1685 pagevec_release(&pvec);
1675 } 1686 }
1676 if (mpd_pp.bio)
1677 mpage_bio_submit(WRITE, mpd_pp.bio);
1678
1679 return ret; 1687 return ret;
1680} 1688}
1681 1689
@@ -1698,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1698 int blocks = exbh->b_size >> inode->i_blkbits; 1706 int blocks = exbh->b_size >> inode->i_blkbits;
1699 sector_t pblock = exbh->b_blocknr, cur_logical; 1707 sector_t pblock = exbh->b_blocknr, cur_logical;
1700 struct buffer_head *head, *bh; 1708 struct buffer_head *head, *bh;
1701 unsigned long index, end; 1709 pgoff_t index, end;
1702 struct pagevec pvec; 1710 struct pagevec pvec;
1703 int nr_pages, i; 1711 int nr_pages, i;
1704 1712
@@ -1741,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1741 if (buffer_delay(bh)) { 1749 if (buffer_delay(bh)) {
1742 bh->b_blocknr = pblock; 1750 bh->b_blocknr = pblock;
1743 clear_buffer_delay(bh); 1751 clear_buffer_delay(bh);
1752 bh->b_bdev = inode->i_sb->s_bdev;
1753 } else if (buffer_unwritten(bh)) {
1754 bh->b_blocknr = pblock;
1755 clear_buffer_unwritten(bh);
1756 set_buffer_mapped(bh);
1757 set_buffer_new(bh);
1758 bh->b_bdev = inode->i_sb->s_bdev;
1744 } else if (buffer_mapped(bh)) 1759 } else if (buffer_mapped(bh))
1745 BUG_ON(bh->b_blocknr != pblock); 1760 BUG_ON(bh->b_blocknr != pblock);
1746 1761
@@ -1776,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode,
1776 * 1791 *
1777 * The function skips space we know is already mapped to disk blocks. 1792 * The function skips space we know is already mapped to disk blocks.
1778 * 1793 *
1779 * The function ignores errors ->get_block() returns, thus real
1780 * error handling is postponed to __mpage_writepage()
1781 */ 1794 */
1782static void mpage_da_map_blocks(struct mpage_da_data *mpd) 1795static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1783{ 1796{
1797 int err = 0;
1784 struct buffer_head *lbh = &mpd->lbh; 1798 struct buffer_head *lbh = &mpd->lbh;
1785 int err = 0, remain = lbh->b_size;
1786 sector_t next = lbh->b_blocknr; 1799 sector_t next = lbh->b_blocknr;
1787 struct buffer_head new; 1800 struct buffer_head new;
1788 1801
@@ -1792,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1792 if (buffer_mapped(lbh) && !buffer_delay(lbh)) 1805 if (buffer_mapped(lbh) && !buffer_delay(lbh))
1793 return; 1806 return;
1794 1807
1795 while (remain) { 1808 new.b_state = lbh->b_state;
1796 new.b_state = lbh->b_state; 1809 new.b_blocknr = 0;
1797 new.b_blocknr = 0; 1810 new.b_size = lbh->b_size;
1798 new.b_size = remain;
1799 err = mpd->get_block(mpd->inode, next, &new, 1);
1800 if (err) {
1801 /*
1802 * Rather than implement own error handling
1803 * here, we just leave remaining blocks
1804 * unallocated and try again with ->writepage()
1805 */
1806 break;
1807 }
1808 BUG_ON(new.b_size == 0);
1809 1811
1810 if (buffer_new(&new)) 1812 /*
1811 __unmap_underlying_blocks(mpd->inode, &new); 1813 * If we didn't accumulate anything
1814 * to write simply return
1815 */
1816 if (!new.b_size)
1817 return;
1818 err = mpd->get_block(mpd->inode, next, &new, 1);
1819 if (err)
1820 return;
1821 BUG_ON(new.b_size == 0);
1812 1822
1813 /* 1823 if (buffer_new(&new))
1814 * If blocks are delayed marked, we need to 1824 __unmap_underlying_blocks(mpd->inode, &new);
1815 * put actual blocknr and drop delayed bit
1816 */
1817 if (buffer_delay(lbh))
1818 mpage_put_bnr_to_bhs(mpd, next, &new);
1819 1825
1820 /* go for the remaining blocks */ 1826 /*
1821 next += new.b_size >> mpd->inode->i_blkbits; 1827 * If blocks are delayed marked, we need to
1822 remain -= new.b_size; 1828 * put actual blocknr and drop delayed bit
1823 } 1829 */
1830 if (buffer_delay(lbh) || buffer_unwritten(lbh))
1831 mpage_put_bnr_to_bhs(mpd, next, &new);
1832
1833 return;
1824} 1834}
1825 1835
1826#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) 1836#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
1837 (1 << BH_Delay) | (1 << BH_Unwritten))
1827 1838
1828/* 1839/*
1829 * mpage_add_bh_to_extent - try to add one more block to extent of blocks 1840 * mpage_add_bh_to_extent - try to add one more block to extent of blocks
@@ -1837,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1837static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1848static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1838 sector_t logical, struct buffer_head *bh) 1849 sector_t logical, struct buffer_head *bh)
1839{ 1850{
1840 struct buffer_head *lbh = &mpd->lbh;
1841 sector_t next; 1851 sector_t next;
1852 size_t b_size = bh->b_size;
1853 struct buffer_head *lbh = &mpd->lbh;
1854 int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
1842 1855
1843 next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); 1856 /* check if the reserved journal credits might overflow */
1844 1857 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
1858 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1859 /*
1860 * With non-extent format we are limited by the journal
1861 * credit available. Total credit needed to insert
1862 * nrblocks contiguous blocks is dependent on the
1863 * nrblocks. So limit nrblocks.
1864 */
1865 goto flush_it;
1866 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1867 EXT4_MAX_TRANS_DATA) {
1868 /*
1869 * Adding the new buffer_head would make it cross the
1870 * allowed limit for which we have journal credit
1871 * reserved. So limit the new bh->b_size
1872 */
1873 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1874 mpd->inode->i_blkbits;
1875 /* we will do mpage_da_submit_io in the next loop */
1876 }
1877 }
1845 /* 1878 /*
1846 * First block in the extent 1879 * First block in the extent
1847 */ 1880 */
1848 if (lbh->b_size == 0) { 1881 if (lbh->b_size == 0) {
1849 lbh->b_blocknr = logical; 1882 lbh->b_blocknr = logical;
1850 lbh->b_size = bh->b_size; 1883 lbh->b_size = b_size;
1851 lbh->b_state = bh->b_state & BH_FLAGS; 1884 lbh->b_state = bh->b_state & BH_FLAGS;
1852 return; 1885 return;
1853 } 1886 }
1854 1887
1888 next = lbh->b_blocknr + nrblocks;
1855 /* 1889 /*
1856 * Can we merge the block to our big extent? 1890 * Can we merge the block to our big extent?
1857 */ 1891 */
1858 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { 1892 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
1859 lbh->b_size += bh->b_size; 1893 lbh->b_size += b_size;
1860 return; 1894 return;
1861 } 1895 }
1862 1896
1897flush_it:
1863 /* 1898 /*
1864 * We couldn't merge the block to our extent, so we 1899 * We couldn't merge the block to our extent, so we
1865 * need to flush current extent and start new one 1900 * need to flush current extent and start new one
1866 */ 1901 */
1867 mpage_da_map_blocks(mpd); 1902 mpage_da_map_blocks(mpd);
1868 1903 mpage_da_submit_io(mpd);
1869 /* 1904 mpd->io_done = 1;
1870 * Now start a new extent 1905 return;
1871 */
1872 lbh->b_size = bh->b_size;
1873 lbh->b_state = bh->b_state & BH_FLAGS;
1874 lbh->b_blocknr = logical;
1875} 1906}
1876 1907
1877/* 1908/*
@@ -1891,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page,
1891 struct buffer_head *bh, *head, fake; 1922 struct buffer_head *bh, *head, fake;
1892 sector_t logical; 1923 sector_t logical;
1893 1924
1925 if (mpd->io_done) {
1926 /*
1927 * Redirty the rest of the pages in the
1928 * page_vec and skip them. We will try
1929 * to write them again after
1930 * starting a new transaction
1931 */
1932 redirty_page_for_writepage(wbc, page);
1933 unlock_page(page);
1934 return MPAGE_DA_EXTENT_TAIL;
1935 }
1894 /* 1936 /*
1895 * Can we merge this page to current extent? 1937 * Can we merge this page to current extent?
1896 */ 1938 */
1897 if (mpd->next_page != page->index) { 1939 if (mpd->next_page != page->index) {
1898 /* 1940 /*
1899 * Nope, we can't. So, we map non-allocated blocks 1941 * Nope, we can't. So, we map non-allocated blocks
1900 * and start IO on them using __mpage_writepage() 1942 * and start IO on them using writepage()
1901 */ 1943 */
1902 if (mpd->next_page != mpd->first_page) { 1944 if (mpd->next_page != mpd->first_page) {
1903 mpage_da_map_blocks(mpd); 1945 mpage_da_map_blocks(mpd);
1904 mpage_da_submit_io(mpd); 1946 mpage_da_submit_io(mpd);
1947 /*
1948 * skip the rest of the pages in the page_vec
1949 */
1950 mpd->io_done = 1;
1951 redirty_page_for_writepage(wbc, page);
1952 unlock_page(page);
1953 return MPAGE_DA_EXTENT_TAIL;
1905 } 1954 }
1906 1955
1907 /* 1956 /*
@@ -1932,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page,
1932 set_buffer_dirty(bh); 1981 set_buffer_dirty(bh);
1933 set_buffer_uptodate(bh); 1982 set_buffer_uptodate(bh);
1934 mpage_add_bh_to_extent(mpd, logical, bh); 1983 mpage_add_bh_to_extent(mpd, logical, bh);
1984 if (mpd->io_done)
1985 return MPAGE_DA_EXTENT_TAIL;
1935 } else { 1986 } else {
1936 /* 1987 /*
1937 * Page with regular buffer heads, just add all dirty ones 1988 * Page with regular buffer heads, just add all dirty ones
@@ -1940,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page,
1940 bh = head; 1991 bh = head;
1941 do { 1992 do {
1942 BUG_ON(buffer_locked(bh)); 1993 BUG_ON(buffer_locked(bh));
1943 if (buffer_dirty(bh)) 1994 if (buffer_dirty(bh) &&
1995 (!buffer_mapped(bh) || buffer_delay(bh))) {
1944 mpage_add_bh_to_extent(mpd, logical, bh); 1996 mpage_add_bh_to_extent(mpd, logical, bh);
1997 if (mpd->io_done)
1998 return MPAGE_DA_EXTENT_TAIL;
1999 }
1945 logical++; 2000 logical++;
1946 } while ((bh = bh->b_this_page) != head); 2001 } while ((bh = bh->b_this_page) != head);
1947 } 2002 }
@@ -1960,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page,
1960 * 2015 *
1961 * This is a library function, which implements the writepages() 2016 * This is a library function, which implements the writepages()
1962 * address_space_operation. 2017 * address_space_operation.
1963 *
1964 * In order to avoid duplication of logic that deals with partial pages,
1965 * multiple bio per page, etc, we find non-allocated blocks, allocate
1966 * them with minimal calls to ->get_block() and re-use __mpage_writepage()
1967 *
1968 * It's important that we call __mpage_writepage() only once for each
1969 * involved page, otherwise we'd have to implement more complicated logic
1970 * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
1971 *
1972 * See comments to mpage_writepages()
1973 */ 2018 */
1974static int mpage_da_writepages(struct address_space *mapping, 2019static int mpage_da_writepages(struct address_space *mapping,
1975 struct writeback_control *wbc, 2020 struct writeback_control *wbc,
1976 get_block_t get_block) 2021 get_block_t get_block)
1977{ 2022{
1978 struct mpage_da_data mpd; 2023 struct mpage_da_data mpd;
2024 long to_write;
1979 int ret; 2025 int ret;
1980 2026
1981 if (!get_block) 2027 if (!get_block)
@@ -1989,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping,
1989 mpd.first_page = 0; 2035 mpd.first_page = 0;
1990 mpd.next_page = 0; 2036 mpd.next_page = 0;
1991 mpd.get_block = get_block; 2037 mpd.get_block = get_block;
2038 mpd.io_done = 0;
2039 mpd.pages_written = 0;
2040
2041 to_write = wbc->nr_to_write;
1992 2042
1993 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); 2043 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
1994 2044
1995 /* 2045 /*
1996 * Handle last extent of pages 2046 * Handle last extent of pages
1997 */ 2047 */
1998 if (mpd.next_page != mpd.first_page) { 2048 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
1999 mpage_da_map_blocks(&mpd); 2049 mpage_da_map_blocks(&mpd);
2000 mpage_da_submit_io(&mpd); 2050 mpage_da_submit_io(&mpd);
2001 } 2051 }
2002 2052
2053 wbc->nr_to_write = to_write - mpd.pages_written;
2003 return ret; 2054 return ret;
2004} 2055}
2005 2056
@@ -2204,63 +2255,95 @@ static int ext4_da_writepage(struct page *page,
2204} 2255}
2205 2256
2206/* 2257/*
2207 * For now just follow the DIO way to estimate the max credits 2258 * This is called via ext4_da_writepages() to
2208 * needed to write out EXT4_MAX_WRITEBACK_PAGES. 2259 * calculate the total number of credits to reserve to fit
2209 * todo: need to calculate the max credits need for 2260 * a single extent allocation into a single transaction;
2210 * extent based files, currently the DIO credits is based on 2261 * ext4_da_writepages() will loop calling this before
2211 * indirect-blocks mapping way. 2262 * the block allocation.
2212 *
2213 * Probably should have a generic way to calculate credits
2214 * for DIO, writepages, and truncate
2215 */ 2263 */
2216#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS 2264
2217#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS 2265static int ext4_da_writepages_trans_blocks(struct inode *inode)
2266{
2267 int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
2268
2269 /*
2270 * With non-extent format the journal credit needed to
2271 * insert nrblocks contiguous block is dependent on
2272 * number of contiguous block. So we will limit
2273 * number of contiguous block to a sane value
2274 */
2275 if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
2276 (max_blocks > EXT4_MAX_TRANS_DATA))
2277 max_blocks = EXT4_MAX_TRANS_DATA;
2278
2279 return ext4_chunk_trans_blocks(inode, max_blocks);
2280}
2218 2281
2219static int ext4_da_writepages(struct address_space *mapping, 2282static int ext4_da_writepages(struct address_space *mapping,
2220 struct writeback_control *wbc) 2283 struct writeback_control *wbc)
2221{ 2284{
2222 struct inode *inode = mapping->host;
2223 handle_t *handle = NULL; 2285 handle_t *handle = NULL;
2224 int needed_blocks;
2225 int ret = 0;
2226 long to_write;
2227 loff_t range_start = 0; 2286 loff_t range_start = 0;
2287 struct inode *inode = mapping->host;
2288 int needed_blocks, ret = 0, nr_to_writebump = 0;
2289 long to_write, pages_skipped = 0;
2290 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2228 2291
2229 /* 2292 /*
2230 * No pages to write? This is mainly a kludge to avoid starting 2293 * No pages to write? This is mainly a kludge to avoid starting
2231 * a transaction for special inodes like journal inode on last iput() 2294 * a transaction for special inodes like journal inode on last iput()
2232 * because that could violate lock ordering on umount 2295 * because that could violate lock ordering on umount
2233 */ 2296 */
2234 if (!mapping->nrpages) 2297 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2235 return 0; 2298 return 0;
2236
2237 /* 2299 /*
2238 * Estimate the worse case needed credits to write out 2300 * Make sure nr_to_write is >= sbi->s_mb_stream_request
2239 * EXT4_MAX_BUF_BLOCKS pages 2301 * This makes sure blocks for small files are allocated in a
2302 * single attempt. This ensures that small files
2303 * get less fragmented.
2240 */ 2304 */
2241 needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; 2305 if (wbc->nr_to_write < sbi->s_mb_stream_request) {
2306 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2307 wbc->nr_to_write = sbi->s_mb_stream_request;
2308 }
2242 2309
2243 to_write = wbc->nr_to_write; 2310 if (!wbc->range_cyclic)
2244 if (!wbc->range_cyclic) {
2245 /* 2311 /*
2246 * If range_cyclic is not set force range_cont 2312 * If range_cyclic is not set force range_cont
2247 * and save the old writeback_index 2313 * and save the old writeback_index
2248 */ 2314 */
2249 wbc->range_cont = 1; 2315 wbc->range_cont = 1;
2250 range_start = wbc->range_start;
2251 }
2252 2316
2253 while (!ret && to_write) { 2317 range_start = wbc->range_start;
2318 pages_skipped = wbc->pages_skipped;
2319
2320restart_loop:
2321 to_write = wbc->nr_to_write;
2322 while (!ret && to_write > 0) {
2323
2324 /*
2325 * We insert one extent at a time, so we need
2326 * the credits needed for a single extent allocation.
2327 * Journalled mode is currently not supported
2328 * by delalloc
2329 */
2330 BUG_ON(ext4_should_journal_data(inode));
2331 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2332
2254 /* start a new transaction*/ 2333 /* start a new transaction*/
2255 handle = ext4_journal_start(inode, needed_blocks); 2334 handle = ext4_journal_start(inode, needed_blocks);
2256 if (IS_ERR(handle)) { 2335 if (IS_ERR(handle)) {
2257 ret = PTR_ERR(handle); 2336 ret = PTR_ERR(handle);
2337 printk(KERN_EMERG "%s: jbd2_start: "
2338 "%ld pages, ino %lu; err %d\n", __func__,
2339 wbc->nr_to_write, inode->i_ino, ret);
2340 dump_stack();
2258 goto out_writepages; 2341 goto out_writepages;
2259 } 2342 }
2260 if (ext4_should_order_data(inode)) { 2343 if (ext4_should_order_data(inode)) {
2261 /* 2344 /*
2262 * With ordered mode we need to add 2345 * With ordered mode we need to add
2263 * the inode to the journal handle 2346 * the inode to the journal handle
2264 * when we do block allocation. 2347 * when we do block allocation.
2265 */ 2348 */
2266 ret = ext4_jbd2_file_inode(handle, inode); 2349 ret = ext4_jbd2_file_inode(handle, inode);
@@ -2268,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping,
2268 ext4_journal_stop(handle); 2351 ext4_journal_stop(handle);
2269 goto out_writepages; 2352 goto out_writepages;
2270 } 2353 }
2271
2272 } 2354 }
2273 /*
2274 * set the max dirty pages could be write at a time
2275 * to fit into the reserved transaction credits
2276 */
2277 if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES)
2278 wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES;
2279 2355
2280 to_write -= wbc->nr_to_write; 2356 to_write -= wbc->nr_to_write;
2281 ret = mpage_da_writepages(mapping, wbc, 2357 ret = mpage_da_writepages(mapping, wbc,
2282 ext4_da_get_block_write); 2358 ext4_da_get_block_write);
2283 ext4_journal_stop(handle); 2359 ext4_journal_stop(handle);
2284 if (wbc->nr_to_write) { 2360 if (ret == MPAGE_DA_EXTENT_TAIL) {
2361 /*
2362 * got one extent now try with
2363 * rest of the pages
2364 */
2365 to_write += wbc->nr_to_write;
2366 ret = 0;
2367 } else if (wbc->nr_to_write) {
2285 /* 2368 /*
2286 * There is no more writeout needed 2369 * There is no more writeout needed
2287 * or we requested for a noblocking writeout 2370 * or we requested for a noblocking writeout
@@ -2293,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping,
2293 wbc->nr_to_write = to_write; 2376 wbc->nr_to_write = to_write;
2294 } 2377 }
2295 2378
2296out_writepages: 2379 if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2297 wbc->nr_to_write = to_write; 2380 /* We skipped pages in this loop */
2298 if (range_start)
2299 wbc->range_start = range_start; 2381 wbc->range_start = range_start;
2382 wbc->nr_to_write = to_write +
2383 wbc->pages_skipped - pages_skipped;
2384 wbc->pages_skipped = pages_skipped;
2385 goto restart_loop;
2386 }
2387
2388out_writepages:
2389 wbc->nr_to_write = to_write - nr_to_writebump;
2390 wbc->range_start = range_start;
2300 return ret; 2391 return ret;
2301} 2392}
2302 2393
@@ -3486,6 +3577,9 @@ void ext4_truncate(struct inode *inode)
3486 * modify the block allocation tree. 3577 * modify the block allocation tree.
3487 */ 3578 */
3488 down_write(&ei->i_data_sem); 3579 down_write(&ei->i_data_sem);
3580
3581 ext4_discard_reservation(inode);
3582
3489 /* 3583 /*
3490 * The orphan list entry will now protect us from any crash which 3584 * The orphan list entry will now protect us from any crash which
3491 * occurs before the truncate completes, so it is now safe to propagate 3585 * occurs before the truncate completes, so it is now safe to propagate
@@ -3555,8 +3649,6 @@ do_indirects:
3555 ; 3649 ;
3556 } 3650 }
3557 3651
3558 ext4_discard_reservation(inode);
3559
3560 up_write(&ei->i_data_sem); 3652 up_write(&ei->i_data_sem);
3561 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3653 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3562 ext4_mark_inode_dirty(handle, inode); 3654 ext4_mark_inode_dirty(handle, inode);
@@ -4324,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4324 return 0; 4416 return 0;
4325} 4417}
4326 4418
4419static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4420 int chunk)
4421{
4422 int indirects;
4423
4424 /* if nrblocks are contiguous */
4425 if (chunk) {
4426 /*
4427 * With N contiguous data blocks, we need at most
4428 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
4429 * 2 dindirect blocks
4430 * 1 tindirect block
4431 */
4432 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
4433 return indirects + 3;
4434 }
4435 /*
4436 * if nrblocks are not contiguous, in the worst case each block touches
4437 * an indirect block, and each indirect block touches a double indirect
4438 * block, plus a triple indirect block
4439 */
4440 indirects = nrblocks * 2 + 1;
4441 return indirects;
4442}
4443
4444static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4445{
4446 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4447 return ext4_indirect_trans_blocks(inode, nrblocks, 0);
4448 return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
4449}
4327/* 4450/*
4328 * How many blocks doth make a writepage()? 4451 * Account for index blocks, block group bitmaps and block group
4329 * 4452 * descriptor blocks if we modify data blocks and index blocks;
4330 * With N blocks per page, it may be: 4453 * worst case, the index blocks spread over different block groups
4331 * N data blocks
4332 * 2 indirect block
4333 * 2 dindirect
4334 * 1 tindirect
4335 * N+5 bitmap blocks (from the above)
4336 * N+5 group descriptor summary blocks
4337 * 1 inode block
4338 * 1 superblock.
4339 * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
4340 * 4454 *
4341 * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS 4455 * If data blocks are discontiguous, they can spread over
4456 * different block groups too. If they are contiguous, with flexbg,
4457 * they could still cross a block group boundary.
4342 * 4458 *
4343 * With ordered or writeback data it's the same, less the N data blocks. 4459 * Also account for superblock, inode, quota and xattr blocks
4460 */
4461int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4462{
4463 int groups, gdpblocks;
4464 int idxblocks;
4465 int ret = 0;
4466
4467 /*
4468 * How many index blocks do we need to touch to modify nrblocks?
4469 * The "Chunk" flag indicates whether the nrblocks are
4470 * physically contiguous on disk
4471 *
4472 * Direct IO and fallocate call get_block to allocate
4473 * one single extent at a time, so they can set the "Chunk" flag
4474 */
4475 idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
4476
4477 ret = idxblocks;
4478
4479 /*
4480 * Now let's see how many group bitmaps and group descriptors need
4481 * to be accounted for
4482 */
4483 groups = idxblocks;
4484 if (chunk)
4485 groups += 1;
4486 else
4487 groups += nrblocks;
4488
4489 gdpblocks = groups;
4490 if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
4491 groups = EXT4_SB(inode->i_sb)->s_groups_count;
4492 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4493 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4494
4495 /* bitmaps and block group descriptor blocks */
4496 ret += groups + gdpblocks;
4497
4498 /* Blocks for super block, inode, quota and xattr blocks */
4499 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4500
4501 return ret;
4502}
4503
4504/*
4505 * Calculate the total number of credits to reserve to fit
4506 * the modification of a single page into a single transaction,
4507 * which may include multiple chunks of block allocations.
4344 * 4508 *
4345 * If the inode's direct blocks can hold an integral number of pages then a 4509 * This could be called via ext4_write_begin()
4346 * page cannot straddle two indirect blocks, and we can only touch one indirect
4347 * and dindirect block, and the "5" above becomes "3".
4348 * 4510 *
4349 * This still overestimates under most circumstances. If we were to pass the 4511 * We need to consider the worst case, when
4350 * start and end offsets in here as well we could do block_to_path() on each 4512 * one new block is needed per extent.
4351 * block and work out the exact number of indirects which are touched. Pah.
4352 */ 4513 */
4353
4354int ext4_writepage_trans_blocks(struct inode *inode) 4514int ext4_writepage_trans_blocks(struct inode *inode)
4355{ 4515{
4356 int bpp = ext4_journal_blocks_per_page(inode); 4516 int bpp = ext4_journal_blocks_per_page(inode);
4357 int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
4358 int ret; 4517 int ret;
4359 4518
4360 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 4519 ret = ext4_meta_trans_blocks(inode, bpp, 0);
4361 return ext4_ext_writepage_trans_blocks(inode, bpp);
4362 4520
4521 /* Account for data blocks for journalled mode */
4363 if (ext4_should_journal_data(inode)) 4522 if (ext4_should_journal_data(inode))
4364 ret = 3 * (bpp + indirects) + 2; 4523 ret += bpp;
4365 else
4366 ret = 2 * (bpp + indirects) + 2;
4367
4368#ifdef CONFIG_QUOTA
4369 /* We know that structure was already allocated during DQUOT_INIT so
4370 * we will be updating only the data blocks + inodes */
4371 ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
4372#endif
4373
4374 return ret; 4524 return ret;
4375} 4525}
4376 4526
4377/* 4527/*
4528 * Calculate the journal credits for a chunk of data modification.
4529 *
4530 * This is called from DIO, fallocate or whoever else calls
4531 * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks.
4532 *
4533 * Journal buffers for data blocks are not included here, as DIO
4534 * and fallocate do not need to journal data buffers.
4535 */
4536int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
4537{
4538 return ext4_meta_trans_blocks(inode, nrblocks, 1);
4539}
4540
4541/*
4378 * The caller must have previously called ext4_reserve_inode_write(). 4542 * The caller must have previously called ext4_reserve_inode_write().
4379 * Given this, we know that the caller already has write access to iloc->bh. 4543 * Given this, we know that the caller already has write access to iloc->bh.
4380 */ 4544 */
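A self-contained sketch of the estimate the new ext4_meta_trans_blocks() above computes; the parameter names and the worked numbers are illustrative, with meta_blocks standing in for EXT4_META_TRANS_BLOCKS():

static int meta_trans_blocks_sketch(int idxblocks, int nrblocks, int chunk,
				    int groups_count, int gdb_count,
				    int meta_blocks)
{
	/* a contiguous chunk touches one data group, discontiguous
	 * blocks up to nrblocks groups, on top of the index blocks */
	int groups = idxblocks + (chunk ? 1 : nrblocks);
	int gdpblocks = groups;

	/* clamp by what the filesystem actually has */
	if (groups > groups_count)
		groups = groups_count;
	if (groups > gdb_count)
		gdpblocks = gdb_count;

	/* index blocks + bitmaps + descriptors + sb/inode/quota/xattr */
	return idxblocks + groups + gdpblocks + meta_blocks;
}

For example, with idxblocks = 4, a single contiguous chunk and plenty of groups, this yields 4 + 5 + 5 + meta_blocks credits.
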
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 865e9ddb44d4..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3282} 3282}
3283 3283
3284/* 3284/*
3285 * Return the prealloc space that has the minimal distance
3286 * from the goal block. @cpa is the prealloc
3287 * space that currently has the minimal known distance
3288 * from the goal block.
3289 */
3290static struct ext4_prealloc_space *
3291ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3292 struct ext4_prealloc_space *pa,
3293 struct ext4_prealloc_space *cpa)
3294{
3295 ext4_fsblk_t cur_distance, new_distance;
3296
3297 if (cpa == NULL) {
3298 atomic_inc(&pa->pa_count);
3299 return pa;
3300 }
3301 cur_distance = abs(goal_block - cpa->pa_pstart);
3302 new_distance = abs(goal_block - pa->pa_pstart);
3303
3304 if (cur_distance < new_distance)
3305 return cpa;
3306
3307 /* drop the previous reference */
3308 atomic_dec(&cpa->pa_count);
3309 atomic_inc(&pa->pa_count);
3310 return pa;
3311}
3312
3313/*
3285 * search goal blocks in preallocated space 3314 * search goal blocks in preallocated space
3286 */ 3315 */
3287static noinline_for_stack int 3316static noinline_for_stack int
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3290 int order, i; 3319 int order, i;
3291 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3320 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3292 struct ext4_locality_group *lg; 3321 struct ext4_locality_group *lg;
3293 struct ext4_prealloc_space *pa; 3322 struct ext4_prealloc_space *pa, *cpa = NULL;
3323 ext4_fsblk_t goal_block;
3294 3324
3295 /* only data can be preallocated */ 3325 /* only data can be preallocated */
3296 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3326 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3333 /* The max size of hash table is PREALLOC_TB_SIZE */ 3363 /* The max size of hash table is PREALLOC_TB_SIZE */
3334 order = PREALLOC_TB_SIZE - 1; 3364 order = PREALLOC_TB_SIZE - 1;
3335 3365
3366 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
3367 ac->ac_g_ex.fe_start +
3368 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3369 /*
3370 * search for the prealloc space that has the
3371 * minimal distance from the goal block.
3372 */
3336 for (i = order; i < PREALLOC_TB_SIZE; i++) { 3373 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3337 rcu_read_lock(); 3374 rcu_read_lock();
3338 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], 3375 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3340 spin_lock(&pa->pa_lock); 3377 spin_lock(&pa->pa_lock);
3341 if (pa->pa_deleted == 0 && 3378 if (pa->pa_deleted == 0 &&
3342 pa->pa_free >= ac->ac_o_ex.fe_len) { 3379 pa->pa_free >= ac->ac_o_ex.fe_len) {
3343 atomic_inc(&pa->pa_count); 3380
3344 ext4_mb_use_group_pa(ac, pa); 3381 cpa = ext4_mb_check_group_pa(goal_block,
3345 spin_unlock(&pa->pa_lock); 3382 pa, cpa);
3346 ac->ac_criteria = 20;
3347 rcu_read_unlock();
3348 return 1;
3349 } 3383 }
3350 spin_unlock(&pa->pa_lock); 3384 spin_unlock(&pa->pa_lock);
3351 } 3385 }
3352 rcu_read_unlock(); 3386 rcu_read_unlock();
3353 } 3387 }
3388 if (cpa) {
3389 ext4_mb_use_group_pa(ac, cpa);
3390 ac->ac_criteria = 20;
3391 return 1;
3392 }
3354 return 0; 3393 return 0;
3355} 3394}
3356 3395
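The comparison at the heart of ext4_mb_check_group_pa() above, reduced to plain block numbers; reference counting is elided and the function name is illustrative:

/* return the preallocation start closer to the goal; on a tie the
 * newly examined candidate wins, as in the hunk above */
static unsigned long long closer_pa_start(unsigned long long goal,
					  unsigned long long cur,
					  unsigned long long cand)
{
	unsigned long long cur_d = cur > goal ? cur - goal : goal - cur;
	unsigned long long cand_d = cand > goal ? cand - goal : goal - cand;

	return cur_d < cand_d ? cur : cand;
}
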
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b9e077ba07e9..46fc0b5b12ba 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
53 * credit. But below we try to not accumulate too much 53 * credit. But below we try to not accumulate too much
54 * of them by restarting the journal. 54 * of them by restarting the journal.
55 */ 55 */
56 needed = ext4_ext_calc_credits_for_insert(inode, path); 56 needed = ext4_ext_calc_credits_for_single_extent(inode,
57 lb->last_block - lb->first_block + 1, path);
57 58
58 /* 59 /*
59 * Make sure the credit we accumulated is not really high 60 * Make sure the credit we accumulated is not really high
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 0a9265164265..b3d35604ea18 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
773 773
774 if (reserved_gdb || gdb_off == 0) { 774 if (reserved_gdb || gdb_off == 0) {
775 if (!EXT4_HAS_COMPAT_FEATURE(sb, 775 if (!EXT4_HAS_COMPAT_FEATURE(sb,
776 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 776 EXT4_FEATURE_COMPAT_RESIZE_INODE)
777 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
777 ext4_warning(sb, __func__, 778 ext4_warning(sb, __func__,
778 "No reserved GDT blocks, can't resize"); 779 "No reserved GDT blocks, can't resize");
779 return -EPERM; 780 return -EPERM;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d5d77958b861..566344b926b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
568#endif 568#endif
569 ei->i_block_alloc_info = NULL; 569 ei->i_block_alloc_info = NULL;
570 ei->vfs_inode.i_version = 1; 570 ei->vfs_inode.i_version = 1;
571 ei->vfs_inode.i_data.writeback_index = 0;
571 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 572 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
572 INIT_LIST_HEAD(&ei->i_prealloc_list); 573 INIT_LIST_HEAD(&ei->i_prealloc_list);
573 spin_lock_init(&ei->i_prealloc_lock); 574 spin_lock_init(&ei->i_prealloc_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6d266d793e2c..80ff3381fa21 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait)
562 struct buffer_head *bh; 562 struct buffer_head *bh;
563 struct msdos_dir_entry *raw_entry; 563 struct msdos_dir_entry *raw_entry;
564 loff_t i_pos; 564 loff_t i_pos;
565 int err = 0; 565 int err;
566 566
567retry: 567retry:
568 i_pos = MSDOS_I(inode)->i_pos; 568 i_pos = MSDOS_I(inode)->i_pos;
569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos)
570 return 0; 570 return 0;
571 571
572 lock_super(sb);
573 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 572 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
574 if (!bh) { 573 if (!bh) {
575 printk(KERN_ERR "FAT: unable to read inode block " 574 printk(KERN_ERR "FAT: unable to read inode block "
576 "for updating (i_pos %lld)\n", i_pos); 575 "for updating (i_pos %lld)\n", i_pos);
577 err = -EIO; 576 return -EIO;
578 goto out;
579 } 577 }
580 spin_lock(&sbi->inode_hash_lock); 578 spin_lock(&sbi->inode_hash_lock);
581 if (i_pos != MSDOS_I(inode)->i_pos) { 579 if (i_pos != MSDOS_I(inode)->i_pos) {
582 spin_unlock(&sbi->inode_hash_lock); 580 spin_unlock(&sbi->inode_hash_lock);
583 brelse(bh); 581 brelse(bh);
584 unlock_super(sb);
585 goto retry; 582 goto retry;
586 } 583 }
587 584
@@ -607,11 +604,10 @@ retry:
607 } 604 }
608 spin_unlock(&sbi->inode_hash_lock); 605 spin_unlock(&sbi->inode_hash_lock);
609 mark_buffer_dirty(bh); 606 mark_buffer_dirty(bh);
607 err = 0;
610 if (wait) 608 if (wait)
611 err = sync_dirty_buffer(bh); 609 err = sync_dirty_buffer(bh);
612 brelse(bh); 610 brelse(bh);
613out:
614 unlock_super(sb);
615 return err; 611 return err;
616} 612}
617 613
diff --git a/fs/ioprio.c b/fs/ioprio.c
index c4a1c3c65aac..da3cc460d4df 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
115 pgrp = task_pgrp(current); 115 pgrp = task_pgrp(current);
116 else 116 else
117 pgrp = find_vpid(who); 117 pgrp = find_vpid(who);
118 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 118 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
119 ret = set_task_ioprio(p, ioprio); 119 ret = set_task_ioprio(p, ioprio);
120 if (ret) 120 if (ret)
121 break; 121 break;
122 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 122 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
123 break; 123 break;
124 case IOPRIO_WHO_USER: 124 case IOPRIO_WHO_USER:
125 if (!who) 125 if (!who)
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
204 pgrp = task_pgrp(current); 204 pgrp = task_pgrp(current);
205 else 205 else
206 pgrp = find_vpid(who); 206 pgrp = find_vpid(who);
207 do_each_pid_task(pgrp, PIDTYPE_PGID, p) { 207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
208 tmpio = get_task_ioprio(p); 208 tmpio = get_task_ioprio(p);
209 if (tmpio < 0) 209 if (tmpio < 0)
210 continue; 210 continue;
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
212 ret = tmpio; 212 ret = tmpio;
213 else 213 else
214 ret = ioprio_best(ret, tmpio); 214 ret = ioprio_best(ret, tmpio);
215 } while_each_pid_task(pgrp, PIDTYPE_PGID, p); 215 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
216 break; 216 break;
217 case IOPRIO_WHO_USER: 217 case IOPRIO_WHO_USER:
218 if (!who) 218 if (!who)
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 31559f45fdde..4c41db91eaa4 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -12,7 +12,6 @@
12#ifndef _JFFS2_FS_I 12#ifndef _JFFS2_FS_I
13#define _JFFS2_FS_I 13#define _JFFS2_FS_I
14 14
15#include <linux/version.h>
16#include <linux/rbtree.h> 15#include <linux/rbtree.h>
17#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
18#include <linux/mutex.h> 17#include <linux/mutex.h>
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9abcd2b329f7..e9b20173fef3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw,
1279 } 1279 }
1280 } 1280 }
1281 1281
1282 if (errors > 0) {
1283 dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n",
1284 errors, (errors == 1 ? "" : "s"));
1285 if (!sloppy)
1286 return 0;
1287 }
1282 return 1; 1288 return 1;
1283 1289
1284out_nomem: 1290out_nomem:
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index b6ed38380ab8..54b8b4140c8f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt)
443 * enough space for either: 443 * enough space for either:
444 */ 444 */
445 alloc = sizeof(struct posix_ace_state_array) 445 alloc = sizeof(struct posix_ace_state_array)
446 + cnt*sizeof(struct posix_ace_state); 446 + cnt*sizeof(struct posix_user_ace_state);
447 state->users = kzalloc(alloc, GFP_KERNEL); 447 state->users = kzalloc(alloc, GFP_KERNEL);
448 if (!state->users) 448 if (!state->users)
449 return -ENOMEM; 449 return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2e51adac65de..e5b51ffafc6c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
867 int slack_bytes; 867 int slack_bytes;
868 __be32 status; 868 __be32 status;
869 869
870 status = nfserr_resource;
871 cstate = cstate_alloc();
872 if (cstate == NULL)
873 goto out;
874
875 resp->xbuf = &rqstp->rq_res; 870 resp->xbuf = &rqstp->rq_res;
876 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; 871 resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
877 resp->tagp = resp->p; 872 resp->tagp = resp->p;
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
890 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) 885 if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
891 goto out; 886 goto out;
892 887
888 status = nfserr_resource;
889 cstate = cstate_alloc();
890 if (cstate == NULL)
891 goto out;
892
893 status = nfs_ok; 893 status = nfs_ok;
894 while (!status && resp->opcnt < args->opcnt) { 894 while (!status && resp->opcnt < args->opcnt) {
895 op = &args->ops[resp->opcnt++]; 895 op = &args->ops[resp->opcnt++];
@@ -957,9 +957,9 @@ encode_op:
957 nfsd4_increment_op_stats(op->opnum); 957 nfsd4_increment_op_stats(op->opnum);
958 } 958 }
959 959
960 cstate_free(cstate);
960out: 961out:
961 nfsd4_release_compoundargs(args); 962 nfsd4_release_compoundargs(args);
962 cstate_free(cstate);
963 dprintk("nfsv4 compound returned %d\n", ntohl(status)); 963 dprintk("nfsv4 compound returned %d\n", ntohl(status));
964 return status; 964 return status;
965} 965}
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index e1781c8b1650..9e8a95be7a1e 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
174 // TODO: Consider moving this lot to a separate function! (AIA) 174 // TODO: Consider moving this lot to a separate function! (AIA)
175handle_name: 175handle_name:
176 { 176 {
177 struct dentry *real_dent, *new_dent;
178 MFT_RECORD *m; 177 MFT_RECORD *m;
179 ntfs_attr_search_ctx *ctx; 178 ntfs_attr_search_ctx *ctx;
180 ntfs_inode *ni = NTFS_I(dent_inode); 179 ntfs_inode *ni = NTFS_I(dent_inode);
@@ -255,93 +254,9 @@ handle_name:
255 } 254 }
256 nls_name.hash = full_name_hash(nls_name.name, nls_name.len); 255 nls_name.hash = full_name_hash(nls_name.name, nls_name.len);
257 256
258 /* 257 dent = d_add_ci(dent, dent_inode, &nls_name);
259 * Note: No need for dent->d_lock lock as i_mutex is held on the
260 * parent inode.
261 */
262
263 /* Does a dentry matching the nls_name exist already? */
264 real_dent = d_lookup(dent->d_parent, &nls_name);
265 /* If not, create it now. */
266 if (!real_dent) {
267 real_dent = d_alloc(dent->d_parent, &nls_name);
268 kfree(nls_name.name);
269 if (!real_dent) {
270 err = -ENOMEM;
271 goto err_out;
272 }
273 new_dent = d_splice_alias(dent_inode, real_dent);
274 if (new_dent)
275 dput(real_dent);
276 else
277 new_dent = real_dent;
278 ntfs_debug("Done. (Created new dentry.)");
279 return new_dent;
280 }
281 kfree(nls_name.name); 258 kfree(nls_name.name);
282 /* Matching dentry exists, check if it is negative. */ 259 return dent;
283 if (real_dent->d_inode) {
284 if (unlikely(real_dent->d_inode != dent_inode)) {
285 /* This can happen because bad inodes are unhashed. */
286 BUG_ON(!is_bad_inode(dent_inode));
287 BUG_ON(!is_bad_inode(real_dent->d_inode));
288 }
289 /*
290 * Already have the inode and the dentry attached, decrement
291 * the reference count to balance the ntfs_iget() we did
292 * earlier on. We found the dentry using d_lookup() so it
293 * cannot be disconnected and thus we do not need to worry
294 * about any NFS/disconnectedness issues here.
295 */
296 iput(dent_inode);
297 ntfs_debug("Done. (Already had inode and dentry.)");
298 return real_dent;
299 }
300 /*
301 * Negative dentry: instantiate it unless the inode is a directory and
302 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
303 * in which case d_move() that in place of the found dentry.
304 */
305 if (!S_ISDIR(dent_inode->i_mode)) {
306 /* Not a directory; everything is easy. */
307 d_instantiate(real_dent, dent_inode);
308 ntfs_debug("Done. (Already had negative file dentry.)");
309 return real_dent;
310 }
311 spin_lock(&dcache_lock);
312 if (list_empty(&dent_inode->i_dentry)) {
313 /*
314 * Directory without a 'disconnected' dentry; we need to do
315 * d_instantiate() by hand because it takes dcache_lock which
316 * we already hold.
317 */
318 list_add(&real_dent->d_alias, &dent_inode->i_dentry);
319 real_dent->d_inode = dent_inode;
320 spin_unlock(&dcache_lock);
321 security_d_instantiate(real_dent, dent_inode);
322 ntfs_debug("Done. (Already had negative directory dentry.)");
323 return real_dent;
324 }
325 /*
326 * Directory with a 'disconnected' dentry; get a reference to the
327 * 'disconnected' dentry.
328 */
329 new_dent = list_entry(dent_inode->i_dentry.next, struct dentry,
330 d_alias);
331 dget_locked(new_dent);
332 spin_unlock(&dcache_lock);
333 /* Do security vodoo. */
334 security_d_instantiate(real_dent, dent_inode);
335 /* Move new_dent in place of real_dent. */
336 d_move(new_dent, real_dent);
337 /* Balance the ntfs_iget() we did above. */
338 iput(dent_inode);
339 /* Throw away real_dent. */
340 dput(real_dent);
341 /* Use new_dent as the actual dentry. */
342 ntfs_debug("Done. (Already had negative, disconnected directory "
343 "dentry.)");
344 return new_dent;
345 260
346eio_err_out: 261eio_err_out:
347 ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); 262 ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk.");
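The ntfs_lookup() hunk above collapses the hand-rolled dentry splicing into a single d_add_ci() call; a sketch of the resulting pattern for a case-insensitive lookup tail (the wrapper function is hypothetical):

static struct dentry *ci_lookup_tail(struct dentry *dentry,
				     struct inode *inode,
				     struct qstr *ci_name)
{
	/* d_add_ci() finds or allocates a dentry for the case-preserved
	 * name and attaches the inode, covering the negative and
	 * disconnected cases the old code spelled out by hand */
	struct dentry *res = d_add_ci(dentry, inode, ci_name);

	kfree(ci_name->name);	/* d_add_ci() keeps its own copy */
	return res;
}
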
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 3a8af75351e8..4087fbdac327 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -113,7 +113,7 @@ typedef struct {
113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the 113 * Reason flags (32-bit). Cumulative flags describing the change(s) to the
114 * file since it was last opened. I think the names speak for themselves but 114 * file since it was last opened. I think the names speak for themselves but
115 * if you disagree check out the descriptions in the Linux NTFS project NTFS 115 * if you disagree check out the descriptions in the Linux NTFS project NTFS
116 * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html 116 * documentation: http://www.linux-ntfs.org/
117 */ 117 */
118enum { 118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), 119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001),
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS;
145 * Source info flags (32-bit). Information about the source of the change(s) 145 * Source info flags (32-bit). Information about the source of the change(s)
146 * to the file. For detailed descriptions of what these mean, see the Linux 146 * to the file. For detailed descriptions of what these mean, see the Linux
147 * NTFS project NTFS documentation: 147 * NTFS project NTFS documentation:
148 * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html 148 * http://www.linux-ntfs.org/
149 */ 149 */
150enum { 150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), 151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001),
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index d8bfa0eb41b2..52276c02f710 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v)
138 " message id: %d\n" 138 " message id: %d\n"
139 " message type: %u\n" 139 " message type: %u\n"
140 " message key: 0x%08x\n" 140 " message key: 0x%08x\n"
141 " sock acquiry: %lu.%lu\n" 141 " sock acquiry: %lu.%ld\n"
142 " send start: %lu.%lu\n" 142 " send start: %lu.%ld\n"
143 " wait start: %lu.%lu\n", 143 " wait start: %lu.%ld\n",
144 nst, (unsigned long)nst->st_task->pid, 144 nst, (unsigned long)nst->st_task->pid,
145 (unsigned long)nst->st_task->tgid, 145 (unsigned long)nst->st_task->tgid,
146 nst->st_task->comm, nst->st_node, 146 nst->st_task->comm, nst->st_node,
147 nst->st_sc, nst->st_id, nst->st_msg_type, 147 nst->st_sc, nst->st_id, nst->st_msg_type,
148 nst->st_msg_key, 148 nst->st_msg_key,
149 nst->st_sock_time.tv_sec, 149 nst->st_sock_time.tv_sec,
150 (unsigned long)nst->st_sock_time.tv_usec, 150 (long)nst->st_sock_time.tv_usec,
151 nst->st_send_time.tv_sec, 151 nst->st_send_time.tv_sec,
152 (unsigned long)nst->st_send_time.tv_usec, 152 (long)nst->st_send_time.tv_usec,
153 nst->st_status_time.tv_sec, 153 nst->st_status_time.tv_sec,
154 nst->st_status_time.tv_usec); 154 (long)nst->st_status_time.tv_usec);
155 } 155 }
156 156
157 spin_unlock(&o2net_debug_lock); 157 spin_unlock(&o2net_debug_lock);
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
276 return sc; /* unused, just needs to be null when done */ 276 return sc; /* unused, just needs to be null when done */
277} 277}
278 278
279#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec 279#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec
280 280
281static int sc_seq_show(struct seq_file *seq, void *v) 281static int sc_seq_show(struct seq_file *seq, void *v)
282{ 282{
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v)
309 " remote node: %s\n" 309 " remote node: %s\n"
310 " page off: %zu\n" 310 " page off: %zu\n"
311 " handshake ok: %u\n" 311 " handshake ok: %u\n"
312 " timer: %lu.%lu\n" 312 " timer: %lu.%ld\n"
313 " data ready: %lu.%lu\n" 313 " data ready: %lu.%ld\n"
314 " advance start: %lu.%lu\n" 314 " advance start: %lu.%ld\n"
315 " advance stop: %lu.%lu\n" 315 " advance stop: %lu.%ld\n"
316 " func start: %lu.%lu\n" 316 " func start: %lu.%ld\n"
317 " func stop: %lu.%lu\n" 317 " func stop: %lu.%ld\n"
318 " func key: %u\n" 318 " func key: %u\n"
319 " func type: %u\n", 319 " func type: %u\n",
320 sc, 320 sc,
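The specifier changes above follow from the type of tv_usec: suseconds_t is a signed long, so it is cast to (long) and printed with %ld rather than %lu. A minimal userspace illustration (plain printf standing in for seq_printf):

#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	/* tv_usec is signed: %ld, not %lu */
	printf("%lu.%ld\n", (unsigned long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}
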
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index a27d61581bd6..2bcf706d9dd3 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
144 144
145#ifdef CONFIG_DEBUG_FS 145#ifdef CONFIG_DEBUG_FS
146void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, 146static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
147 u32 msgkey, struct task_struct *task, u8 node) 147 u32 msgkey, struct task_struct *task, u8 node)
148{ 148{
149 INIT_LIST_HEAD(&nst->st_net_debug_item); 149 INIT_LIST_HEAD(&nst->st_net_debug_item);
150 nst->st_task = task; 150 nst->st_task = task;
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
153 nst->st_node = node; 153 nst->st_node = node;
154} 154}
155 155
156void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
157{ 157{
158 do_gettimeofday(&nst->st_sock_time); 158 do_gettimeofday(&nst->st_sock_time);
159} 159}
160 160
161void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
162{ 162{
163 do_gettimeofday(&nst->st_send_time); 163 do_gettimeofday(&nst->st_send_time);
164} 164}
165 165
166void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
167{ 167{
168 do_gettimeofday(&nst->st_status_time); 168 do_gettimeofday(&nst->st_status_time);
169} 169}
170 170
171void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
172 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
173{ 173{
174 nst->st_sc = sc; 174 nst->st_sc = sc;
175} 175}
176 176
177void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
178{ 178{
179 nst->st_id = msg_id; 179 nst->st_id = msg_id;
180} 180}
181
182#else /* CONFIG_DEBUG_FS */
183
184static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
185 u32 msgkey, struct task_struct *task, u8 node)
186{
187}
188
189static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
190{
191}
192
193static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
194{
195}
196
197static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
198{
199}
200
201static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
202 struct o2net_sock_container *sc)
203{
204}
205
206static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
207 u32 msg_id)
208{
209}
210
181#endif /* CONFIG_DEBUG_FS */ 211#endif /* CONFIG_DEBUG_FS */
182 212
183static inline int o2net_reconnect_delay(void) 213static inline int o2net_reconnect_delay(void)
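Editor's note: this hunk, together with the tcp_internal.h hunk below, moves the o2net send-tracking helpers out of the header. The real versions become static in tcp.c, and the empty !CONFIG_DEBUG_FS stubs now live right next to them, so call sites compile unchanged either way. A minimal userspace sketch of the same compile-time pattern, with hypothetical names (build with -DDEBUG_TRACKING for the real helper, without it for the no-op stub):

#include <stdio.h>

struct tracker { int events; };

#ifdef DEBUG_TRACKING
static void track_event(struct tracker *t)
{
	t->events++;			/* real bookkeeping */
}
#else
static inline void track_event(struct tracker *t)
{
	(void)t;			/* compiled out: no #ifdef at call sites */
}
#endif

int main(void)
{
	struct tracker t = { 0 };
	track_event(&t);
	printf("events recorded: %d\n", t.events);
	return 0;
}

Keeping both variants in one .c file also lets the compiler check the stub signatures against the real ones on every build.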
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 18307ff81b77..8d58cfe410b1 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,42 +224,10 @@ struct o2net_send_tracking {
224 struct timeval st_send_time; 224 struct timeval st_send_time;
225 struct timeval st_status_time; 225 struct timeval st_status_time;
226}; 226};
227
228void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
229 u32 msgkey, struct task_struct *task, u8 node);
230void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
231void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
232void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
233void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
234 struct o2net_sock_container *sc);
235void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
236
237#else 227#else
238struct o2net_send_tracking { 228struct o2net_send_tracking {
239 u32 dummy; 229 u32 dummy;
240}; 230};
241
242static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
243 u32 msgkey, struct task_struct *task, u8 node)
244{
245}
246static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
247{
248}
249static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
250{
251}
252static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
253{
254}
255static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
256 struct o2net_sock_container *sc)
257{
258}
259static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
260 u32 msg_id)
261{
262}
263#endif /* CONFIG_DEBUG_FS */ 231#endif /* CONFIG_DEBUG_FS */
264 232
265#endif /* O2CLUSTER_TCP_INTERNAL_H */ 233#endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8a1875848080..9cce563fd627 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1300 di->i_size = cpu_to_le64(sb->s_blocksize); 1300 di->i_size = cpu_to_le64(sb->s_blocksize);
1301 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); 1301 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
1302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); 1302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
1303 dir->i_blocks = ocfs2_inode_sector_count(dir);
1304 1303
1305 /* 1304 /*
1306 * This should never fail as our extent list is empty and all 1305 * This should never fail as our extent list is empty and all
@@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1310 NULL); 1309 NULL);
1311 if (ret) { 1310 if (ret) {
1312 mlog_errno(ret); 1311 mlog_errno(ret);
1313 goto out; 1312 goto out_commit;
1314 } 1313 }
1315 1314
1315 /*
1316 * Set i_blocks after the extent insert for the most up to
1317 * date ip_clusters value.
1318 */
1319 dir->i_blocks = ocfs2_inode_sector_count(dir);
1320
1316 ret = ocfs2_journal_dirty(handle, di_bh); 1321 ret = ocfs2_journal_dirty(handle, di_bh);
1317 if (ret) { 1322 if (ret) {
1318 mlog_errno(ret); 1323 mlog_errno(ret);
@@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1336 len, 0, NULL); 1341 len, 0, NULL);
1337 if (ret) { 1342 if (ret) {
1338 mlog_errno(ret); 1343 mlog_errno(ret);
1339 goto out; 1344 goto out_commit;
1340 } 1345 }
1341 } 1346 }
1342 1347
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 7a37240f7a31..c47bc2a809c2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1418{ 1418{
1419 unsigned int node_num; 1419 unsigned int node_num;
1420 int status, i; 1420 int status, i;
1421 u32 gen;
1421 struct buffer_head *bh = NULL; 1422 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di; 1423 struct ocfs2_dinode *di;
1423 1424
1424 /* This is called with the super block cluster lock, so we 1425 /* This is called with the super block cluster lock, so we
1425 * know that the slot map can't change underneath us. */ 1426 * know that the slot map can't change underneath us. */
1426 1427
1427 spin_lock(&osb->osb_lock);
1428 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */ 1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL); 1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
@@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1433 goto bail; 1433 goto bail;
1434 } 1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data; 1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] = 1436 gen = ocfs2_get_recovery_generation(di);
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh); 1437 brelse(bh);
1439 bh = NULL; 1438 bh = NULL;
1440 1439
1440 spin_lock(&osb->osb_lock);
1441 osb->slot_recovery_generations[i] = gen;
1442
1441 mlog(0, "Slot %u recovery generation is %u\n", i, 1443 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]); 1444 osb->slot_recovery_generations[i]);
1443 1445
1444 if (i == osb->slot_num) 1446 if (i == osb->slot_num) {
1447 spin_unlock(&osb->osb_lock);
1445 continue; 1448 continue;
1449 }
1446 1450
1447 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); 1451 status = ocfs2_slot_to_node_num_locked(osb, i, &node_num);
1448 if (status == -ENOENT) 1452 if (status == -ENOENT) {
1453 spin_unlock(&osb->osb_lock);
1449 continue; 1454 continue;
1455 }
1450 1456
1451 if (__ocfs2_recovery_map_test(osb, node_num)) 1457 if (__ocfs2_recovery_map_test(osb, node_num)) {
1458 spin_unlock(&osb->osb_lock);
1452 continue; 1459 continue;
1460 }
1453 spin_unlock(&osb->osb_lock); 1461 spin_unlock(&osb->osb_lock);
1454 1462
1455 /* Ok, we have a slot occupied by another node which 1463 /* Ok, we have a slot occupied by another node which
@@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1465 mlog_errno(status); 1473 mlog_errno(status);
1466 goto bail; 1474 goto bail;
1467 } 1475 }
1468
1469 spin_lock(&osb->osb_lock);
1470 } 1476 }
1471 spin_unlock(&osb->osb_lock);
1472 1477
1473 status = 0; 1478 status = 0;
1474bail: 1479bail:
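Editor's note: the journal.c reordering above stops ocfs2_mark_dead_nodes() from holding the osb_lock spinlock across ocfs2_read_journal_inode(), which can block on disk I/O; the generation is read into a local first, and the lock is taken only to publish it and to test the slot state. A userspace sketch of that lock-narrowing pattern, with a pthread mutex standing in for the spinlock and a hypothetical slow_read() for the blocking journal read:

#include <pthread.h>
#include <unistd.h>

#define NSLOTS 8

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int generations[NSLOTS];

/* Hypothetical stand-in for the blocking journal-inode read. */
static unsigned int slow_read(int slot)
{
	usleep(1000);			/* pretend to wait on I/O */
	return (unsigned int)slot + 1;
}

int main(void)
{
	for (int i = 0; i < NSLOTS; i++) {
		unsigned int gen = slow_read(i);	/* no lock held here */

		pthread_mutex_lock(&lock);		/* short critical section */
		generations[i] = gen;			/* publish the result */
		pthread_mutex_unlock(&lock);
	}
	return 0;
}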
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 10e149ae5e3a..07f348b8d721 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name,
97 goto out; 97 goto out;
98 } 98 }
99 99
100 /* Ok, the stack is pinned */
101 p->sp_count++;
102 active_stack = p; 100 active_stack = p;
103
104 rc = 0; 101 rc = 0;
105 102
106out: 103out:
104 /* If we found it, pin it */
105 if (!rc)
106 active_stack->sp_count++;
107
107 spin_unlock(&ocfs2_stack_lock); 108 spin_unlock(&ocfs2_stack_lock);
108 return rc; 109 return rc;
109} 110}
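Editor's note: the stackglue hunk defers the sp_count bump to the common exit path, so any path that ends with rc == 0 takes the pin, including a path earlier in the function (not shown here) where the requested stack is already active. A compact sketch of the count-on-single-exit idiom, with hypothetical names:

struct stack { int sp_count; };

static struct stack *active_stack;

static int request_stack(struct stack *p)
{
	int rc = 0;

	if (active_stack && active_stack != p) {
		rc = -16;	/* EBUSY: a different stack is already active */
		goto out;
	}
	active_stack = p;
out:
	if (!rc)
		active_stack->sp_count++;	/* pin only on success */
	return rc;
}

int main(void)
{
	struct stack s = { 0 };
	return request_stack(&s);	/* s.sp_count is now 1 */
}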
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0d6eb33597c6..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
337 return 0; 337 return 0;
338} 338}
339 339
340/*
341 * Use precise platform statistics if available:
342 */
343#ifdef CONFIG_VIRT_CPU_ACCOUNTING
344static cputime_t task_utime(struct task_struct *p)
345{
346 return p->utime;
347}
348
349static cputime_t task_stime(struct task_struct *p)
350{
351 return p->stime;
352}
353#else
354static cputime_t task_utime(struct task_struct *p)
355{
356 clock_t utime = cputime_to_clock_t(p->utime),
357 total = utime + cputime_to_clock_t(p->stime);
358 u64 temp;
359
360 /*
361 * Use CFS's precise accounting:
362 */
363 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
364
365 if (total) {
366 temp *= utime;
367 do_div(temp, total);
368 }
369 utime = (clock_t)temp;
370
371 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
372 return p->prev_utime;
373}
374
375static cputime_t task_stime(struct task_struct *p)
376{
377 clock_t stime;
378
379 /*
380 * Use CFS's precise accounting. (we subtract utime from
381 * the total, to make sure the total observed by userspace
382 * grows monotonically - apps rely on that):
383 */
384 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
385 cputime_to_clock_t(task_utime(p));
386
387 if (stime >= 0)
388 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
389
390 return p->prev_stime;
391}
392#endif
393
394static cputime_t task_gtime(struct task_struct *p)
395{
396 return p->gtime;
397}
398
399static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, 340static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
400 struct pid *pid, struct task_struct *task, int whole) 341 struct pid *pid, struct task_struct *task, int whole)
401{ 342{
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4fb81e9c94e3..bca0f81eb687 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -330,6 +330,7 @@ retry:
330 spin_lock(&proc_inum_lock); 330 spin_lock(&proc_inum_lock);
331 ida_remove(&proc_inum_ida, i); 331 ida_remove(&proc_inum_ida, i);
332 spin_unlock(&proc_inum_lock); 332 spin_unlock(&proc_inum_lock);
333 return 0;
333 } 334 }
334 return PROC_DYNAMIC_FIRST + i; 335 return PROC_DYNAMIC_FIRST + i;
335} 336}
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 vma->vm_start,
57 vma->vm_end, 57 vma->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 vma->vm_pgoff << PAGE_SHIFT, 62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ded969862960..00f10a2dcf12 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
24#include <linux/tty.h> 24#include <linux/tty.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/quicklist.h>
27#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
28#include <linux/ioport.h> 29#include <linux/ioport.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
@@ -189,7 +190,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
189 "Committed_AS: %8lu kB\n" 190 "Committed_AS: %8lu kB\n"
190 "VmallocTotal: %8lu kB\n" 191 "VmallocTotal: %8lu kB\n"
191 "VmallocUsed: %8lu kB\n" 192 "VmallocUsed: %8lu kB\n"
192 "VmallocChunk: %8lu kB\n", 193 "VmallocChunk: %8lu kB\n"
194 "Quicklists: %8lu kB\n",
193 K(i.totalram), 195 K(i.totalram),
194 K(i.freeram), 196 K(i.freeram),
195 K(i.bufferram), 197 K(i.bufferram),
@@ -221,7 +223,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
221 K(committed), 223 K(committed),
222 (unsigned long)VMALLOC_TOTAL >> 10, 224 (unsigned long)VMALLOC_TOTAL >> 10,
223 vmi.used >> 10, 225 vmi.used >> 10,
224 vmi.largest_chunk >> 10 226 vmi.largest_chunk >> 10,
227 K(quicklist_total_size())
225 ); 228 );
226 229
227 len += hugetlb_report_meminfo(page + len); 230 len += hugetlb_report_meminfo(page + len);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7546a918f790..73d1891ee625 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v)
219 ino = inode->i_ino; 219 ino = inode->i_ino;
220 } 220 }
221 221
222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
223 vma->vm_start, 223 vma->vm_start,
224 vma->vm_end, 224 vma->vm_end,
225 flags & VM_READ ? 'r' : '-', 225 flags & VM_READ ? 'r' : '-',
226 flags & VM_WRITE ? 'w' : '-', 226 flags & VM_WRITE ? 'w' : '-',
227 flags & VM_EXEC ? 'x' : '-', 227 flags & VM_EXEC ? 'x' : '-',
228 flags & VM_MAYSHARE ? 's' : 'p', 228 flags & VM_MAYSHARE ? 's' : 'p',
229 vma->vm_pgoff << PAGE_SHIFT, 229 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
230 MAJOR(dev), MINOR(dev), ino, &len); 230 MAJOR(dev), MINOR(dev), ino, &len);
231 231
232 /* 232 /*
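Editor's note: both /proc map printers above had the same bug. vm_pgoff is an unsigned long, so on 32-bit kernels vm_pgoff << PAGE_SHIFT is evaluated in 32-bit arithmetic and the top PAGE_SHIFT bits of large file offsets are silently lost; widening to loff_t before the shift (and printing with %08llx) keeps the full value. A standalone demonstration of the truncation, assuming a 32-bit unsigned long and a 4 KiB page:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t pgoff = 0x00300000;	/* page offset of a ~12 GiB file position */

	/* 32-bit shift: the high bits fall off. */
	uint32_t truncated = pgoff << PAGE_SHIFT;
	/* Widen first, as the fix does with (loff_t): full 64-bit result. */
	uint64_t correct = (uint64_t)pgoff << PAGE_SHIFT;

	printf("truncated: %08x\n", (unsigned)truncated);
	printf("correct:   %08llx\n", (unsigned long long)correct);
	return 0;
}

Here the truncated value prints as 00000000, which is exactly the bogus offset the old format string would have shown in /proc/pid/maps.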
diff --git a/fs/readdir.c b/fs/readdir.c
index 4e026e5407fb..93a7559bbfd8 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset
80 if (buf->result) 80 if (buf->result)
81 return -EINVAL; 81 return -EINVAL;
82 d_ino = ino; 82 d_ino = ino;
83 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 83 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
84 buf->result = -EOVERFLOW;
84 return -EOVERFLOW; 85 return -EOVERFLOW;
86 }
85 buf->result++; 87 buf->result++;
86 dirent = buf->dirent; 88 dirent = buf->dirent;
87 if (!access_ok(VERIFY_WRITE, dirent, 89 if (!access_ok(VERIFY_WRITE, dirent,
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
155 if (reclen > buf->count) 157 if (reclen > buf->count)
156 return -EINVAL; 158 return -EINVAL;
157 d_ino = ino; 159 d_ino = ino;
158 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) 160 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
161 buf->error = -EOVERFLOW;
159 return -EOVERFLOW; 162 return -EOVERFLOW;
163 }
160 dirent = buf->previous; 164 dirent = buf->previous;
161 if (dirent) { 165 if (dirent) {
162 if (__put_user(offset, &dirent->d_off)) 166 if (__put_user(offset, &dirent->d_off))
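Editor's note: the readdir fix matters because a filldir callback's return value only tells the filesystem's ->readdir to stop iterating; the syscall's own result is taken from buf->error (or buf->result in the single-entry variant). Without storing -EOVERFLOW there, an inode number too large for the old 32-bit dirent ABI would look like a normal end of directory rather than an error. A hypothetical sketch of that callback contract:

/* The non-zero return stops iteration, but the caller reports
 * ctx->error to user-space, so the callback must record why it stopped. */
struct fill_ctx { int error; int count; };

static int fill_one(struct fill_ctx *ctx, unsigned long long ino)
{
	unsigned long d_ino = (unsigned long)ino;

	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
		ctx->error = -75;	/* -EOVERFLOW: remember the reason */
		return -75;		/* stop iteration */
	}
	ctx->count++;
	return 0;
}

int main(void)
{
	struct fill_ctx ctx = { 0, 0 };
	/* An inode number that cannot fit in a 32-bit d_ino. */
	int stop = fill_one(&ctx, 0x123456789ULL);
	return stop ? -ctx.error : 0;	/* the "syscall" reports ctx.error */
}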
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 5d54205e486b..bd20f7f5a933 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
108 goto Done; 108 goto Done;
109 } 109 }
110 /* we need at least one record in buffer */ 110 /* we need at least one record in buffer */
111 pos = m->index;
112 p = m->op->start(m, &pos);
111 while (1) { 113 while (1) {
112 pos = m->index;
113 p = m->op->start(m, &pos);
114 err = PTR_ERR(p); 114 err = PTR_ERR(p);
115 if (!p || IS_ERR(p)) 115 if (!p || IS_ERR(p))
116 break; 116 break;
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
119 break; 119 break;
120 if (unlikely(err)) 120 if (unlikely(err))
121 m->count = 0; 121 m->count = 0;
122 if (unlikely(!m->count)) {
123 p = m->op->next(m, p, &pos);
124 m->index = pos;
125 continue;
126 }
122 if (m->count < m->size) 127 if (m->count < m->size)
123 goto Fill; 128 goto Fill;
124 m->op->stop(m, p); 129 m->op->stop(m, p);
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
128 goto Enomem; 133 goto Enomem;
129 m->count = 0; 134 m->count = 0;
130 m->version = 0; 135 m->version = 0;
136 pos = m->index;
137 p = m->op->start(m, &pos);
131 } 138 }
132 m->op->stop(m, p); 139 m->op->stop(m, p);
133 m->count = 0; 140 m->count = 0;
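Editor's note: the seq_file rework calls ->start() once before the retry loop and re-invokes it only after the buffer has been enlarged; a record whose show() emits nothing (m->count stays 0) is now skipped via ->next() instead of restarting the whole sequence, which avoids rescanning from the top on every pass. A condensed userspace model of the fixed loop, with the iterator reduced to an array cursor (this models the control flow, not the kernel API):

#include <stdio.h>
#include <string.h>

struct seq {
	char buf[16];
	size_t count, size;
	int index;
};

/* Toy records; the empty string models a record whose show() emits nothing. */
static const char *records[] = { "alpha\n", "", "beta\n", NULL };

static int fill(struct seq *m)
{
	while (records[m->index]) {
		size_t len = strlen(records[m->index]);

		if (len == 0) {		/* nothing emitted: advance, don't restart */
			m->index++;
			continue;
		}
		if (m->count + len > m->size)
			return -1;	/* caller would grow the buffer and retry */
		memcpy(m->buf + m->count, records[m->index], len);
		m->count += len;
		m->index++;
	}
	return 0;
}

int main(void)
{
	struct seq m = { .size = sizeof(m.buf) };

	fill(&m);
	fwrite(m.buf, 1, m.count, stdout);
	return 0;
}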
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 154098157473..73db464cd08b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
302 int subtract_lebs; 302 int subtract_lebs;
303 long long available; 303 long long available;
304 304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used; 305 available = c->main_bytes - c->lst.total_used;
318 306
319 /* 307 /*
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
714} 702}
715 703
716/** 704/**
717 * ubifs_budg_get_free_space - return amount of free space. 705 * ubifs_reported_space - calculate reported free space.
706 * @c: the UBIFS file-system description object
707 * @free: amount of free space
708 *
709 * This function calculates the amount of free space which will be reported to
710 * user-space. User-space applications tend to expect that if the file-system
711 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
712 * are able to write a file of size N. UBIFS attaches node headers to each data
713 * node and it has to write indexing nodes as well. This introduces additional
714 * overhead, and UBIFS has to report slightly less free space to meet the
715 * above expectation.
716 *
717 * This function assumes free space is made up of uncompressed data nodes and
718 * full index nodes (one per data node, tripled because we always allow enough
719 * space to write the index thrice).
720 *
721 * Note, the calculation is pessimistic, which means that most of the time
722 * UBIFS reports less space than it actually has.
723 */
724long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
725{
726 int divisor, factor, f;
727
728 /*
729 * Reported space size is @free * X, where X is the UBIFS block size
730 * divided by (UBIFS block size + all the overhead one data block
731 * introduces). The overhead is the node header + indexing overhead.
732 *
733 * Indexing overhead calculations are based on the following formula:
734 * I = N/(f - 1) + 1, where I is the number of indexing nodes, N the
735 * number of data nodes, and f the fanout. Because the effective UBIFS
736 * fanout is half the maximum fanout, we assume that each data node
737 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
738 * Note, the multiplier 3 is because UBIFS reserves three times as
739 * much space for the index.
740 */
741 f = c->fanout > 3 ? c->fanout >> 1 : 2;
742 factor = UBIFS_BLOCK_SIZE;
743 divisor = UBIFS_MAX_DATA_NODE_SZ;
744 divisor += (c->max_idx_node_sz * 3) / (f - 1);
745 free *= factor;
746 do_div(free, divisor);
747 return free;
748}
749
750/**
751 * ubifs_get_free_space - return amount of free space.
718 * @c: UBIFS file-system description object 752 * @c: UBIFS file-system description object
719 * 753 *
720 * This function returns amount of free space on the file-system. 754 * This function calculates the amount of free space to report to user-space.
755 *
756 * Because UBIFS may introduce substantial overhead (the index, node headers,
757 * alignment, wastage at the end of eraseblocks, etc), it cannot report the
758 * real amount of free flash space it has (indeed, because not all dirty
759 * space is reclaimable, UBIFS does not actually know the real amount). If it
760 * did, it would break user expectations about what free space is. Users seem
761 * accustomed to assuming that if the file-system reports N bytes of free
762 * space, they will be able to fit a file of N bytes into the FS. This almost
763 * works for traditional file-systems, because they have far less overhead
764 * than UBIFS. So, to keep users happy, UBIFS tries to take the overhead into account.
721 */ 765 */
722long long ubifs_budg_get_free_space(struct ubifs_info *c) 766long long ubifs_get_free_space(struct ubifs_info *c)
723{ 767{
724 int min_idx_lebs, rsvd_idx_lebs; 768 int min_idx_lebs, rsvd_idx_lebs, lebs;
725 long long available, outstanding, free; 769 long long available, outstanding, free;
726 770
727 /* Do exactly the same calculations as in 'do_budget_space()' */
728 spin_lock(&c->space_lock); 771 spin_lock(&c->space_lock);
729 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 772 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
773 outstanding = c->budg_data_growth + c->budg_dd_growth;
730 774
731 if (min_idx_lebs > c->lst.idx_lebs) 775 /*
732 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; 776 * Force the amount available to the total size reported if the used
733 else 777 * space is zero.
734 rsvd_idx_lebs = 0; 778 */
735 779 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
736 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
737 - c->lst.taken_empty_lebs) {
738 spin_unlock(&c->space_lock); 780 spin_unlock(&c->space_lock);
739 return 0; 781 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
740 } 782 }
741 783
742 available = ubifs_calc_available(c, min_idx_lebs); 784 available = ubifs_calc_available(c, min_idx_lebs);
743 outstanding = c->budg_data_growth + c->budg_dd_growth; 785
744 c->min_idx_lebs = min_idx_lebs; 786 /*
787 * When reporting free space to user-space, UBIFS guarantees that it is
788 * possible to write a file of free space size. This means that for
789 * empty LEBs we may use more precise calculations than
790 * 'ubifs_calc_available()' is using. Namely, we know that in empty
791 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
792 * Thus, amend the available space.
793 *
794 * Note, the calculations below are similar to what we have in
795 * 'do_budget_space()', so refer there for comments.
796 */
797 if (min_idx_lebs > c->lst.idx_lebs)
798 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
799 else
800 rsvd_idx_lebs = 0;
801 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
802 c->lst.taken_empty_lebs;
803 lebs -= rsvd_idx_lebs;
804 available += lebs * (c->dark_wm - c->leb_overhead);
745 spin_unlock(&c->space_lock); 805 spin_unlock(&c->space_lock);
746 806
747 if (available > outstanding) 807 if (available > outstanding)
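Editor's note: with the constants filled in, the new scaling is easy to see. Below is a minimal userspace sketch of the ubifs_reported_space() arithmetic under illustrative assumptions (4 KiB blocks, a 48-byte data-node header, ~300-byte index nodes, fanout 8 and so f = 4); it models the formula in the comment above, not the kernel function itself:

#include <stdio.h>
#include <stdint.h>

/* Illustrative constants, not read from a real superblock. */
#define BLOCK_SIZE        4096
#define MAX_DATA_NODE_SZ  (BLOCK_SIZE + 48)	/* data + node header */
#define MAX_IDX_NODE_SZ   300
#define FANOUT            8

static uint64_t reported_space(uint64_t free_bytes)
{
	int f = FANOUT > 3 ? FANOUT >> 1 : 2;	/* effective fanout */
	int divisor = MAX_DATA_NODE_SZ + (MAX_IDX_NODE_SZ * 3) / (f - 1);

	return free_bytes * BLOCK_SIZE / divisor;
}

int main(void)
{
	uint64_t raw = 128ULL << 20;	/* 128 MiB of raw free space */

	printf("reported: %llu bytes (~%.1f%%)\n",
	       (unsigned long long)reported_space(raw),
	       100.0 * (double)reported_space(raw) / (double)raw);
	return 0;
}

With these numbers each block of user data costs 4144 + 300 = 4444 bytes of flash, so roughly 92% of the raw free space is reported.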
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 5c96f1fb7016..2b267c9a1806 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
587 if (err) { 587 if (err) {
588 if (err != -ENOSPC) 588 if (err != -ENOSPC)
589 return err; 589 return err;
590 err = 0;
591 budgeted = 0; 590 budgeted = 0;
592 } 591 }
593 592
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 4071d1cae29f..3d698e2022b1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
793 int err; 793 int err;
794 struct ubifs_budget_req req; 794 struct ubifs_budget_req req;
795 loff_t old_size = inode->i_size, new_size = attr->ia_size; 795 loff_t old_size = inode->i_size, new_size = attr->ia_size;
796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1); 796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
797 struct ubifs_inode *ui = ubifs_inode(inode); 797 struct ubifs_inode *ui = ubifs_inode(inode);
798 798
799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); 799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
811 /* A funny way to budget for truncation node */ 811 /* A funny way to budget for truncation node */
812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; 812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
813 err = ubifs_budget_space(c, &req); 813 err = ubifs_budget_space(c, &req);
814 if (err) 814 if (err) {
815 return err; 815 /*
816 * Treat truncations to zero as deletion and always allow them,
817 * just like we do for '->unlink()'.
818 */
819 if (new_size || err != -ENOSPC)
820 return err;
821 budgeted = 0;
822 }
816 823
817 err = vmtruncate(inode, new_size); 824 err = vmtruncate(inode, new_size);
818 if (err) 825 if (err)
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
869 err = ubifs_jnl_truncate(c, inode, old_size, new_size); 876 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
870 mutex_unlock(&ui->ui_mutex); 877 mutex_unlock(&ui->ui_mutex);
871out_budg: 878out_budg:
872 ubifs_release_budget(c, &req); 879 if (budgeted)
880 ubifs_release_budget(c, &req);
881 else {
882 c->nospace = c->nospace_rp = 0;
883 smp_wmb();
884 }
873 return err; 885 return err;
874} 886}
875 887
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index adee7b5ddeab..e045c8b55423 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty 211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria. 212 * or do not have an LEB which satisfies the @min_space criteria.
213 * 213 *
214 * Note: 214 * Note, LEBs which have less than the dead watermark of free + dirty
215 * o LEBs which have less than dead watermark of dirty space are never picked 215 * space are never picked by this function.
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 * 216 *
223 * The additional @pick_free argument controls if this function has to return a 217 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must set it to %1, 218 * free or freeable LEB if one is present. For example, GC must set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
231 * 225 *
232 * In addition @pick_free is set to %2 by the recovery process in order to 226 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned. 227 * recover gc_lnum in which case an index LEB must not be returned.
228 *
229 * This function returns zero and the LEB properties of the found dirty LEB in case
230 * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
231 * case of other failures. The returned LEB is marked as "taken".
234 */ 232 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, 233int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free) 234 int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
245 int lebs, rsvd_idx_lebs = 0; 243 int lebs, rsvd_idx_lebs = 0;
246 244
247 spin_lock(&c->space_lock); 245 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs; 246 lebs = c->lst.empty_lebs + c->idx_gc_cnt;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs; 247 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250 248
251 /* 249 /*
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
317 lp = idx_lp; 315 lp = idx_lp;
318 316
319 if (lp) { 317 if (lp) {
320 ubifs_assert(lp->dirty >= c->dead_wm); 318 ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
321 goto found; 319 goto found;
322 } 320 }
323 321
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..13f1019c859f 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
344 if (err) 344 if (err)
345 goto out; 345 goto out;
346 346
347 /* Allow for races with TNC */
348 c->gced_lnum = lnum;
349 smp_wmb();
350 c->gc_seq += 1;
351 smp_wmb();
352
347 if (c->gc_lnum == -1) { 353 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum; 354 c->gc_lnum = lnum;
349 err = LEB_RETAINED; 355 err = LEB_RETAINED;
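Editor's note: the two smp_wmb() calls added above order the publication so that the collected LEB number is visible strictly before the bumped sequence counter; a reader that sees an unchanged gc_seq can trust its lockless read, and one that sees a bump re-checks gced_lnum. The matching read side arrives in the tnc.c hunk further below (maybe_leb_gced()). A userspace model of the writer half, using C11 fences in place of the kernel barriers:

#include <stdatomic.h>

static _Atomic int gc_seq;
static _Atomic int gced_lnum;

/* Publish the collected LEB number strictly before the sequence bump,
 * mirroring "gced_lnum = lnum; smp_wmb(); gc_seq += 1; smp_wmb();". */
static void publish_gc(int lnum)
{
	atomic_store_explicit(&gced_lnum, lnum, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* first smp_wmb() */
	atomic_fetch_add_explicit(&gc_seq, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* second smp_wmb() */
}

int main(void)
{
	publish_gc(7);
	return 0;
}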
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 87dabf9fe742..4c12a9215d7f 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
284} 284}
285 285
286/** 286/**
287 * ubifs_reported_space - calculate reported free space.
288 * @c: the UBIFS file-system description object
289 * @free: amount of free space
290 *
291 * This function calculates amount of free space which will be reported to
292 * user-space. User-space application tend to expect that if the file-system
293 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
294 * are able to write a file of size N. UBIFS attaches node headers to each data
295 * node and it has to write indexind nodes as well. This introduces additional
296 * overhead, and UBIFS it has to report sligtly less free space to meet the
297 * above expectetion.
298 *
299 * This function assumes free space is made up of uncompressed data nodes and
300 * full index nodes (one per data node, doubled because we always allow enough
301 * space to write the index twice).
302 *
303 * Note, the calculation is pessimistic, which means that most of the time
304 * UBIFS reports less space than it actually has.
305 */
306static inline long long ubifs_reported_space(const struct ubifs_info *c,
307 uint64_t free)
308{
309 int divisor, factor;
310
311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
313 do_div(free, divisor);
314
315 return free * factor;
316}
317
318/**
319 * ubifs_current_time - round current time to time granularity. 287 * ubifs_current_time - round current time to time granularity.
320 * @inode: inode 288 * @inode: inode
321 */ 289 */
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 293 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
326} 294}
327 295
296/**
297 * ubifs_tnc_lookup - look up a file-system node.
298 * @c: UBIFS file-system description object
299 * @key: node key to lookup
300 * @node: the node is returned here
301 *
302 * This function looks up and reads the node with key @key. The caller has to make
303 * sure the @node buffer is large enough to fit the node. Returns zero in case
304 * of success, %-ENOENT if the node was not found, and a negative error code in
305 * case of failure.
306 */
307static inline int ubifs_tnc_lookup(struct ubifs_info *c,
308 const union ubifs_key *key, void *node)
309{
310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311}
312
328#endif /* __UBIFS_MISC_H__ */ 313#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f71e6b8822c4..7562464ac83f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
370{ 370{
371 struct ubifs_info *c = dentry->d_sb->s_fs_info; 371 struct ubifs_info *c = dentry->d_sb->s_fs_info;
372 unsigned long long free; 372 unsigned long long free;
373 __le32 *uuid = (__le32 *)c->uuid;
373 374
374 free = ubifs_budg_get_free_space(c); 375 free = ubifs_get_free_space(c);
375 dbg_gen("free space %lld bytes (%lld blocks)", 376 dbg_gen("free space %lld bytes (%lld blocks)",
376 free, free >> UBIFS_BLOCK_SHIFT); 377 free, free >> UBIFS_BLOCK_SHIFT);
377 378
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
386 buf->f_files = 0; 387 buf->f_files = 0;
387 buf->f_ffree = 0; 388 buf->f_ffree = 0;
388 buf->f_namelen = UBIFS_MAX_NLEN; 389 buf->f_namelen = UBIFS_MAX_NLEN;
389 390 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
391 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
390 return 0; 392 return 0;
391} 393}
392 394
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
530 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); 532 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
531 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); 533 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
532 534
535 /*
536 * Calculate how many bytes would be wasted at the end of LEB if it was
537 * fully filled with data nodes of maximum size. This is used in
538 * calculations when reporting free space.
539 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
533 return 0; 541 return 0;
534} 542}
535 543
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
647 * internally because it does not make much sense for UBIFS, but it is 655 * internally because it does not make much sense for UBIFS, but it is
648 * necessary to report something for the 'statfs()' call. 656 * necessary to report something for the 'statfs()' call.
649 * 657 *
650 * Subtract the LEB reserved for GC and the LEB which is reserved for 658 * Subtract the LEB reserved for GC, the LEB which is reserved for
651 * deletions. 659 * deletions, and assume only one journal head is available.
652 *
653 * Review 'ubifs_calc_available()' if changing this calculation.
654 */ 660 */
655 tmp64 = c->main_lebs - 2; 661 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
656 tmp64 *= (uint64_t)c->leb_size - c->dark_wm; 662 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
657 tmp64 = ubifs_reported_space(c, tmp64); 663 tmp64 = ubifs_reported_space(c, tmp64);
658 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 664 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
659 665
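Editor's note: ubifs_statfs() previously left f_fsid zeroed; the hunk derives a stable 64-bit fsid by XOR-folding the 16-byte volume UUID into two 32-bit words. A standalone model of the fold, with a made-up UUID (it assumes a little-endian host, where reading the raw words matches le32_to_cpu()):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	/* A made-up 16-byte UUID standing in for c->uuid. */
	const uint8_t uuid[16] = {
		0xde, 0xad, 0xbe, 0xef, 0x01, 0x23, 0x45, 0x67,
		0x89, 0xab, 0xcd, 0xef, 0x00, 0x11, 0x22, 0x33,
	};
	uint32_t w[4];

	memcpy(w, uuid, sizeof(w));	/* four little-endian words */
	printf("f_fsid: %08x %08x\n", w[0] ^ w[2], w[1] ^ w[3]);
	return 0;
}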
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a96443..7da209ab9378 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506 if (keys_cmp(c, key, &node_key) != 0) 506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0; 507 ret = 0;
508 } 508 }
509 if (ret == 0) 509 if (ret == 0 && c->replaying)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret; 512 return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1382} 1382}
1383 1383
1384/** 1384/**
1385 * ubifs_tnc_lookup - look up a file-system node. 1385 * maybe_leb_gced - determine if a LEB may have been garbage collected.
1386 * @c: UBIFS file-system description object 1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup 1387 * @lnum: LEB number
1388 * @node: the node is returned here 1388 * @gc_seq1: garbage collection sequence number
1389 * 1389 *
1390 * This function look up and reads node with key @key. The caller has to make 1390 * This function determines if @lnum may have been garbage collected since
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case 1391 * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1392 * of success, %-ENOENT if the node was not found, and a negative error code in 1392 * %0 is returned.
1393 * case of failure.
1394 */ 1393 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, 1394static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1396 void *node)
1397{ 1395{
1398 int found, n, err; 1396 int gc_seq2, gced_lnum;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401 1397
1402 mutex_lock(&c->tnc_mutex); 1398 gced_lnum = c->gced_lnum;
1403 found = ubifs_lookup_level0(c, key, &znode, &n); 1399 smp_rmb();
1404 if (!found) { 1400 gc_seq2 = c->gc_seq;
1405 err = -ENOENT; 1401 /* Same seq means no GC */
1406 goto out; 1402 if (gc_seq1 == gc_seq2)
1407 } else if (found < 0) { 1403 return 0;
1408 err = found; 1404 /* Different by more than 1 means we don't know */
1409 goto out; 1405 if (gc_seq1 + 1 != gc_seq2)
1410 } 1406 return 1;
1411 zt = &znode->zbranch[n]; 1407 /*
1412 if (is_hash_key(c, key)) { 1408 * We have seen the sequence number has increased by 1. Now we need to
1413 /* 1409 * be sure we read the right LEB number, so read it again.
1414 * In this case the leaf node cache gets used, so we pass the 1410 */
1415 * address of the zbranch and keep the mutex locked 1411 smp_rmb();
1416 */ 1412 if (gced_lnum != c->gced_lnum)
1417 err = tnc_read_node_nm(c, zt, node); 1413 return 1;
1418 goto out; 1414 /* Finally we can check lnum */
1419 } 1415 if (gced_lnum == lnum)
1420 zbr = znode->zbranch[n]; 1416 return 1;
1421 mutex_unlock(&c->tnc_mutex); 1417 return 0;
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429} 1418}
1430 1419
1431/** 1420/**
@@ -1436,16 +1425,19 @@ out:
1436 * @lnum: LEB number is returned here 1425 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here 1426 * @offs: offset is returned here
1438 * 1427 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node 1428 * This function looks up and reads the node with key @key. The caller has to make
1440 * location also. See 'ubifs_tnc_lookup()'. 1429 * sure the @node buffer is large enough to fit the node. Returns zero in case
1430 * of success, %-ENOENT if the node was not found, and a negative error code in
1431 * case of failure. The node location can be returned in @lnum and @offs.
1441 */ 1432 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1433int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs) 1434 void *node, int *lnum, int *offs)
1444{ 1435{
1445 int found, n, err; 1436 int found, n, err, safely = 0, gc_seq1;
1446 struct ubifs_znode *znode; 1437 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt; 1438 struct ubifs_zbranch zbr, *zt;
1448 1439
1440again:
1449 mutex_lock(&c->tnc_mutex); 1441 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n); 1442 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) { 1443 if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1456 goto out; 1448 goto out;
1457 } 1449 }
1458 zt = &znode->zbranch[n]; 1450 zt = &znode->zbranch[n];
1451 if (lnum) {
1452 *lnum = zt->lnum;
1453 *offs = zt->offs;
1454 }
1459 if (is_hash_key(c, key)) { 1455 if (is_hash_key(c, key)) {
1460 /* 1456 /*
1461 * In this case the leaf node cache gets used, so we pass the 1457 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked 1458 * address of the zbranch and keep the mutex locked
1463 */ 1459 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node); 1460 err = tnc_read_node_nm(c, zt, node);
1467 goto out; 1461 goto out;
1468 } 1462 }
1463 if (safely) {
1464 err = ubifs_tnc_read_node(c, zt, node);
1465 goto out;
1466 }
1467 /* Drop the TNC mutex prematurely and race with garbage collection */
1469 zbr = znode->zbranch[n]; 1468 zbr = znode->zbranch[n];
1469 gc_seq1 = c->gc_seq;
1470 mutex_unlock(&c->tnc_mutex); 1470 mutex_unlock(&c->tnc_mutex);
1471 1471
1472 *lnum = zbr.lnum; 1472 if (ubifs_get_wbuf(c, zbr.lnum)) {
1473 *offs = zbr.offs; 1473 /* We do not GC journal heads */
1474 err = ubifs_tnc_read_node(c, &zbr, node);
1475 return err;
1476 }
1474 1477
1475 err = ubifs_tnc_read_node(c, &zbr, node); 1478 err = fallible_read_node(c, key, &zbr, node);
1476 return err; 1479 if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1480 /*
1481 * The node may have been GC'ed out from under us so try again
1482 * while keeping the TNC mutex locked.
1483 */
1484 safely = 1;
1485 goto again;
1486 }
1487 return 0;
1477 1488
1478out: 1489out:
1479 mutex_unlock(&c->tnc_mutex); 1490 mutex_unlock(&c->tnc_mutex);
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1498{ 1509{
1499 int found, n, err; 1510 int found, n, err;
1500 struct ubifs_znode *znode; 1511 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502 1512
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); 1513 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex); 1514 mutex_lock(&c->tnc_mutex);
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1522 goto out_unlock; 1532 goto out_unlock;
1523 } 1533 }
1524 1534
1525 zbr = znode->zbranch[n]; 1535 err = tnc_read_node_nm(c, &znode->zbranch[n], node);
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530 1536
1531out_unlock: 1537out_unlock:
1532 mutex_unlock(&c->tnc_mutex); 1538 mutex_unlock(&c->tnc_mutex);
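Editor's note: the tnc.c rework lets ubifs_tnc_locate() drop tnc_mutex before reading the node from flash and races deliberately with garbage collection: it snapshots gc_seq, performs a fallible read, and if maybe_leb_gced() says the LEB might have moved, retries under the mutex (the "safely" path). This is the reader half of the barrier pairing introduced in the gc.c hunk above. A condensed userspace model of the validation step, using the same C11 fences as the earlier writer sketch:

#include <stdatomic.h>

static _Atomic int gc_seq;
static _Atomic int gced_lnum;

/* Returns 1 if @lnum may have been garbage collected since the
 * sequence snapshot @seq1 was taken, 0 if it definitely was not. */
static int maybe_gced(int lnum, int seq1)
{
	int old_lnum = atomic_load_explicit(&gced_lnum, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* smp_rmb() */
	int seq2 = atomic_load_explicit(&gc_seq, memory_order_relaxed);

	if (seq1 == seq2)
		return 0;		/* no GC ran: the lockless read was safe */
	if (seq1 + 1 != seq2)
		return 1;		/* several GC passes: assume the worst */
	atomic_thread_fence(memory_order_acquire);
	if (old_lnum != atomic_load_explicit(&gced_lnum, memory_order_relaxed))
		return 1;		/* LEB number changed under us */
	return old_lnum == lnum;	/* did GC hit exactly this LEB? */
}

int main(void)
{
	return maybe_gced(3, atomic_load(&gc_seq));
}

The false positives (returning 1 when the LEB was in fact untouched) only cost a retry under the mutex; the protocol never returns 0 for a LEB that really moved.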
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index bd2121f3426e..a9ecbd9af20d 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -87,7 +87,7 @@
87#define UBIFS_SK_LEN 8 87#define UBIFS_SK_LEN 8
88 88
89/* Minimum index tree fanout */ 89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2 90#define UBIFS_MIN_FANOUT 3
91 91
92/* Maximum number of levels in UBIFS indexing B-tree */ 92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512 93#define UBIFS_MAX_LEVELS 512
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d7f706f7a302..17c620b93eec 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -995,6 +995,9 @@ struct ubifs_mount_opts {
995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
996 * @max_inode_sz: maximum possible inode size in bytes 996 * @max_inode_sz: maximum possible inode size in bytes
997 * @max_znode_sz: size of znode in bytes 997 * @max_znode_sz: size of znode in bytes
998 *
999 * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
1000 * data nodes of maximum size - used in free space reporting
998 * @dead_wm: LEB dead space watermark 1001 * @dead_wm: LEB dead space watermark
999 * @dark_wm: LEB dark space watermark 1002 * @dark_wm: LEB dark space watermark
1000 * @block_cnt: count of 4KiB blocks on the FS 1003 * @block_cnt: count of 4KiB blocks on the FS
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts {
1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning 1031 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1029 * @idx_gc: list of index LEBs that have been garbage collected 1032 * @idx_gc: list of index LEBs that have been garbage collected
1030 * @idx_gc_cnt: number of elements on the idx_gc list 1033 * @idx_gc_cnt: number of elements on the idx_gc list
1034 * @gc_seq: incremented for every non-index LEB garbage collected
1035 * @gced_lnum: last non-index LEB that was garbage collected
1031 * 1036 *
1032 * @infos_list: links all 'ubifs_info' objects 1037 * @infos_list: links all 'ubifs_info' objects
1033 * @umount_mutex: serializes shrinker and un-mount 1038 * @umount_mutex: serializes shrinker and un-mount
@@ -1224,6 +1229,8 @@ struct ubifs_info {
1224 int max_idx_node_sz; 1229 int max_idx_node_sz;
1225 long long max_inode_sz; 1230 long long max_inode_sz;
1226 int max_znode_sz; 1231 int max_znode_sz;
1232
1233 int leb_overhead;
1227 int dead_wm; 1234 int dead_wm;
1228 int dark_wm; 1235 int dark_wm;
1229 int block_cnt; 1236 int block_cnt;
@@ -1257,6 +1264,8 @@ struct ubifs_info {
1257 void *sbuf; 1264 void *sbuf;
1258 struct list_head idx_gc; 1265 struct list_head idx_gc;
1259 int idx_gc_cnt; 1266 int idx_gc_cnt;
1267 volatile int gc_seq;
1268 volatile int gced_lnum;
1260 1269
1261 struct list_head infos_list; 1270 struct list_head infos_list;
1262 struct mutex umount_mutex; 1271 struct mutex umount_mutex;
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1434 struct ubifs_budget_req *req); 1443 struct ubifs_budget_req *req);
1435void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, 1444void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1436 struct ubifs_budget_req *req); 1445 struct ubifs_budget_req *req);
1437long long ubifs_budg_get_free_space(struct ubifs_info *c); 1446long long ubifs_get_free_space(struct ubifs_info *c);
1438int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1447int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1439void ubifs_convert_page_budget(struct ubifs_info *c); 1448void ubifs_convert_page_budget(struct ubifs_info *c);
1449long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
1440long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1450long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1441 1451
1442/* find.c */ 1452/* find.c */
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1451/* tnc.c */ 1461/* tnc.c */
1452int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, 1462int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1453 struct ubifs_znode **zn, int *n); 1463 struct ubifs_znode **zn, int *n);
1454int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1455 void *node);
1456int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, 1464int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 void *node, const struct qstr *nm); 1465 void *node, const struct qstr *nm);
1458int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1466int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5f60363b9343..5311c1acdd40 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = {
475const struct file_operations xfs_dir_file_operations = { 475const struct file_operations xfs_dir_file_operations = {
476 .read = generic_read_dir, 476 .read = generic_read_dir,
477 .readdir = xfs_file_readdir, 477 .readdir = xfs_file_readdir,
478 .llseek = generic_file_llseek,
478 .unlocked_ioctl = xfs_file_ioctl, 479 .unlocked_ioctl = xfs_file_ioctl,
479#ifdef CONFIG_COMPAT 480#ifdef CONFIG_COMPAT
480 .compat_ioctl = xfs_file_compat_ioctl, 481 .compat_ioctl = xfs_file_compat_ioctl,
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 91bcd979242c..095d271f3434 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -355,7 +355,7 @@ xfs_vn_ci_lookup(
355 /* else case-insensitive match... */ 355 /* else case-insensitive match... */
356 dname.name = ci_name.name; 356 dname.name = ci_name.name;
357 dname.len = ci_name.len; 357 dname.len = ci_name.len;
358 dentry = d_add_ci(VFS_I(ip), dentry, &dname); 358 dentry = d_add_ci(dentry, VFS_I(ip), &dname);
359 kmem_free(ci_name.name); 359 kmem_free(ci_name.name);
360 return dentry; 360 return dentry;
361} 361}
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index cdc2d3464a1a..2813cdd72375 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_DMAPI_H__ 18#ifndef __XFS_DMAPI_H__
19#define __XFS_DMAPI_H__ 19#define __XFS_DMAPI_H__
20 20
21#include <linux/version.h>
22/* Values used to define the on-disk version of dm_attrname_t. All 21/* Values used to define the on-disk version of dm_attrname_t. All
23 * on-disk attribute names start with the 8-byte string "SGI_DMI_". 22 * on-disk attribute names start with the 8-byte string "SGI_DMI_".
24 * 23 *