Merge 3.10-rc5 into char-misc-next

author: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2013-06-09 01:34:53 -0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2013-06-09 01:34:53 -0400
commit: 38a4671cad3f0d277cf48445b49e42a475ebfb6a (patch)
tree: 7348f3dd5f4e1bec39758dcff748a9dcbbf3430c /fs
parent: d652f7022b359afd5d34fc9fffd71df118521ead (diff)
parent: 317ddd256b9c24b0d78fa8018f80f1e495481a10 (diff)
51 files changed, 916 insertions, 448 deletions
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8615ee89ab55..f95dddced968 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -265,8 +265,8 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                result = filldir(dirent, keybuf, keysize, filp->f_pos,
                                 (ino_t) value, d_type);
        }
+        if (!result)
-        filp->f_pos++;
+                filp->f_pos++;
        befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8e33ec65847b..58df174deb10 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/vfs.h>
 #include <linux/fs.h>
+#include <linux/inet.h>
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifsfs.h"
@@ -48,58 +49,74 @@ void cifs_dfs_release_automount_timer(void)
 }
 /**
- * cifs_get_share_name  -       extracts share name from UNC
+ * cifs_build_devname - build a devicename from a UNC and optional prepath
- * @node_name:  pointer to UNC string
+ * @nodename:   pointer to UNC string
+ * @prepath:    pointer to prefixpath (or NULL if there isn't one)
 *
- * Extracts sharename form full UNC.
+ * Build a new cifs devicename after chasing a DFS referral. Allocate a buffer
- * i.e. strips from UNC trailing path that is not part of share
+ * big enough to hold the final thing. Copy the UNC from the nodename, and
- * name and fixup missing '\' in the beginning of DFS node refferal
+ * concatenate the prepath onto the end of it if there is one.
- * if necessary.
+ *
- * Returns pointer to share name on success or ERR_PTR on error.
+ * Returns pointer to the built string, or a ERR_PTR. Caller is responsible
- * Caller is responsible for freeing returned string.
+ * for freeing the returned string.
 */
-static char *cifs_get_share_name(const char *node_name)
+static char *
+cifs_build_devname(char *nodename, const char *prepath)
 {
-        int len;
+        size_t pplen;
-        char *UNC;
+        size_t unclen;
-        char *pSep;
+        char *dev;
+        char *pos;
-        len = strlen(node_name);
-        UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */,
+        /* skip over any preceding delimiters */
-                         GFP_KERNEL);
+        nodename += strspn(nodename, "\\");
-        if (!UNC)
+        if (!*nodename)
-                return ERR_PTR(-ENOMEM);
+                return ERR_PTR(-EINVAL);
-        /* get share name and server name */
+        /* get length of UNC and set pos to last char */
-        if (node_name[1] != '\\') {
+        unclen = strlen(nodename);
-                UNC[0] = '\\';
+        pos = nodename + unclen - 1;
-                strncpy(UNC+1, node_name, len);
-                len++;
-                UNC[len] = 0;
-        } else {
-                strncpy(UNC, node_name, len);
-                UNC[len] = 0;
-        }
-        /* find server name end */
+        /* trim off any trailing delimiters */
-        pSep = memchr(UNC+2, '\\', len-2);
+        while (*pos == '\\') {
-        if (!pSep) {
+                --pos;
-                cifs_dbg(VFS, "%s: no server name end in node name: %s\n",
+                --unclen;
-                         __func__, node_name);
-                kfree(UNC);
-                return ERR_PTR(-EINVAL);
        }
-        /* find sharename end */
+        /* allocate a buffer:
-        pSep++;
+         * +2 for preceding "//"
-        pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC));
+         * +1 for delimiter between UNC and prepath
-        if (pSep) {
+         * +1 for trailing NULL
-                /* trim path up to sharename end
+         */
-                 * now we have share name in UNC */
+        pplen = prepath ? strlen(prepath) : 0;
-                *pSep = 0;
+        dev = kmalloc(2 + unclen + 1 + pplen + 1, GFP_KERNEL);
+        if (!dev)
+                return ERR_PTR(-ENOMEM);
+        pos = dev;
+        /* add the initial "//" */
+        *pos = '/';
+        ++pos;
+        *pos = '/';
+        ++pos;
+        /* copy in the UNC portion from referral */
+        memcpy(pos, nodename, unclen);
+        pos += unclen;
+        /* copy the prefixpath remainder (if there is one) */
+        if (pplen) {
+                *pos = '/';
+                ++pos;
+                memcpy(pos, prepath, pplen);
+                pos += pplen;
        }
-        return UNC;
+        /* NULL terminator */
+        *pos = '\0';
+        convert_delimiter(dev, '/');
+        return dev;
 }
@@ -123,6 +140,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 {
        int rc;
        char *mountdata = NULL;
+        const char *prepath = NULL;
        int md_len;
        char *tkn_e;
        char *srvIP = NULL;
@@ -132,7 +150,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
        if (sb_mountdata == NULL)
                return ERR_PTR(-EINVAL);
-        *devname = cifs_get_share_name(ref->node_name);
+        if (strlen(fullpath) - ref->path_consumed)
+                prepath = fullpath + ref->path_consumed;
+        *devname = cifs_build_devname(ref->node_name, prepath);
        if (IS_ERR(*devname)) {
                rc = PTR_ERR(*devname);
                *devname = NULL;
@@ -146,12 +167,14 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
                goto compose_mount_options_err;
        }
-        /* md_len = strlen(...) + 12 for 'sep+prefixpath='
+        /*
-         * assuming that we have 'unc=' and 'ip=' in
+         * In most cases, we'll be building a shorter string than the original,
-         * the original sb_mountdata
+         * but we do have to assume that the address in the ip= option may be
+         * much longer than the original. Add the max length of an address
+         * string to the length of the original string to allow for worst case.
         */
-        md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12;
+        md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
-        mountdata = kzalloc(md_len+1, GFP_KERNEL);
+        mountdata = kzalloc(md_len + 1, GFP_KERNEL);
        if (mountdata == NULL) {
                rc = -ENOMEM;
                goto compose_mount_options_err;
@@ -195,26 +218,6 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
                strncat(mountdata, &sep, 1);
        strcat(mountdata, "ip=");
        strcat(mountdata, srvIP);
-        strncat(mountdata, &sep, 1);
-        strcat(mountdata, "unc=");
-        strcat(mountdata, *devname);
-        /* find & copy prefixpath */
-        tkn_e = strchr(ref->node_name + 2, '\\');
-        if (tkn_e == NULL) {
-                /* invalid unc, missing share name*/
-                rc = -EINVAL;
-                goto compose_mount_options_err;
-        }
-        tkn_e = strchr(tkn_e + 1, '\\');
-        if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
-                strncat(mountdata, &sep, 1);
-                strcat(mountdata, "prefixpath=");
-                if (tkn_e)
-                        strcat(mountdata, tkn_e + 1);
-                strcat(mountdata, fullpath + ref->path_consumed);
-        }
        /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
        /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 72e4efee1389..3752b9f6d9e4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -372,9 +372,6 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
        cifs_show_security(s, tcon->ses->server);
        cifs_show_cache_flavor(s, cifs_sb);
-        seq_printf(s, ",unc=");
-        seq_escape(s, tcon->treeName, " \t\n\\");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
                seq_printf(s, ",multiuser");
        else if (tcon->ses->user_name)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 99eeaa17ee00..e3bc39bb9d12 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1061,6 +1061,7 @@ static int cifs_parse_security_flavors(char *value,
 #endif
        case Opt_sec_none:
                vol->nullauth = 1;
+                vol->secFlg |= CIFSSEC_MAY_NTLM;
                break;
        default:
                cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -1257,14 +1258,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
        vol->backupuid_specified = false; /* no backup intent for a user */
        vol->backupgid_specified = false; /* no backup intent for a group */
-        /*
+        switch (cifs_parse_devname(devname, vol)) {
-         * For now, we ignore -EINVAL errors under the assumption that the
+        case 0:
-         * unc= and prefixpath= options will be usable.
+                break;
-         */
+        case -ENOMEM:
-        if (cifs_parse_devname(devname, vol) == -ENOMEM) {
+                cifs_dbg(VFS, "Unable to allocate memory for devname.\n");
-                printk(KERN_ERR "CIFS: Unable to allocate memory to parse "
+                goto cifs_parse_mount_err;
-                                "device string.\n");
+        case -EINVAL:
-                goto out_nomem;
+                cifs_dbg(VFS, "Malformed UNC in devname.\n");
+                goto cifs_parse_mount_err;
+        default:
+                cifs_dbg(VFS, "Unknown error parsing devname.\n");
+                goto cifs_parse_mount_err;
        }
        while ((data = strsep(&options, separator)) != NULL) {
@@ -1826,7 +1831,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
        }
 #endif
        if (!vol->UNC) {
-                cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string or in unc= option!\n");
+                cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
                goto cifs_parse_mount_err;
        }
@@ -3274,8 +3279,8 @@ build_unc_path_to_root(const struct smb_vol *vol,
        pos = full_path + unc_len;
        if (pplen) {
-                *pos++ = CIFS_DIR_SEP(cifs_sb);
+                *pos = CIFS_DIR_SEP(cifs_sb);
-                strncpy(pos, vol->prepath, pplen);
+                strncpy(pos + 1, vol->prepath, pplen);
                pos += pplen;
        }
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index e7512e497611..7ede7306599f 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -34,7 +34,7 @@
 /**
 * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
- * @unc: UNC path specifying the server
+ * @unc: UNC path specifying the server (with '/' as delimiter)
 * @ip_addr: Where to return the IP address.
 *
 * The IP address will be returned in string form, and the caller is
@@ -64,7 +64,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
        hostname = unc + 2;
        /* Search for server name delimiter */
-        sep = memchr(hostname, '\\', len);
+        sep = memchr(hostname, '/', len);
        if (sep)
                len = sep - hostname;
        else
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 201f0a0d6b0a..a7abbea2c096 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -295,6 +295,12 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 static int
 ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
+        int rc;
+        rc = filemap_write_and_wait(file->f_mapping);
+        if (rc)
+                return rc;
        return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
 }
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index bfb531564319..8dd524f32284 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -44,8 +44,11 @@ static ssize_t efivarfs_file_write(struct file *file,
        bytes = efivar_entry_set_get_size(var, attributes, &datasize,
                                          data, &set);
-        if (!set && bytes)
+        if (!set && bytes) {
+                if (bytes == -ENOENT)
+                        bytes = -EIO;
                goto out;
+        }
        if (bytes == -ENOENT) {
                drop_nlink(inode);
@@ -76,7 +79,14 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
        int err;
        err = efivar_entry_size(var, &datasize);
-        if (err)
+        /*
+         * efivarfs represents uncommitted variables with
+         * zero-length files. Reading them should return EOF.
+         */
+        if (err == -ENOENT)
+                return 0;
+        else if (err)
                return err;
        data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 254df56b847b..f3f783dc4f75 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -180,6 +180,8 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 {
        struct inode *inode;
+        struct dentry *parent;
+        struct fuse_conn *fc;
        inode = ACCESS_ONCE(entry->d_inode);
        if (inode && is_bad_inode(inode))
@@ -187,10 +189,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
        else if (fuse_dentry_time(entry) < get_jiffies_64()) {
                int err;
                struct fuse_entry_out outarg;
-                struct fuse_conn *fc;
                struct fuse_req *req;
                struct fuse_forget_link *forget;
-                struct dentry *parent;
                u64 attr_version;
                /* For negative dentries, always do a fresh lookup */
@@ -241,8 +241,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                                       entry_attr_timeout(&outarg),
                                       attr_version);
                fuse_change_entry_timeout(entry, &outarg);
+        } else if (inode) {
+                fc = get_fuse_conn(inode);
+                if (fc->readdirplus_auto) {
+                        parent = dget_parent(entry);
+                        fuse_advise_use_readdirplus(parent->d_inode);
+                        dput(parent);
+                }
        }
-        fuse_advise_use_readdirplus(inode);
        return 1;
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d1c9b85b3f58..e570081f9f76 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -16,6 +16,7 @@
 #include <linux/compat.h>
 #include <linux/swap.h>
 #include <linux/aio.h>
+#include <linux/falloc.h>
 static const struct file_operations fuse_direct_io_file_operations;
@@ -1278,7 +1279,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
        iov_iter_init(&ii, iov, nr_segs, count, 0);
-        req = fuse_get_req(fc, fuse_iter_npages(&ii));
+        if (io->async)
+                req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
+        else
+                req = fuse_get_req(fc, fuse_iter_npages(&ii));
        if (IS_ERR(req))
                return PTR_ERR(req);
@@ -1314,7 +1318,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
                        break;
                if (count) {
                        fuse_put_request(fc, req);
-                        req = fuse_get_req(fc, fuse_iter_npages(&ii));
+                        if (io->async)
+                                req = fuse_get_req_for_background(fc,
+                                        fuse_iter_npages(&ii));
+                        else
+                                req = fuse_get_req(fc, fuse_iter_npages(&ii));
                        if (IS_ERR(req))
                                break;
                }
@@ -2365,6 +2373,11 @@ static void fuse_do_truncate(struct file *file)
        fuse_do_setattr(inode, &attr, file);
 }
+static inline loff_t fuse_round_up(loff_t off)
+{
+        return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+}
 static ssize_t
 fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                        loff_t offset, unsigned long nr_segs)
@@ -2372,6 +2385,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        ssize_t ret = 0;
        struct file *file = iocb->ki_filp;
        struct fuse_file *ff = file->private_data;
+        bool async_dio = ff->fc->async_dio;
        loff_t pos = 0;
        struct inode *inode;
        loff_t i_size;
@@ -2383,10 +2397,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        i_size = i_size_read(inode);
        /* optimization for short read */
-        if (rw != WRITE && offset + count > i_size) {
+        if (async_dio && rw != WRITE && offset + count > i_size) {
                if (offset >= i_size)
                        return 0;
-                count = i_size - offset;
+                count = min_t(loff_t, count, fuse_round_up(i_size - offset));
        }
        io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2404,7 +2418,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
         * By default, we want to optimize all I/Os with async request
         * submission to the client filesystem if supported.
         */
-        io->async = ff->fc->async_dio;
+        io->async = async_dio;
        io->iocb = iocb;
        /*
@@ -2412,7 +2426,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
         * to wait on real async I/O requests, so we must submit this request
         * synchronously.
         */
-        if (!is_sync_kiocb(iocb) && (offset + count > i_size))
+        if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
                io->async = false;
        if (rw == WRITE)
@@ -2424,7 +2438,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
                /* we have a non-extending, async request, so return */
-                if (ret > 0 && !is_sync_kiocb(iocb))
+                if (!is_sync_kiocb(iocb))
                        return -EIOCBQUEUED;
                ret = wait_on_sync_kiocb(iocb);
@@ -2446,6 +2460,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
                                loff_t length)
 {
        struct fuse_file *ff = file->private_data;
+        struct inode *inode = file->f_inode;
        struct fuse_conn *fc = ff->fc;
        struct fuse_req *req;
        struct fuse_fallocate_in inarg = {
@@ -2459,9 +2474,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        if (fc->no_fallocate)
                return -EOPNOTSUPP;
+        if (mode & FALLOC_FL_PUNCH_HOLE) {
+                mutex_lock(&inode->i_mutex);
+                fuse_set_nowrite(inode);
+        }
        req = fuse_get_req_nopages(fc);
-        if (IS_ERR(req))
+        if (IS_ERR(req)) {
-                return PTR_ERR(req);
+                err = PTR_ERR(req);
+                goto out;
+        }
        req->in.h.opcode = FUSE_FALLOCATE;
        req->in.h.nodeid = ff->nodeid;
@@ -2476,6 +2498,24 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        }
        fuse_put_request(fc, req);
+        if (err)
+                goto out;
+        /* we could have extended the file */
+        if (!(mode & FALLOC_FL_KEEP_SIZE))
+                fuse_write_update_size(inode, offset + length);
+        if (mode & FALLOC_FL_PUNCH_HOLE)
+                truncate_pagecache_range(inode, offset, offset + length - 1);
+        fuse_invalidate_attr(inode);
+out:
+        if (mode & FALLOC_FL_PUNCH_HOLE) {
+                fuse_release_nowrite(inode);
+                mutex_unlock(&inode->i_mutex);
+        }
        return err;
 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6201f81e4d3a..9a0cdde14a08 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -867,10 +867,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->dont_mask = 1;
                        if (arg->flags & FUSE_AUTO_INVAL_DATA)
                                fc->auto_inval_data = 1;
-                        if (arg->flags & FUSE_DO_READDIRPLUS)
+                        if (arg->flags & FUSE_DO_READDIRPLUS) {
                                fc->do_readdirplus = 1;
-                        if (arg->flags & FUSE_READDIRPLUS_AUTO)
+                                if (arg->flags & FUSE_READDIRPLUS_AUTO)
-                                fc->readdirplus_auto = 1;
+                                        fc->readdirplus_auto = 1;
+                        }
                        if (arg->flags & FUSE_ASYNC_DIO)
                                fc->async_dio = 1;
                } else {
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1dc9a13ce6bb..93b5809c20bb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1286,17 +1286,26 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
        if (ret)
                return ret;
+        ret = get_write_access(inode);
+        if (ret)
+                return ret;
        inode_dio_wait(inode);
        ret = gfs2_rs_alloc(GFS2_I(inode));
        if (ret)
-                return ret;
+                goto out;
        oldsize = inode->i_size;
-        if (newsize >= oldsize)
+        if (newsize >= oldsize) {
-                return do_grow(inode, newsize);
+                ret = do_grow(inode, newsize);
+                goto out;
+        }
-        return do_shrink(inode, oldsize, newsize);
+        ret = do_shrink(inode, oldsize, newsize);
+out:
+        put_write_access(inode);
+        return ret;
 }
 int gfs2_truncatei_resume(struct gfs2_inode *ip)
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c3e82bd23179..b631c9043460 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -354,22 +354,31 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
                return ERR_PTR(-EIO);
        }
-        hc = kmalloc(hsize, GFP_NOFS);
+        hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
-        ret = -ENOMEM;
+        if (hc == NULL)
+                hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL);
        if (hc == NULL)
                return ERR_PTR(-ENOMEM);
        ret = gfs2_dir_read_data(ip, hc, hsize);
        if (ret < 0) {
-                kfree(hc);
+                if (is_vmalloc_addr(hc))
+                        vfree(hc);
+                else
+                        kfree(hc);
                return ERR_PTR(ret);
        }
        spin_lock(&inode->i_lock);
-        if (ip->i_hash_cache)
+        if (ip->i_hash_cache) {
-                kfree(hc);
+                if (is_vmalloc_addr(hc))
-        else
+                        vfree(hc);
+                else
+                        kfree(hc);
+        } else {
                ip->i_hash_cache = hc;
+        }
        spin_unlock(&inode->i_lock);
        return ip->i_hash_cache;
@@ -385,7 +394,10 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
 {
        __be64 *hc = ip->i_hash_cache;
        ip->i_hash_cache = NULL;
-        kfree(hc);
+        if (is_vmalloc_addr(hc))
+                vfree(hc);
+        else
+                kfree(hc);
 }
 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
@@ -1113,7 +1125,10 @@ static int dir_double_exhash(struct gfs2_inode *dip)
        if (IS_ERR(hc))
                return PTR_ERR(hc);
-        h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
+        h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN);
+        if (hc2 == NULL)
+                hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL);
        if (!hc2)
                return -ENOMEM;
@@ -1145,7 +1160,10 @@ fail:
        gfs2_dinode_out(dip, dibh->b_data);
        brelse(dibh);
 out_kfree:
-        kfree(hc2);
+        if (is_vmalloc_addr(hc2))
+                vfree(hc2);
+        else
+                kfree(hc2);
        return error;
 }
@@ -1846,6 +1864,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
        ht = kzalloc(size, GFP_NOFS);
+        if (ht == NULL)
+                ht = vzalloc(size);
        if (!ht)
                return -ENOMEM;
@@ -1933,7 +1953,10 @@ out_rlist:
        gfs2_rlist_free(&rlist);
        gfs2_quota_unhold(dip);
 out:
-        kfree(ht);
+        if (is_vmalloc_addr(ht))
+                vfree(ht);
+        else
+                kfree(ht);
        return error;
 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index acd16764b133..ad0dc38d87ab 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -402,16 +402,20 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        /* Update file times before taking page lock */
        file_update_time(vma->vm_file);
+        ret = get_write_access(inode);
+        if (ret)
+                goto out;
        ret = gfs2_rs_alloc(ip);
        if (ret)
-                return ret;
+                goto out_write_access;
        gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret)
-                goto out;
+                goto out_uninit;
        set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
        set_bit(GIF_SW_PAGED, &ip->i_flags);
@@ -480,12 +484,15 @@ out_quota_unlock:
        gfs2_quota_unlock(ip);
 out_unlock:
        gfs2_glock_dq(&gh);
-out:
+out_uninit:
        gfs2_holder_uninit(&gh);
        if (ret == 0) {
                set_page_dirty(page);
                wait_for_stable_page(page);
        }
+out_write_access:
+        put_write_access(inode);
+out:
        sb_end_pagefault(inode->i_sb);
        return block_page_mkwrite_return(ret);
 }
@@ -594,10 +601,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
        kfree(file->private_data);
        file->private_data = NULL;
-        if ((file->f_mode & FMODE_WRITE) &&
+        if (!(file->f_mode & FMODE_WRITE))
-            (atomic_read(&inode->i_writecount) == 1))
+                return 0;
-                gfs2_rs_delete(ip);
+        gfs2_rs_delete(ip);
        return 0;
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8833a4f264e3..62b484e4a9e4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -189,6 +189,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
        return inode;
 fail_refresh:
+        ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
        ip->i_iopen_gh.gh_gl->gl_object = NULL;
        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 fail_iopen:
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 68b4c8f1fce8..6c33d7b6e0c4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -419,7 +419,9 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
                if (total > limit)
                        num = limit;
                gfs2_log_unlock(sdp);
-                page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num);
+                page = gfs2_get_log_desc(sdp,
+                                         is_databuf ? GFS2_LOG_DESC_JDATA :
+                                         GFS2_LOG_DESC_METADATA, num + 1, num);
                ld = page_address(page);
                gfs2_log_lock(sdp);
                ptr = (__be64 *)(ld + 1);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5232525934ae..9809156e3d04 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -638,8 +638,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 */
 void gfs2_rs_delete(struct gfs2_inode *ip)
 {
+        struct inode *inode = &ip->i_inode;
        down_write(&ip->i_rw_mutex);
-        if (ip->i_res) {
+        if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) {
                gfs2_rs_deltree(ip->i_res);
                BUG_ON(ip->i_res->rs_free);
                kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 917c8e1eb4ae..e5639dec66c4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1444,6 +1444,7 @@ static void gfs2_evict_inode(struct inode *inode)
        /* Must not read inode block until block type has been verified */
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
        if (unlikely(error)) {
+                ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                goto out;
        }
@@ -1514,8 +1515,10 @@ out_unlock:
        if (gfs2_rs_active(ip->i_res))
                gfs2_rs_deltree(ip->i_res);
-        if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+        if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+                ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                gfs2_glock_dq(&ip->i_iopen_gh);
+        }
        gfs2_holder_uninit(&ip->i_iopen_gh);
        gfs2_glock_dq_uninit(&gh);
        if (error && error != GLR_TRYFAILED && error != -EROFS)
@@ -1534,6 +1537,7 @@ out:
        ip->i_gl = NULL;
        if (ip->i_iopen_gh.gh_gl) {
                ip->i_iopen_gh.gh_gl->gl_object = NULL;
+                ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
        }
 }
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 546f6d39713a..834ac13c04b7 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -33,25 +33,27 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
        if (whence == SEEK_DATA || whence == SEEK_HOLE)
                return -EINVAL;
+        mutex_lock(&i->i_mutex);
        hpfs_lock(s);
        /*printk("dir lseek\n");*/
        if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
-        mutex_lock(&i->i_mutex);
        pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
        while (pos != new_off) {
                if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
                else goto fail;
                if (pos == 12) goto fail;
        }
-        mutex_unlock(&i->i_mutex);
+        hpfs_add_pos(i, &filp->f_pos);
 ok:
+        filp->f_pos = new_off;
        hpfs_unlock(s);
-        return filp->f_pos = new_off;
-fail:
        mutex_unlock(&i->i_mutex);
+        return new_off;
+fail:
        /*printk("illegal lseek: %016llx\n", new_off);*/
        hpfs_unlock(s);
+        mutex_unlock(&i->i_mutex);
        return -ESPIPE;
 }
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 3027f4dbbab5..e4ba5fe4c3b5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -109,10 +109,14 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
 {
        struct inode *inode = mapping->host;
+        hpfs_lock(inode->i_sb);
        if (to > inode->i_size) {
                truncate_pagecache(inode, to, inode->i_size);
                hpfs_truncate(inode);
        }
+        hpfs_unlock(inode->i_sb);
 }
 static int hpfs_write_begin(struct file *file, struct address_space *mapping,
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index c57499dca89c..360d27c48887 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2009,7 +2009,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
        bio->bi_end_io = lbmIODone;
        bio->bi_private = bp;
-        submit_bio(READ_SYNC, bio);
+        /* check if journaling to disk has been disabled */
+        if (log->no_integrity) {
+                bio->bi_size = 0;
+                lbmIODone(bio, 0);
+        } else {
+                submit_bio(READ_SYNC, bio);
+        }
        wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2003e830ed1c..788e0a9c1fb0 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -611,11 +611,28 @@ static int jfs_freeze(struct super_block *sb)
 {
        struct jfs_sb_info *sbi = JFS_SBI(sb);
        struct jfs_log *log = sbi->log;
+        int rc = 0;
        if (!(sb->s_flags & MS_RDONLY)) {
                txQuiesce(sb);
-                lmLogShutdown(log);
+                rc = lmLogShutdown(log);
-                updateSuper(sb, FM_CLEAN);
+                if (rc) {
+                        jfs_error(sb, "jfs_freeze: lmLogShutdown failed");
+                        /* let operations fail rather than hang */
+                        txResume(sb);
+                        return rc;
+                }
+                rc = updateSuper(sb, FM_CLEAN);
+                if (rc) {
+                        jfs_err("jfs_freeze: updateSuper failed\n");
+                        /*
+                         * Don't fail here. Everything succeeded except
+                         * marking the superblock clean, so there's really
+                         * no harm in leaving it frozen for now.
+                         */
+                }
        }
        return 0;
 }
@@ -627,13 +644,18 @@ static int jfs_unfreeze(struct super_block *sb)
        int rc = 0;
        if (!(sb->s_flags & MS_RDONLY)) {
-                updateSuper(sb, FM_MOUNT);
+                rc = updateSuper(sb, FM_MOUNT);
-                if ((rc = lmLogInit(log)))
+                if (rc) {
-                        jfs_err("jfs_unlock failed with return code %d", rc);
+                        jfs_error(sb, "jfs_unfreeze: updateSuper failed");
-                else
+                        goto out;
-                        txResume(sb);
+                }
+                rc = lmLogInit(log);
+                if (rc)
+                        jfs_error(sb, "jfs_unfreeze: lmLogInit failed");
+out:
+                txResume(sb);
        }
-        return 0;
+        return rc;
 }
 static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4e2fe714d5c2..d7ba5616989c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1078,7 +1078,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
        struct nfs4_state *state = opendata->state;
        struct nfs_inode *nfsi = NFS_I(state->inode);
        struct nfs_delegation *delegation;
-        int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC);
+        int open_mode = opendata->o_arg.open_flags;
        fmode_t fmode = opendata->o_arg.fmode;
        nfs4_stateid stateid;
        int ret = -EAGAIN;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a366107a7331..2d7525fbcf25 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1942,6 +1942,7 @@ static int nfs23_validate_mount_data(void *options,
                args->namlen            = data->namlen;
                args->bsize             = data->bsize;
+                args->auth_flavors[0] = RPC_AUTH_UNIX;
                if (data->flags & NFS_MOUNT_SECFLAVOUR)
                        args->auth_flavors[0] = data->pseudoflavor;
                if (!args->nfs_server.hostname)
@@ -2637,6 +2638,7 @@ static int nfs4_validate_mount_data(void *options,
                        goto out_no_address;
                args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+                args->auth_flavors[0] = RPC_AUTH_UNIX;
                if (data->auth_flavourlen) {
                        if (data->auth_flavourlen > 1)
                                goto out_inval_auth;
diff --git a/fs/pnode.c b/fs/pnode.c
index 3d2a7141b87a..9af0df15256e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -83,7 +83,8 @@ static int do_make_slave(struct mount *mnt)
                if (peer_mnt == mnt)
                        peer_mnt = NULL;
        }
-        if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share))
+        if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
+            list_empty(&mnt->mnt_share))
                mnt_release_group_id(mnt);
        list_del_init(&mnt->mnt_share);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd51e50001fe..c3834dad09b3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2118,6 +2118,7 @@ static int show_timer(struct seq_file *m, void *v)
                nstr[notify & ~SIGEV_THREAD_ID],
                (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
                pid_nr_ns(timer->it_pid, tp->ns));
+        seq_printf(m, "ClockID: %d\n", timer->it_clock);
        return 0;
 }
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 8798d065e400..afa6be6fc397 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -120,7 +120,7 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct inode *inode = file_inode(filp);
        struct super_block *s = inode->i_sb;
        struct qnx6_sb_info *sbi = QNX6_SB(s);
-        loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
+        loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
        unsigned long npages = dir_pages(inode);
        unsigned long n = pos >> PAGE_CACHE_SHIFT;
        unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 66c53b642a88..6c2d136561cb 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -204,6 +204,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
                                next_pos = deh_offset(deh) + 1;
                                if (item_moved(&tmp_ih, &path_to_entry)) {
+                                        set_cpu_key_k_offset(&pos_key,
+                                                             next_pos);
                                        goto research;
                                }
                        }       /* for */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77d6d47abc83..f844533792ee 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1811,11 +1811,16 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
                                  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
        memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
        args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
-        if (insert_inode_locked4(inode, args.objectid,
-                             reiserfs_find_actor, &args) < 0) {
+        reiserfs_write_unlock(inode->i_sb);
+        err = insert_inode_locked4(inode, args.objectid,
+                             reiserfs_find_actor, &args);
+        reiserfs_write_lock(inode->i_sb);
+        if (err) {
                err = -EINVAL;
                goto out_bad_inode;
        }
        if (old_format_only(sb))
                /* not a perfect generation count, as object ids can be reused, but
                 ** this is as good as reiserfs can do right now.
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4cce1d9552fb..821bcf70e467 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -318,7 +318,19 @@ static int delete_one_xattr(struct dentry *dentry, void *data)
 static int chown_one_xattr(struct dentry *dentry, void *data)
 {
        struct iattr *attrs = data;
-        return reiserfs_setattr(dentry, attrs);
+        int ia_valid = attrs->ia_valid;
+        int err;
+        /*
+         * We only want the ownership bits. Otherwise, we'll do
+         * things like change a directory to a regular file if
+         * ATTR_MODE is set.
+         */
+        attrs->ia_valid &= (ATTR_UID|ATTR_GID);
+        err = reiserfs_setattr(dentry, attrs);
+        attrs->ia_valid = ia_valid;
+        return err;
 }
 /* No i_mutex, but the inode is unconnected. */
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d7c01ef64eda..6c8767fdfc6a 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -443,6 +443,9 @@ int reiserfs_acl_chmod(struct inode *inode)
        int depth;
        int error;
+        if (IS_PRIVATE(inode))
+                return 0;
        if (S_ISLNK(inode->i_mode))
                return -EOPNOTSUPP;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 1d32f1d52763..306d883d89bc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -21,6 +21,8 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_vnodeops.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
 #include "xfs_trace.h"
 #include <linux/slab.h>
 #include <linux/xattr.h>
@@ -34,7 +36,9 @@
 */
 STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
+xfs_acl_from_disk(
+        struct xfs_acl  *aclp,
+        int             max_entries)
 {
        struct posix_acl_entry *acl_e;
        struct posix_acl *acl;
@@ -42,7 +46,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
        unsigned int count, i;
        count = be32_to_cpu(aclp->acl_cnt);
-        if (count > XFS_ACL_MAX_ENTRIES)
+        if (count > max_entries)
                return ERR_PTR(-EFSCORRUPTED);
        acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -108,9 +112,9 @@ xfs_get_acl(struct inode *inode, int type)
        struct xfs_inode *ip = XFS_I(inode);
        struct posix_acl *acl;
        struct xfs_acl *xfs_acl;
-        int len = sizeof(struct xfs_acl);
        unsigned char *ea_name;
        int error;
+        int len;
        acl = get_cached_acl(inode, type);
        if (acl != ACL_NOT_CACHED)
@@ -133,8 +137,8 @@ xfs_get_acl(struct inode *inode, int type)
         * If we have a cached ACLs value just return it, not need to
         * go out to the disk.
         */
+        len = XFS_ACL_MAX_SIZE(ip->i_mount);
-        xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+        xfs_acl = kzalloc(len, GFP_KERNEL);
        if (!xfs_acl)
                return ERR_PTR(-ENOMEM);
@@ -153,7 +157,7 @@ xfs_get_acl(struct inode *inode, int type)
                goto out;
        }
-        acl = xfs_acl_from_disk(xfs_acl);
+        acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
        if (IS_ERR(acl))
                goto out;
@@ -189,16 +193,17 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        if (acl) {
                struct xfs_acl *xfs_acl;
-                int len;
+                int len = XFS_ACL_MAX_SIZE(ip->i_mount);
-                xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+                xfs_acl = kzalloc(len, GFP_KERNEL);
                if (!xfs_acl)
                        return -ENOMEM;
                xfs_acl_to_disk(xfs_acl, acl);
-                len = sizeof(struct xfs_acl) -
-                        (sizeof(struct xfs_acl_entry) *
+                /* subtract away the unused acl entries */
-                         (XFS_ACL_MAX_ENTRIES - acl->a_count));
+                len -= sizeof(struct xfs_acl_entry) *
+                         (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
                error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
                                len, ATTR_ROOT);
@@ -243,7 +248,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
 static int
 xfs_acl_exists(struct inode *inode, unsigned char *name)
 {
-        int len = sizeof(struct xfs_acl);
+        int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
        return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
                            ATTR_ROOT|ATTR_KERNOVAL) == 0);
@@ -379,7 +384,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
                goto out_release;
        error = -EINVAL;
-        if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+        if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
                goto out_release;
        if (type == ACL_TYPE_ACCESS) {
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 39632d941354..4016a567b83c 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -22,19 +22,36 @@ struct inode;
 struct posix_acl;
 struct xfs_inode;
-#define XFS_ACL_MAX_ENTRIES 25
 #define XFS_ACL_NOT_PRESENT (-1)
 /* On-disk XFS access control list structure */
+struct xfs_acl_entry {
+        __be32  ae_tag;
+        __be32  ae_id;
+        __be16  ae_perm;
+        __be16  ae_pad;         /* fill the implicit hole in the structure */
+};
 struct xfs_acl {
-        __be32          acl_cnt;
+        __be32                  acl_cnt;
-        struct xfs_acl_entry {
+        struct xfs_acl_entry    acl_entry[0];
-                __be32  ae_tag;
-                __be32  ae_id;
-                __be16  ae_perm;
-        } acl_entry[XFS_ACL_MAX_ENTRIES];
 };
+/*
+ * The number of ACL entries allowed is defined by the on-disk format.
+ * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is
+ * limited only by the maximum size of the xattr that stores the information.
+ */
+#define XFS_ACL_MAX_ENTRIES(mp) \
+        (xfs_sb_version_hascrc(&mp->m_sb) \
+                ?  (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
+                                                sizeof(struct xfs_acl_entry) \
+                : 25)
+#define XFS_ACL_MAX_SIZE(mp) \
+        (sizeof(struct xfs_acl) + \
+                sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
 /* On-disk XFS extended attribute names */
 #define SGI_ACL_FILE            (unsigned char *)"SGI_ACL_FILE"
 #define SGI_ACL_DEFAULT         (unsigned char *)"SGI_ACL_DEFAULT"
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 0bce1b348580..31d3cd129269 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1412,7 +1412,7 @@ xfs_attr3_leaf_add_work(
                name_rmt->valuelen = 0;
                name_rmt->valueblk = 0;
                args->rmtblkno = 1;
-                args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
+                args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
        }
        xfs_trans_log_buf(args->trans, bp,
             XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -1445,11 +1445,12 @@ xfs_attr3_leaf_add_work(
 STATIC void
 xfs_attr3_leaf_compact(
        struct xfs_da_args      *args,
-        struct xfs_attr3_icleaf_hdr *ichdr_d,
+        struct xfs_attr3_icleaf_hdr *ichdr_dst,
        struct xfs_buf          *bp)
 {
-        xfs_attr_leafblock_t    *leaf_s, *leaf_d;
+        struct xfs_attr_leafblock *leaf_src;
-        struct xfs_attr3_icleaf_hdr ichdr_s;
+        struct xfs_attr_leafblock *leaf_dst;
+        struct xfs_attr3_icleaf_hdr ichdr_src;
        struct xfs_trans        *trans = args->trans;
        struct xfs_mount        *mp = trans->t_mountp;
        char                    *tmpbuffer;
@@ -1457,29 +1458,38 @@ xfs_attr3_leaf_compact(
        trace_xfs_attr_leaf_compact(args);
        tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
-        ASSERT(tmpbuffer != NULL);
        memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
        memset(bp->b_addr, 0, XFS_LBSIZE(mp));
+        leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
+        leaf_dst = bp->b_addr;
        /*
-         * Copy basic information
+         * Copy the on-disk header back into the destination buffer to ensure
+         * all the information in the header that is not part of the incore
+         * header structure is preserved.
         */
-        leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
+        memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
-        leaf_d = bp->b_addr;
-        ichdr_s = *ichdr_d;     /* struct copy */
+        /* Initialise the incore headers */
-        ichdr_d->firstused = XFS_LBSIZE(mp);
+        ichdr_src = *ichdr_dst; /* struct copy */
-        ichdr_d->usedbytes = 0;
+        ichdr_dst->firstused = XFS_LBSIZE(mp);
-        ichdr_d->count = 0;
+        ichdr_dst->usedbytes = 0;
-        ichdr_d->holes = 0;
+        ichdr_dst->count = 0;
-        ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s);
+        ichdr_dst->holes = 0;
-        ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
+        ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
+        ichdr_dst->freemap[0].size = ichdr_dst->firstused -
+                                                ichdr_dst->freemap[0].base;
+        /* write the header back to initialise the underlying buffer */
+        xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
        /*
         * Copy all entry's in the same (sorted) order,
         * but allocate name/value pairs packed and in sequence.
         */
-        xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0,
+        xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
-                                ichdr_s.count, mp);
+                                ichdr_src.count, mp);
        /*
         * this logs the entire buffer, but the caller must write the header
         * back to the buffer when it is finished modifying it.
@@ -2181,14 +2191,24 @@ xfs_attr3_leaf_unbalance(
                struct xfs_attr_leafblock *tmp_leaf;
                struct xfs_attr3_icleaf_hdr tmphdr;
-                tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP);
+                tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
-                memset(tmp_leaf, 0, state->blocksize);
-                memset(&tmphdr, 0, sizeof(tmphdr));
+                /*
+                 * Copy the header into the temp leaf so that all the stuff
+                 * not in the incore header is present and gets copied back in
+                 * once we've moved all the entries.
+                 */
+                memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
+                memset(&tmphdr, 0, sizeof(tmphdr));
                tmphdr.magic = savehdr.magic;
                tmphdr.forw = savehdr.forw;
                tmphdr.back = savehdr.back;
                tmphdr.firstused = state->blocksize;
+                /* write the header to the temp buffer to initialise it */
+                xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
                if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
                                         drop_blk->bp, &drophdr)) {
                        xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
@@ -2334,8 +2354,9 @@ xfs_attr3_leaf_lookup_int(
                        args->index = probe;
                        args->valuelen = be32_to_cpu(name_rmt->valuelen);
                        args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                        args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount,
+                        args->rmtblkcnt = xfs_attr3_rmt_blocks(
-                                                       args->valuelen);
+                                                        args->dp->i_mount,
+                                                        args->valuelen);
                        return XFS_ERROR(EEXIST);
                }
        }
@@ -2386,7 +2407,8 @@ xfs_attr3_leaf_getvalue(
                ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
                valuelen = be32_to_cpu(name_rmt->valuelen);
                args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen);
+                args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
+                                                       valuelen);
                if (args->flags & ATTR_KERNOVAL) {
                        args->valuelen = valuelen;
                        return 0;
@@ -2712,7 +2734,8 @@ xfs_attr3_leaf_list_int(
                                args.valuelen = valuelen;
                                args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
                                args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
-                                args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen);
+                                args.rmtblkcnt = xfs_attr3_rmt_blocks(
+                                                        args.dp->i_mount, valuelen);
                                retval = xfs_attr_rmtval_get(&args);
                                if (retval)
                                        return retval;
@@ -3235,7 +3258,7 @@ xfs_attr3_leaf_inactive(
                        name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
                        if (name_rmt->valueblk) {
                                lp->valueblk = be32_to_cpu(name_rmt->valueblk);
-                                lp->valuelen = XFS_B_TO_FSB(dp->i_mount,
+                                lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
                                                    be32_to_cpu(name_rmt->valuelen));
                                lp++;
                        }
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index dee84466dcc9..ef6b0c124528 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -47,22 +47,55 @@
 * Each contiguous block has a header, so it is not just a simple attribute
 * length to FSB conversion.
 */
-static int
+int
 xfs_attr3_rmt_blocks(
        struct xfs_mount *mp,
        int             attrlen)
 {
-        int             buflen = XFS_ATTR3_RMT_BUF_SPACE(mp,
+        if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                                                         mp->m_sb.sb_blocksize);
+                int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-        return (attrlen + buflen - 1) / buflen;
+                return (attrlen + buflen - 1) / buflen;
+        }
+        return XFS_B_TO_FSB(mp, attrlen);
+}
+/*
+ * Checking of the remote attribute header is split into two parts. The verifier
+ * does CRC, location and bounds checking, the unpacking function checks the
+ * attribute parameters and owner.
+ */
+static bool
+xfs_attr3_rmt_hdr_ok(
+        struct xfs_mount        *mp,
+        void                    *ptr,
+        xfs_ino_t               ino,
+        uint32_t                offset,
+        uint32_t                size,
+        xfs_daddr_t             bno)
+{
+        struct xfs_attr3_rmt_hdr *rmt = ptr;
+        if (bno != be64_to_cpu(rmt->rm_blkno))
+                return false;
+        if (offset != be32_to_cpu(rmt->rm_offset))
+                return false;
+        if (size != be32_to_cpu(rmt->rm_bytes))
+                return false;
+        if (ino != be64_to_cpu(rmt->rm_owner))
+                return false;
+        /* ok */
+        return true;
 }
 static bool
 xfs_attr3_rmt_verify(
-        struct xfs_buf          *bp)
+        struct xfs_mount        *mp,
+        void                    *ptr,
+        int                     fsbsize,
+        xfs_daddr_t             bno)
 {
-        struct xfs_mount        *mp = bp->b_target->bt_mount;
+        struct xfs_attr3_rmt_hdr *rmt = ptr;
-        struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return false;
@@ -70,7 +103,9 @@ xfs_attr3_rmt_verify(
                return false;
        if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
                return false;
-        if (bp->b_bn != be64_to_cpu(rmt->rm_blkno))
+        if (be64_to_cpu(rmt->rm_blkno) != bno)
+                return false;
+        if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
                return false;
        if (be32_to_cpu(rmt->rm_offset) +
                                be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
@@ -86,17 +121,40 @@ xfs_attr3_rmt_read_verify(
        struct xfs_buf  *bp)
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
+        char            *ptr;
+        int             len;
+        bool            corrupt = false;
+        xfs_daddr_t     bno;
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
-        if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+        ptr = bp->b_addr;
-                              XFS_ATTR3_RMT_CRC_OFF) ||
+        bno = bp->b_bn;
-            !xfs_attr3_rmt_verify(bp)) {
+        len = BBTOB(bp->b_length);
+        ASSERT(len >= XFS_LBSIZE(mp));
+        while (len > 0) {
+                if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
+                                      XFS_ATTR3_RMT_CRC_OFF)) {
+                        corrupt = true;
+                        break;
+                }
+                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+                        corrupt = true;
+                        break;
+                }
+                len -= XFS_LBSIZE(mp);
+                ptr += XFS_LBSIZE(mp);
+                bno += mp->m_bsize;
+        }
+        if (corrupt) {
                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
                xfs_buf_ioerror(bp, EFSCORRUPTED);
-        }
+        } else
+                ASSERT(len == 0);
 }
 static void
@@ -105,23 +163,39 @@ xfs_attr3_rmt_write_verify(
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
        struct xfs_buf_log_item *bip = bp->b_fspriv;
+        char            *ptr;
+        int             len;
+        xfs_daddr_t     bno;
        /* no verification of non-crc buffers */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return;
-        if (!xfs_attr3_rmt_verify(bp)) {
+        ptr = bp->b_addr;
-                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+        bno = bp->b_bn;
-                xfs_buf_ioerror(bp, EFSCORRUPTED);
+        len = BBTOB(bp->b_length);
-                return;
+        ASSERT(len >= XFS_LBSIZE(mp));
-        }
+        while (len > 0) {
+                if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
+                        XFS_CORRUPTION_ERROR(__func__,
+                                            XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+                        xfs_buf_ioerror(bp, EFSCORRUPTED);
+                        return;
+                }
+                if (bip) {
+                        struct xfs_attr3_rmt_hdr *rmt;
+                        rmt = (struct xfs_attr3_rmt_hdr *)ptr;
+                        rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+                }
+                xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
-        if (bip) {
+                len -= XFS_LBSIZE(mp);
-                struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+                ptr += XFS_LBSIZE(mp);
-                rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+                bno += mp->m_bsize;
        }
-        xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+        ASSERT(len == 0);
-                         XFS_ATTR3_RMT_CRC_OFF);
 }
 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
@@ -129,15 +203,16 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
        .verify_write = xfs_attr3_rmt_write_verify,
 };
-static int
+STATIC int
 xfs_attr3_rmt_hdr_set(
        struct xfs_mount        *mp,
+        void                    *ptr,
        xfs_ino_t               ino,
        uint32_t                offset,
        uint32_t                size,
-        struct xfs_buf          *bp)
+        xfs_daddr_t             bno)
 {
-        struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+        struct xfs_attr3_rmt_hdr *rmt = ptr;
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return 0;
@@ -147,36 +222,107 @@ xfs_attr3_rmt_hdr_set(
        rmt->rm_bytes = cpu_to_be32(size);
        uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
        rmt->rm_owner = cpu_to_be64(ino);
-        rmt->rm_blkno = cpu_to_be64(bp->b_bn);
+        rmt->rm_blkno = cpu_to_be64(bno);
-        bp->b_ops = &xfs_attr3_rmt_buf_ops;
        return sizeof(struct xfs_attr3_rmt_hdr);
 }
 /*
- * Checking of the remote attribute header is split into two parts. the verifier
+ * Helper functions to copy attribute data in and out of the on-disk extents
- * does CRC, location and bounds checking, the unpacking function checks the
- * attribute parameters and owner.
 */
-static bool
+STATIC int
-xfs_attr3_rmt_hdr_ok(
+xfs_attr_rmtval_copyout(
-        struct xfs_mount        *mp,
+        struct xfs_mount *mp,
-        xfs_ino_t               ino,
+        struct xfs_buf  *bp,
-        uint32_t                offset,
+        xfs_ino_t       ino,
-        uint32_t                size,
+        int             *offset,
-        struct xfs_buf          *bp)
+        int             *valuelen,
+        char            **dst)
 {
-        struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
+        char            *src = bp->b_addr;
+        xfs_daddr_t     bno = bp->b_bn;
+        int             len = BBTOB(bp->b_length);
-        if (offset != be32_to_cpu(rmt->rm_offset))
+        ASSERT(len >= XFS_LBSIZE(mp));
-                return false;
-        if (size != be32_to_cpu(rmt->rm_bytes))
-                return false;
-        if (ino != be64_to_cpu(rmt->rm_owner))
-                return false;
-        /* ok */
+        while (len > 0 && *valuelen > 0) {
-        return true;
+                int hdr_size = 0;
+                int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+                byte_cnt = min_t(int, *valuelen, byte_cnt);
+                if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                        if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
+                                                  byte_cnt, bno)) {
+                                xfs_alert(mp,
+"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
+                                        bno, *offset, byte_cnt, ino);
+                                return EFSCORRUPTED;
+                        }
+                        hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
+                }
+                memcpy(*dst, src + hdr_size, byte_cnt);
+                /* roll buffer forwards */
+                len -= XFS_LBSIZE(mp);
+                src += XFS_LBSIZE(mp);
+                bno += mp->m_bsize;
+                /* roll attribute data forwards */
+                *valuelen -= byte_cnt;
+                *dst += byte_cnt;
+                *offset += byte_cnt;
+        }
+        return 0;
+}
+STATIC void
+xfs_attr_rmtval_copyin(
+        struct xfs_mount *mp,
+        struct xfs_buf  *bp,
+        xfs_ino_t       ino,
+        int             *offset,
+        int             *valuelen,
+        char            **src)
+{
+        char            *dst = bp->b_addr;
+        xfs_daddr_t     bno = bp->b_bn;
+        int             len = BBTOB(bp->b_length);
+        ASSERT(len >= XFS_LBSIZE(mp));
+        while (len > 0 && *valuelen > 0) {
+                int hdr_size;
+                int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
+                byte_cnt = min(*valuelen, byte_cnt);
+                hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
+                                                 byte_cnt, bno);
+                memcpy(dst + hdr_size, *src, byte_cnt);
+                /*
+                 * If this is the last block, zero the remainder of it.
+                 * Check that we are actually the last block, too.
+                 */
+                if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
+                        ASSERT(*valuelen - byte_cnt == 0);
+                        ASSERT(len == XFS_LBSIZE(mp));
+                        memset(dst + hdr_size + byte_cnt, 0,
+                                        XFS_LBSIZE(mp) - hdr_size - byte_cnt);
+                }
+                /* roll buffer forwards */
+                len -= XFS_LBSIZE(mp);
+                dst += XFS_LBSIZE(mp);
+                bno += mp->m_bsize;
+                /* roll attribute data forwards */
+                *valuelen -= byte_cnt;
+                *src += byte_cnt;
+                *offset += byte_cnt;
+        }
 }
 /*
@@ -190,13 +336,12 @@ xfs_attr_rmtval_get(
        struct xfs_bmbt_irec    map[ATTR_RMTVALUE_MAPSIZE];
        struct xfs_mount        *mp = args->dp->i_mount;
        struct xfs_buf          *bp;
-        xfs_daddr_t             dblkno;
        xfs_dablk_t             lblkno = args->rmtblkno;
-        void                    *dst = args->value;
+        char                    *dst = args->value;
        int                     valuelen = args->valuelen;
        int                     nmap;
        int                     error;
-        int                     blkcnt;
+        int                     blkcnt = args->rmtblkcnt;
        int                     i;
        int                     offset = 0;
@@ -207,52 +352,36 @@ xfs_attr_rmtval_get(
        while (valuelen > 0) {
                nmap = ATTR_RMTVALUE_MAPSIZE;
                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt, map, &nmap,
+                                       blkcnt, map, &nmap,
                                       XFS_BMAPI_ATTRFORK);
                if (error)
                        return error;
                ASSERT(nmap >= 1);
                for (i = 0; (i < nmap) && (valuelen > 0); i++) {
-                        int     byte_cnt;
+                        xfs_daddr_t     dblkno;
-                        char    *src;
+                        int             dblkcnt;
                        ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
                               (map[i].br_startblock != HOLESTARTBLOCK));
                        dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
-                        blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+                        dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
                        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-                                                   dblkno, blkcnt, 0, &bp,
+                                                   dblkno, dblkcnt, 0, &bp,
                                                   &xfs_attr3_rmt_buf_ops);
                        if (error)
                                return error;
-                        byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length));
+                        error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
-                        byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
+                                                        &offset, &valuelen,
+                                                        &dst);
-                        src = bp->b_addr;
-                        if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                                if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
-                                                        offset, byte_cnt, bp)) {
-                                        xfs_alert(mp,
-"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
-                                                offset, byte_cnt, args->dp->i_ino);
-                                        xfs_buf_relse(bp);
-                                        return EFSCORRUPTED;
-                                }
-                                src += sizeof(struct xfs_attr3_rmt_hdr);
-                        }
-                        memcpy(dst, src, byte_cnt);
                        xfs_buf_relse(bp);
+                        if (error)
+                                return error;
-                        offset += byte_cnt;
+                        /* roll attribute extent map forwards */
-                        dst += byte_cnt;
-                        valuelen -= byte_cnt;
                        lblkno += map[i].br_blockcount;
+                        blkcnt -= map[i].br_blockcount;
                }
        }
        ASSERT(valuelen == 0);
@@ -270,17 +399,13 @@ xfs_attr_rmtval_set(
        struct xfs_inode        *dp = args->dp;
        struct xfs_mount        *mp = dp->i_mount;
        struct xfs_bmbt_irec    map;
-        struct xfs_buf          *bp;
-        xfs_daddr_t             dblkno;
        xfs_dablk_t             lblkno;
        xfs_fileoff_t           lfileoff = 0;
-        void                    *src = args->value;
+        char                    *src = args->value;
        int                     blkcnt;
        int                     valuelen;
        int                     nmap;
        int                     error;
-        int                     hdrcnt = 0;
-        bool                    crcs = xfs_sb_version_hascrc(&mp->m_sb);
        int                     offset = 0;
        trace_xfs_attr_rmtval_set(args);
@@ -289,24 +414,14 @@ xfs_attr_rmtval_set(
         * Find a "hole" in the attribute address space large enough for
         * us to drop the new attribute's value into. Because CRC enable
         * attributes have headers, we can't just do a straight byte to FSB
-         * conversion. We calculate the worst case block count in this case
+         * conversion and have to take the header space into account.
-         * and we may not need that many, so we have to handle this when
-         * allocating the blocks below. 
         */
-        if (!crcs)
+        blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
-                blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
-        else
-                blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
        error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
                                                   XFS_ATTR_FORK);
        if (error)
                return error;
-        /* Start with the attribute data. We'll allocate the rest afterwards. */
-        if (crcs)
-                blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
        args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
        args->rmtblkcnt = blkcnt;
@@ -349,26 +464,6 @@ xfs_attr_rmtval_set(
                       (map.br_startblock != HOLESTARTBLOCK));
                lblkno += map.br_blockcount;
                blkcnt -= map.br_blockcount;
-                hdrcnt++;
-                /*
-                 * If we have enough blocks for the attribute data, calculate
-                 * how many extra blocks we need for headers. We might run
-                 * through this multiple times in the case that the additional
-                 * headers in the blocks needed for the data fragments spills
-                 * into requiring more blocks. e.g. for 512 byte blocks, we'll
-                 * spill for another block every 9 headers we require in this
-                 * loop.
-                 */
-                if (crcs && blkcnt == 0) {
-                        int total_len;
-                        total_len = args->valuelen +
-                                    hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
-                        blkcnt = XFS_B_TO_FSB(mp, total_len);
-                        blkcnt -= args->rmtblkcnt;
-                        args->rmtblkcnt += blkcnt;
-                }
                /*
                 * Start the next trans in the chain.
@@ -385,18 +480,19 @@ xfs_attr_rmtval_set(
         * the INCOMPLETE flag.
         */
        lblkno = args->rmtblkno;
+        blkcnt = args->rmtblkcnt;
        valuelen = args->valuelen;
        while (valuelen > 0) {
-                int     byte_cnt;
+                struct xfs_buf  *bp;
-                char    *buf;
+                xfs_daddr_t     dblkno;
+                int             dblkcnt;
+                ASSERT(blkcnt > 0);
-                /*
-                 * Try to remember where we decided to put the value.
-                 */
                xfs_bmap_init(args->flist, args->firstblock);
                nmap = 1;
                error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt, &map, &nmap,
+                                       blkcnt, &map, &nmap,
                                       XFS_BMAPI_ATTRFORK);
                if (error)
                        return(error);
@@ -405,41 +501,27 @@ xfs_attr_rmtval_set(
                       (map.br_startblock != HOLESTARTBLOCK));
                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
-                bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0);
+                bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
                if (!bp)
                        return ENOMEM;
                bp->b_ops = &xfs_attr3_rmt_buf_ops;
-                byte_cnt = BBTOB(bp->b_length);
+                xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
-                byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt);
+                                       &valuelen, &src);
-                if (valuelen < byte_cnt)
-                        byte_cnt = valuelen;
-                buf = bp->b_addr;
-                buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
-                                             byte_cnt, bp);
-                memcpy(buf, src, byte_cnt);
-                if (byte_cnt < BBTOB(bp->b_length))
-                        xfs_buf_zero(bp, byte_cnt,
-                                     BBTOB(bp->b_length) - byte_cnt);
                error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
                xfs_buf_relse(bp);
                if (error)
                        return error;
-                src += byte_cnt;
-                valuelen -= byte_cnt;
-                offset += byte_cnt;
-                hdrcnt--;
+                /* roll attribute extent map forwards */
                lblkno += map.br_blockcount;
+                blkcnt -= map.br_blockcount;
        }
        ASSERT(valuelen == 0);
-        ASSERT(hdrcnt == 0);
        return 0;
 }
@@ -448,33 +530,40 @@ xfs_attr_rmtval_set(
 * out-of-line buffer that it is stored on.
 */
 int
-xfs_attr_rmtval_remove(xfs_da_args_t *args)
+xfs_attr_rmtval_remove(
+        struct xfs_da_args      *args)
 {
-        xfs_mount_t *mp;
+        struct xfs_mount        *mp = args->dp->i_mount;
-        xfs_bmbt_irec_t map;
+        xfs_dablk_t             lblkno;
-        xfs_buf_t *bp;
+        int                     blkcnt;
-        xfs_daddr_t dblkno;
+        int                     error;
-        xfs_dablk_t lblkno;
+        int                     done;
-        int valuelen, blkcnt, nmap, error, done, committed;
        trace_xfs_attr_rmtval_remove(args);
-        mp = args->dp->i_mount;
        /*
-         * Roll through the "value", invalidating the attribute value's
+         * Roll through the "value", invalidating the attribute value's blocks.
-         * blocks.
+         * Note that args->rmtblkcnt is the minimum number of data blocks we'll
+         * see for a CRC enabled remote attribute. Each extent will have a
+         * header, and so we may have more blocks than we realise here.  If we
+         * fail to map the blocks correctly, we'll have problems with the buffer
+         * lookups.
         */
        lblkno = args->rmtblkno;
-        valuelen = args->rmtblkcnt;
+        blkcnt = args->rmtblkcnt;
-        while (valuelen > 0) {
+        while (blkcnt > 0) {
+                struct xfs_bmbt_irec    map;
+                struct xfs_buf          *bp;
+                xfs_daddr_t             dblkno;
+                int                     dblkcnt;
+                int                     nmap;
                /*
                 * Try to remember where we decided to put the value.
                 */
                nmap = 1;
                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
-                                       args->rmtblkcnt, &map, &nmap,
+                                       blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
-                                       XFS_BMAPI_ATTRFORK);
                if (error)
                        return(error);
                ASSERT(nmap == 1);
@@ -482,21 +571,20 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
                       (map.br_startblock != HOLESTARTBLOCK));
                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
                /*
                 * If the "remote" value is in the cache, remove it.
                 */
-                bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
+                bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
                if (bp) {
                        xfs_buf_stale(bp);
                        xfs_buf_relse(bp);
                        bp = NULL;
                }
-                valuelen -= map.br_blockcount;
                lblkno += map.br_blockcount;
+                blkcnt -= map.br_blockcount;
        }
        /*
@@ -506,6 +594,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
        blkcnt = args->rmtblkcnt;
        done = 0;
        while (!done) {
+                int committed;
                xfs_bmap_init(args->flist, args->firstblock);
                error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
                                    XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index c7cca60a062a..92a8fd7977cc 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -20,6 +20,14 @@
 #define XFS_ATTR3_RMT_MAGIC     0x5841524d      /* XARM */
+/*
+ * There is one of these headers per filesystem block in a remote attribute.
+ * This is done to ensure there is a 1:1 mapping between the attribute value
+ * length and the number of blocks needed to store the attribute. This makes the
+ * verification of a buffer a little more complex, but greatly simplifies the
+ * allocation, reading and writing of these attributes as we don't have to guess
+ * the number of blocks needed to store the attribute data.
+ */
 struct xfs_attr3_rmt_hdr {
        __be32  rm_magic;
        __be32  rm_offset;
@@ -39,6 +47,8 @@ struct xfs_attr3_rmt_hdr {
 extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
+int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
 int xfs_attr_rmtval_get(struct xfs_da_args *args);
 int xfs_attr_rmtval_set(struct xfs_da_args *args);
 int xfs_attr_rmtval_remove(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 0d2554299688..1b2472a46e46 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -513,6 +513,7 @@ _xfs_buf_find(
                xfs_alert(btp->bt_mount,
                          "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
                          __func__, blkno, eofs);
+                WARN_ON(1);
                return NULL;
        }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cf263476d6b4..4ec431777048 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -262,12 +262,7 @@ xfs_buf_item_format_segment(
                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
                        vecp->i_len = nbits * XFS_BLF_CHUNK;
                        vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/*
+                        nvecs++;
- * You would think we need to bump the nvecs here too, but we do not
- * this number is used by recovery, and it gets confused by the boundary
- * split here
- *                      nvecs++;
- */
                        vecp++;
                        first_bit = next_bit;
                        last_bit = next_bit;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f852b082a084..c407e1ccff43 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -219,6 +219,14 @@ xfs_swap_extents(
        int             taforkblks = 0;
        __uint64_t      tmp;
+        /*
+         * We have no way of updating owner information in the BMBT blocks for
+         * each inode on CRC enabled filesystems, so to avoid corrupting the
+         * metadata we simply don't allow extent swaps to occur.
+         */
+        if (xfs_sb_version_hascrc(&mp->m_sb))
+                return XFS_ERROR(EINVAL);
        tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
        if (!tempifp) {
                error = XFS_ERROR(ENOMEM);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a3b1bd841a80..995f1f505a52 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -715,6 +715,7 @@ struct xfs_dir3_free_hdr {
        __be32                  firstdb;        /* db of first entry */
        __be32                  nvalid;         /* count of valid entries */
        __be32                  nused;          /* count of used entries */
+        __be32                  pad;            /* 64 bit alignment. */
 };
 struct xfs_dir3_free {
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5246de4912d4..2226a00acd15 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -263,18 +263,19 @@ xfs_dir3_free_get_buf(
         * Initialize the new block to be empty, and remember
         * its first slot as our empty slot.
         */
-        hdr.magic = XFS_DIR2_FREE_MAGIC;
+        memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
-        hdr.firstdb = 0;
+        memset(&hdr, 0, sizeof(hdr));
-        hdr.nused = 0;
-        hdr.nvalid = 0;
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
                hdr.magic = XFS_DIR3_FREE_MAGIC;
                hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
                hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
                uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
-        }
+        } else
+                hdr.magic = XFS_DIR2_FREE_MAGIC;
        xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
        *bpp = bp;
        return 0;
@@ -1921,8 +1922,6 @@ xfs_dir2_node_addname_int(
                         */
                        freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
                                        xfs_dir3_free_max_bests(mp);
-                        free->hdr.nvalid = 0;
-                        free->hdr.nused = 0;
                } else {
                        free = fbp->b_addr;
                        bests = xfs_dir3_free_bests_p(mp, free);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a41f8bf1da37..044e97a33c8d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -249,8 +249,11 @@ xfs_qm_init_dquot_blk(
                d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
                d->dd_diskdq.d_id = cpu_to_be32(curid);
                d->dd_diskdq.d_flags = type;
-                if (xfs_sb_version_hascrc(&mp->m_sb))
+                if (xfs_sb_version_hascrc(&mp->m_sb)) {
                        uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+                        xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+                                         XFS_DQUOT_CRC_OFF);
+                }
        }
        xfs_trans_dquot_buf(tp, bp,
@@ -286,23 +289,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
        dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
 }
-STATIC void
-xfs_dquot_buf_calc_crc(
-        struct xfs_mount        *mp,
-        struct xfs_buf          *bp)
-{
-        struct xfs_dqblk        *d = (struct xfs_dqblk *)bp->b_addr;
-        int                     i;
-        if (!xfs_sb_version_hascrc(&mp->m_sb))
-                return;
-        for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
-                xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
-                                 offsetof(struct xfs_dqblk, dd_crc));
-        }
-}
 STATIC bool
 xfs_dquot_buf_verify_crc(
        struct xfs_mount        *mp,
@@ -328,12 +314,11 @@ xfs_dquot_buf_verify_crc(
        for (i = 0; i < ndquots; i++, d++) {
                if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
-                                 offsetof(struct xfs_dqblk, dd_crc)))
+                                 XFS_DQUOT_CRC_OFF))
                        return false;
                if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
                        return false;
        }
        return true;
 }
@@ -393,6 +378,11 @@ xfs_dquot_buf_read_verify(
        }
 }
+/*
+ * We don't calculate the CRC here as that is done when the dquot is flushed to
+ * the buffer after the update is done. This ensures that the dquot in the
+ * buffer always has an up-to-date CRC value.
+ */
 void
 xfs_dquot_buf_write_verify(
        struct xfs_buf  *bp)
@@ -404,7 +394,6 @@ xfs_dquot_buf_write_verify(
                xfs_buf_ioerror(bp, EFSCORRUPTED);
                return;
        }
-        xfs_dquot_buf_calc_crc(mp, bp);
 }
 const struct xfs_buf_ops xfs_dquot_buf_ops = {
@@ -1151,11 +1140,17 @@ xfs_qm_dqflush(
         * copy the lsn into the on-disk dquot now while we have the in memory
         * dquot here. This can't be done later in the write verifier as we
         * can't get access to the log item at that point in time.
+         *
+         * We also calculate the CRC here so that the on-disk dquot in the
+         * buffer always has a valid CRC. This ensures there is no possibility
+         * of a dquot without an up-to-date CRC getting to disk.
         */
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
                dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
+                xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
+                                 XFS_DQUOT_CRC_OFF);
        }
        /*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6dda3f949b04..d04695545397 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -236,6 +236,7 @@ typedef struct xfs_fsop_resblks {
 #define XFS_FSOP_GEOM_FLAGS_PROJID32    0x0800  /* 32-bit project IDs   */
 #define XFS_FSOP_GEOM_FLAGS_DIRV2CI     0x1000  /* ASCII only CI names  */
 #define XFS_FSOP_GEOM_FLAGS_LAZYSB      0x4000  /* lazy superblock counters */
+#define XFS_FSOP_GEOM_FLAGS_V5SB        0x8000  /* version 5 superblock */
 /*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 87595b211da1..3c3644ea825b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -99,7 +99,9 @@ xfs_fs_geometry(
                        (xfs_sb_version_hasattr2(&mp->m_sb) ?
                                XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
                        (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
-                                XFS_FSOP_GEOM_FLAGS_PROJID32 : 0);
+                                XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
+                        (xfs_sb_version_hascrc(&mp->m_sb) ?
+                                XFS_FSOP_GEOM_FLAGS_V5SB : 0);
                geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
                                mp->m_sb.sb_logsectsize : BBSIZE;
                geo->rtsectsize = mp->m_sb.sb_blocksize;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index efbe1accb6ca..7f7be5f98f52 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1638,6 +1638,10 @@ xfs_iunlink(
                dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
                offset = ip->i_imap.im_boffset +
                        offsetof(xfs_dinode_t, di_next_unlinked);
+                /* need to recalc the inode CRC if appropriate */
+                xfs_dinode_calc_crc(mp, dip);
                xfs_trans_inode_buf(tp, ibp);
                xfs_trans_log_buf(tp, ibp, offset,
                                  (offset + sizeof(xfs_agino_t) - 1));
@@ -1723,6 +1727,10 @@ xfs_iunlink_remove(
                        dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
                        offset = ip->i_imap.im_boffset +
                                offsetof(xfs_dinode_t, di_next_unlinked);
+                        /* need to recalc the inode CRC if appropriate */
+                        xfs_dinode_calc_crc(mp, dip);
                        xfs_trans_inode_buf(tp, ibp);
                        xfs_trans_log_buf(tp, ibp, offset,
                                          (offset + sizeof(xfs_agino_t) - 1));
@@ -1796,6 +1804,10 @@ xfs_iunlink_remove(
                        dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
                        offset = ip->i_imap.im_boffset +
                                offsetof(xfs_dinode_t, di_next_unlinked);
+                        /* need to recalc the inode CRC if appropriate */
+                        xfs_dinode_calc_crc(mp, dip);
                        xfs_trans_inode_buf(tp, ibp);
                        xfs_trans_log_buf(tp, ibp, offset,
                                          (offset + sizeof(xfs_agino_t) - 1));
@@ -1809,6 +1821,10 @@ xfs_iunlink_remove(
                last_dip->di_next_unlinked = cpu_to_be32(next_agino);
                ASSERT(next_agino != 0);
                offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
+                /* need to recalc the inode CRC if appropriate */
+                xfs_dinode_calc_crc(mp, last_dip);
                xfs_trans_inode_buf(tp, last_ibp);
                xfs_trans_log_buf(tp, last_ibp, offset,
                                  (offset + sizeof(xfs_agino_t) - 1));
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d82efaa2ac73..ca9ecaa81112 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -455,6 +455,28 @@ xfs_vn_getattr(
        return 0;
 }
+static void
+xfs_setattr_mode(
+        struct xfs_trans        *tp,
+        struct xfs_inode        *ip,
+        struct iattr            *iattr)
+{
+        struct inode    *inode = VFS_I(ip);
+        umode_t         mode = iattr->ia_mode;
+        ASSERT(tp);
+        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+        if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                mode &= ~S_ISGID;
+        ip->i_d.di_mode &= S_IFMT;
+        ip->i_d.di_mode |= mode & ~S_IFMT;
+        inode->i_mode &= S_IFMT;
+        inode->i_mode |= mode & ~S_IFMT;
+}
 int
 xfs_setattr_nonsize(
        struct xfs_inode        *ip,
@@ -606,18 +628,8 @@ xfs_setattr_nonsize(
        /*
         * Change file access modes.
         */
-        if (mask & ATTR_MODE) {
+        if (mask & ATTR_MODE)
-                umode_t mode = iattr->ia_mode;
+                xfs_setattr_mode(tp, ip, iattr);
-                if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                        mode &= ~S_ISGID;
-                ip->i_d.di_mode &= S_IFMT;
-                ip->i_d.di_mode |= mode & ~S_IFMT;
-                inode->i_mode &= S_IFMT;
-                inode->i_mode |= mode & ~S_IFMT;
-        }
        /*
         * Change file access or modified times.
@@ -714,9 +726,8 @@ xfs_setattr_size(
                return XFS_ERROR(error);
        ASSERT(S_ISREG(ip->i_d.di_mode));
-        ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+        ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-                        ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+                        ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-                        ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
        if (!(flags & XFS_ATTR_NOLOCK)) {
                lock_flags |= XFS_IOLOCK_EXCL;
@@ -860,6 +871,12 @@ xfs_setattr_size(
                xfs_inode_clear_eofblocks_tag(ip);
        }
+        /*
+         * Change file access modes.
+         */
+        if (mask & ATTR_MODE)
+                xfs_setattr_mode(tp, ip, iattr);
        if (mask & ATTR_CTIME) {
                inode->i_ctime = iattr->ia_ctime;
                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..45a85ff84da1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
 }
 /*
- * Sort the log items in the transaction. Cancelled buffers need
+ * Sort the log items in the transaction.
- * to be put first so they are processed before any items that might
+ *
- * modify the buffers. If they are cancelled, then the modifications
+ * The ordering constraints are defined by the inode allocation and unlink
- * don't need to be replayed.
+ * behaviour. The rules are:
+ *
+ *      1. Every item is only logged once in a given transaction. Hence it
+ *         represents the last logged state of the item. Hence ordering is
+ *         dependent on the order in which operations need to be performed so
+ *         required initial conditions are always met.
+ *
+ *      2. Cancelled buffers are recorded in pass 1 in a separate table and
+ *         there's nothing to replay from them so we can simply cull them
+ *         from the transaction. However, we can't do that until after we've
+ *         replayed all the other items because they may be dependent on the
+ *         cancelled buffer and replaying the cancelled buffer can remove it
+ *         from the cancelled buffer table. Hence they have to be done last.
+ *
+ *      3. Inode allocation buffers must be replayed before inode items that
+ *         read the buffer and replay changes into it.
+ *
+ *      4. Inode unlink buffers must be replayed after inode items are replayed.
+ *         This ensures that inodes are completely flushed to the inode buffer
+ *         in a "free" state before we remove the unlinked inode list pointer.
+ *
+ * Hence the ordering needs to be inode allocation buffers first, inode items
+ * second, inode unlink buffers third and cancelled buffers last.
+ *
+ * But there's a problem with that - we can't tell an inode allocation buffer
+ * apart from a regular buffer, so we can't separate them. We can, however,
+ * tell an inode unlink buffer from the others, and so we can separate them out
+ * from all the other buffers and move them to last.
+ *
+ * Hence, 4 lists, in order from head to tail:
+ *      - buffer_list for all buffers except cancelled/inode unlink buffers
+ *      - inode_list for all non-buffer items
+ *      - inode_buffer_list for inode unlink buffers
+ *      - cancel_list for the cancelled buffers
 */
 STATIC int
 xlog_recover_reorder_trans(
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
 {
        xlog_recover_item_t     *item, *n;
        LIST_HEAD(sort_list);
+        LIST_HEAD(cancel_list);
+        LIST_HEAD(buffer_list);
+        LIST_HEAD(inode_buffer_list);
+        LIST_HEAD(inode_list);
        list_splice_init(&trans->r_itemq, &sort_list);
        list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
                switch (ITEM_TYPE(item)) {
                case XFS_LI_BUF:
-                        if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
+                        if (buf_f->blf_flags & XFS_BLF_CANCEL) {
                                trace_xfs_log_recover_item_reorder_head(log,
                                                        trans, item, pass);
-                                list_move(&item->ri_list, &trans->r_itemq);
+                                list_move(&item->ri_list, &cancel_list);
                                break;
                        }
+                        if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+                                list_move(&item->ri_list, &inode_buffer_list);
+                                break;
+                        }
+                        list_move_tail(&item->ri_list, &buffer_list);
+                        break;
                case XFS_LI_INODE:
                case XFS_LI_DQUOT:
                case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
                case XFS_LI_EFI:
                        trace_xfs_log_recover_item_reorder_tail(log,
                                                        trans, item, pass);
-                        list_move_tail(&item->ri_list, &trans->r_itemq);
+                        list_move_tail(&item->ri_list, &inode_list);
                        break;
                default:
                        xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
                }
        }
        ASSERT(list_empty(&sort_list));
+        if (!list_empty(&buffer_list))
+                list_splice(&buffer_list, &trans->r_itemq);
+        if (!list_empty(&inode_list))
+                list_splice_tail(&inode_list, &trans->r_itemq);
+        if (!list_empty(&inode_buffer_list))
+                list_splice_tail(&inode_buffer_list, &trans->r_itemq);
+        if (!list_empty(&cancel_list))
+                list_splice_tail(&cancel_list, &trans->r_itemq);
        return 0;
 }
@@ -1861,6 +1912,15 @@ xlog_recover_do_inode_buffer(
                buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
                                              next_unlinked_offset);
                *buffer_nextp = *logged_nextp;
+                /*
+                 * If necessary, recalculate the CRC in the on-disk inode. We
+                 * have to leave the inode in a consistent state for whoever
+                 * reads it next....
+                 */
+                xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+                                xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
        }
        return 0;
@@ -2097,6 +2157,17 @@ xlog_recover_do_reg_buffer(
                       ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
                /*
+                 * The dirty regions logged in the buffer, even though
+                 * contiguous, may span multiple chunks. This is because the
+                 * dirty region may span a physical page boundary in a buffer
+                 * and hence be split into two separate vectors for writing into
+                 * the log. Hence we need to trim nbits back to the length of
+                 * the current region being copied out of the log.
+                 */
+                if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+                        nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+                /*
                 * Do a sanity check if this is a dquot buffer. Just checking
                 * the first dquot in the buffer should do. XXXThis is
                 * probably a good thing to do for other buf types also.
@@ -2255,6 +2326,12 @@ xfs_qm_dqcheck(
        d->dd_diskdq.d_flags = type;
        d->dd_diskdq.d_id = cpu_to_be32(id);
+        if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+                xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+                                 XFS_DQUOT_CRC_OFF);
+        }
        return errs;
 }
@@ -2782,6 +2859,10 @@ xlog_recover_dquot_pass2(
        }
        memcpy(ddq, recddq, item->ri_buf[1].i_len);
+        if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+                                 XFS_DQUOT_CRC_OFF);
+        }
        ASSERT(dq_f->qlf_size == 2);
        ASSERT(bp->b_target->bt_mount == mp);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index f41702b43003..b75c9bb6e71e 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -41,6 +41,7 @@
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_cksum.h"
 /*
 * The global quota manager. There is only one of these for the entire
@@ -839,7 +840,7 @@ xfs_qm_reset_dqcounts(
        xfs_dqid_t      id,
        uint            type)
 {
-        xfs_disk_dquot_t        *ddq;
+        struct xfs_dqblk        *dqb;
        int                     j;
        trace_xfs_reset_dqcounts(bp, _RET_IP_);
@@ -853,8 +854,12 @@ xfs_qm_reset_dqcounts(
        do_div(j, sizeof(xfs_dqblk_t));
        ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
 #endif
-        ddq = bp->b_addr;
+        dqb = bp->b_addr;
        for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+                struct xfs_disk_dquot   *ddq;
+                ddq = (struct xfs_disk_dquot *)&dqb[j];
                /*
                 * Do a sanity check, and if needed, repair the dqblk. Don't
                 * output any warnings because it's perfectly possible to
@@ -871,7 +876,12 @@ xfs_qm_reset_dqcounts(
                ddq->d_bwarns = 0;
                ddq->d_iwarns = 0;
                ddq->d_rtbwarns = 0;
-                ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+                if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                        xfs_update_cksum((char *)&dqb[j],
+                                         sizeof(struct xfs_dqblk),
+                                         XFS_DQUOT_CRC_OFF);
+                }
        }
 }
@@ -907,19 +917,29 @@ xfs_qm_dqiter_bufs(
                              XFS_FSB_TO_DADDR(mp, bno),
                              mp->m_quotainfo->qi_dqchunklen, 0, &bp,
                              &xfs_dquot_buf_ops);
-                if (error)
-                        break;
                /*
-                 * XXX(hch): need to figure out if it makes sense to validate
+                 * CRC and validation errors will return an EFSCORRUPTED here. If
-                 *           the CRC here.
+                 * this occurs, re-read without CRC validation so that we can
+                 * repair the damage via xfs_qm_reset_dqcounts(). This process
+                 * will leave a trace in the log indicating corruption has
+                 * been detected.
                 */
+                if (error == EFSCORRUPTED) {
+                        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                                      XFS_FSB_TO_DADDR(mp, bno),
+                                      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+                                      NULL);
+                }
+                if (error)
+                        break;
                xfs_qm_reset_dqcounts(mp, bp, firstid, type);
                xfs_buf_delwri_queue(bp, buffer_list);
                xfs_buf_relse(bp);
-                /*
-                 * goto the next block.
+                /* goto the next block. */
-                 */
                bno++;
                firstid += mp->m_quotainfo->qi_dqperchunk;
        }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index c41190cad6e9..6cdf6ffc36a1 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -489,31 +489,36 @@ xfs_qm_scall_setqlim(
        if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
                return 0;
-        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-        error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
-                                  0, 0, XFS_DEFAULT_LOG_COUNT);
-        if (error) {
-                xfs_trans_cancel(tp, 0);
-                return (error);
-        }
        /*
         * We don't want to race with a quotaoff so take the quotaoff lock.
-         * (We don't hold an inode lock, so there's nothing else to stop
+         * We don't hold an inode lock, so there's nothing else to stop
-         * a quotaoff from happening). (XXXThis doesn't currently happen
+         * a quotaoff from happening.
-         * because we take the vfslock before calling xfs_qm_sysent).
         */
        mutex_lock(&q->qi_quotaofflock);
        /*
-         * Get the dquot (locked), and join it to the transaction.
+         * Get the dquot (locked) before we start, as we need to do a
-         * Allocate the dquot if this doesn't exist.
+         * transaction to allocate it if it doesn't exist. Once we have the
+         * dquot, unlock it so we can start the next transaction safely. We hold
+         * a reference to the dquot, so it's safe to do this unlock/lock without
+         * it being reclaimed in the meantime.
         */
-        if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+        error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
-                xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+        if (error) {
                ASSERT(error != ENOENT);
                goto out_unlock;
        }
+        xfs_dqunlock(dqp);
+        tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+        error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
+                                  0, 0, XFS_DEFAULT_LOG_COUNT);
+        if (error) {
+                xfs_trans_cancel(tp, 0);
+                goto out_rele;
+        }
+        xfs_dqlock(dqp);
        xfs_trans_dqjoin(tp, dqp);
        ddq = &dqp->q_core;
@@ -621,9 +626,10 @@ xfs_qm_scall_setqlim(
        xfs_trans_log_dquot(tp, dqp);
        error = xfs_trans_commit(tp, 0);
-        xfs_qm_dqrele(dqp);
- out_unlock:
+out_rele:
+        xfs_qm_dqrele(dqp);
+out_unlock:
        mutex_unlock(&q->qi_quotaofflock);
        return error;
 }
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index c61e31c7d997..c38068f26c55 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,6 +87,8 @@ typedef struct xfs_dqblk {
        uuid_t            dd_uuid;      /* location information */
 } xfs_dqblk_t;
+#define XFS_DQUOT_CRC_OFF       offsetof(struct xfs_dqblk, dd_crc)
 /*
 * flags for q_flags field in the dquot.
 */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ea341cea68cb..3033ba5e9762 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1373,6 +1373,17 @@ xfs_finish_flags(
        }
        /*
+         * V5 filesystems always use attr2 format for attributes.
+         */
+        if (xfs_sb_version_hascrc(&mp->m_sb) &&
+            (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+                xfs_warn(mp,
+"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
+                        MNTOPT_NOATTR2, MNTOPT_ATTR2);
+                return XFS_ERROR(EINVAL);
+        }
+        /*
         * mkfs'ed attr2 will turn on attr2 mount unless explicitly
         * told by noattr2 to turn it off
         */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5f234389327c..195a403e1522 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -56,16 +56,9 @@ xfs_symlink_blocks(
        struct xfs_mount *mp,
        int             pathlen)
 {
-        int             fsblocks = 0;
+        int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-        int             len = pathlen;
-        do {
+        return (pathlen + buflen - 1) / buflen;
-                fsblocks++;
-                len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-        } while (len > 0);
-        ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
-        return fsblocks;
 }
 static int
@@ -405,7 +398,7 @@ xfs_symlink(
        if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
                fs_blocks = 0;
        else
-                fs_blocks = XFS_B_TO_FSB(mp, pathlen);
+                fs_blocks = xfs_symlink_blocks(mp, pathlen);
        resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
        error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
                        XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
@@ -512,7 +505,7 @@ xfs_symlink(
                cur_chunk = target_path;
                offset = 0;
                for (n = 0; n < nmaps; n++) {
-                        char *buf;
+                        char    *buf;
                        d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
                        byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
@@ -525,9 +518,7 @@ xfs_symlink(
                        bp->b_ops = &xfs_symlink_buf_ops;
                        byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
-                        if (pathlen < byte_cnt) {
+                        byte_cnt = min(byte_cnt, pathlen);
-                                byte_cnt = pathlen;
-                        }
                        buf = bp->b_addr;
                        buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
@@ -542,6 +533,7 @@ xfs_symlink(
                        xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
                                                        (char *)bp->b_addr);
                }
+                ASSERT(pathlen == 0);
        }
        /*
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-06-09 01:34:53 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2013-06-09 01:34:53 -0400
commit	38a4671cad3f0d277cf48445b49e42a475ebfb6a (patch)
tree	7348f3dd5f4e1bec39758dcff748a9dcbbf3430c /fs
parent	d652f7022b359afd5d34fc9fffd71df118521ead (diff)
parent	317ddd256b9c24b0d78fa8018f80f1e495481a10 (diff)