Merge branch 'linus' into x86/irqstats

author: Ingo Molnar <mingo@elte.hu> 2008-06-16 05:27:53 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-06-16 05:27:53 -0400
commit: c54f9da1c8ceee19436430afac0798a989eb886d (patch)
tree: 412f51c3f2641e4205b767cec95ce6107cd39d36 /fs
parent: a2eddfa95919a730e0e5ed17e9c303fe5ba249cd (diff)
parent: 066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff)
53 files changed, 645 insertions, 426 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 55e8ee1900a5..3263084eef9e 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC
 config BINFMT_FLAT
        bool "Kernel support for flat binaries"
-        depends on !MMU
+        depends on !MMU && (!FRV || BROKEN)
        help
          Support uClinux FLAT format binaries.
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index a78d5b236bb1..587ef5123cd8 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -8,7 +8,7 @@
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
- * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ * Authors: David Woodhouse <dwmw2@infradead.org>
 *          David Howells <dhowells@redhat.com>
 *
 */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 08db82e1343a..bb47217f6a18 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -8,7 +8,7 @@
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
- * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
+ * Authors: David Woodhouse <dwmw2@infradead.org>
 *          David Howells <dhowells@redhat.com>
 *
 */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 4b572b801d8d..7e3faeef6818 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -10,7 +10,7 @@
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Authors: David Howells <dhowells@redhat.com>
- *          David Woodhouse <dwmw2@redhat.com>
+ *          David Woodhouse <dwmw2@infradead.org>
 *
 */
diff --git a/fs/aio.c b/fs/aio.c
index b5253e77eb2f..0fb3117ddd93 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -591,10 +591,6 @@ static void use_mm(struct mm_struct *mm)
        atomic_inc(&mm->mm_count);
        tsk->mm = mm;
        tsk->active_mm = mm;
-        /*
-         * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise
-         * it won't work. Update it accordingly if you change it here
-         */
        switch_mm(active_mm, mm, tsk);
        task_unlock(tsk);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ddd35d873391..d051a32e6270 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -390,7 +390,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
        }
        /* expand the stack mapping to use up the entire allocation granule */
-        fullsize = ksize((char *) current->mm->start_brk);
+        fullsize = kobjsize((char *) current->mm->start_brk);
        if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
                                    fullsize, 0, 0)))
                stack_size = fullsize;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 3b40d45a3a16..2cb1acda3a82 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -548,7 +548,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
                /* Remap to use all availabe slack region space */
                if (realdatastart && (realdatastart < (unsigned long)-4096)) {
-                        reallen = ksize((void *)realdatastart);
+                        reallen = kobjsize((void *)realdatastart);
                        if (reallen > len) {
                                realdatastart = do_mremap(realdatastart, len,
                                        reallen, MREMAP_FIXED, realdatastart);
@@ -600,7 +600,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
                /* Remap to use all availabe slack region space */
                if (textpos && (textpos < (unsigned long) -4096)) {
-                        reallen = ksize((void *)textpos);
+                        reallen = kobjsize((void *)textpos);
                        if (reallen > len) {
                                textpos = do_mremap(textpos, len, reallen,
                                        MREMAP_FIXED, textpos);
@@ -683,7 +683,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                 */
                current->mm->start_brk = datapos + data_len + bss_len;
                current->mm->brk = (current->mm->start_brk + 3) & ~3;
-                current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len;
+                current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len;
        }
        if (flags & FLAT_FLAG_KTRACE)
@@ -790,7 +790,7 @@ static int load_flat_file(struct linux_binprm * bprm,
        /* zero the BSS,  BRK and stack areas */
        memset((void*)(datapos + data_len), 0, bss_len + 
-                        (memp + ksize((void *) memp) - stack_len -      /* end brk */
+                        (memp + kobjsize((void *) memp) - stack_len -   /* end brk */
                        libinfo->lib_list[id].start_brk) +              /* start brk */
                        stack_len);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7d822fae7765..470c10ceb0fb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -12,6 +12,7 @@
 #include <linux/kmod.h>
 #include <linux/major.h>
 #include <linux/smp_lock.h>
+#include <linux/device_cgroup.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
 #include <linux/module.h>
@@ -928,9 +929,14 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
 {
        struct module *owner = NULL;
        struct gendisk *disk;
-        int ret = -ENXIO;
+        int ret;
        int part;
+        ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode);
+        if (ret != 0)
+                return ret;
+        ret = -ENXIO;
        file->f_mapping = bdev->bd_inode->i_mapping;
        lock_kernel();
        disk = get_gendisk(bdev->bd_dev, &part);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 28e3d5c5fcac..1f3465201fdf 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -2,6 +2,11 @@ Version 1.53
 ------------
 DFS support added (Microsoft Distributed File System client support needed
 for referrals which enable a hierarchical name space among servers).
+Disable temporary caching of mode bits to servers which do not support
+storing of mode (e.g. Windows servers, when client mounts without cifsacl
+mount option) and add new "dynperm" mount option to enable temporary caching
+of mode (enable old behavior).  Fix hang on mount caused when server crashes
+tcp session during negotiate protocol.
 Version 1.52
 ------------
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index cb52cbbe45ff..f58e41d3ba48 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -186,6 +186,11 @@ asn1_length_decode(struct asn1_ctx *ctx, unsigned int *def, unsigned int *len)
                        }
                }
        }
+        /* don't trust len bigger than ctx buffer */
+        if (*len > ctx->end - ctx->pointer)
+                return 0;
        return 1;
 }
@@ -203,6 +208,10 @@ asn1_header_decode(struct asn1_ctx *ctx,
        if (!asn1_length_decode(ctx, &def, &len))
                return 0;
+        /* primitive shall be definite, indefinite shall be constructed */
+        if (*con == ASN1_PRI && !def)
+                return 0;
        if (def)
                *eoc = ctx->pointer + len;
        else
@@ -389,6 +398,11 @@ asn1_oid_decode(struct asn1_ctx *ctx,
        unsigned long *optr;
        size = eoc - ctx->pointer + 1;
+        /* first subid actually encodes first two subids */
+        if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+                return 0;
        *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
        if (*oid == NULL)
                return 0;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5df93fd6303f..86b4d5f405ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -97,9 +97,6 @@ cifs_read_super(struct super_block *sb, void *data,
 {
        struct inode *inode;
        struct cifs_sb_info *cifs_sb;
-#ifdef CONFIG_CIFS_DFS_UPCALL
-        int len;
-#endif
        int rc = 0;
        /* BB should we make this contingent on mount parm? */
@@ -117,15 +114,17 @@ cifs_read_super(struct super_block *sb, void *data,
         * complex operation (mount), and in case of fail
         * just exit instead of doing mount and attempting
         * undo it if this copy fails?*/
-        len = strlen(data);
+        if (data) {
-        cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
+                int len = strlen(data);
-        if (cifs_sb->mountdata == NULL) {
+                cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
-                kfree(sb->s_fs_info);
+                if (cifs_sb->mountdata == NULL) {
-                sb->s_fs_info = NULL;
+                        kfree(sb->s_fs_info);
-                return -ENOMEM;
+                        sb->s_fs_info = NULL;
+                        return -ENOMEM;
+                }
+                strncpy(cifs_sb->mountdata, data, len + 1);
+                cifs_sb->mountdata[len] = '\0';
        }
-        strncpy(cifs_sb->mountdata, data, len + 1);
-        cifs_sb->mountdata[len] = '\0';
 #endif
        rc = cifs_mount(sb, cifs_sb, data, devname);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 08914053242b..9cfcf326ead3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -333,7 +333,6 @@ struct cifsFileInfo {
        bool messageMode:1;     /* for pipes: message vs byte mode */
        atomic_t wrtPending;   /* handle in use - defer close */
        struct semaphore fh_sem; /* prevents reopen race after dead ses*/
-        char *search_resume_name; /* BB removeme BB */
        struct cifs_search_info srch_inf;
 };
@@ -626,7 +625,7 @@ GLOBAL_EXTERN atomic_t tcpSesAllocCount;
 GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
 GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
-/* Various Debug counters to remove someday (BB) */
+/* Various Debug counters */
 GLOBAL_EXTERN atomic_t bufAllocCount;    /* current number allocated  */
 #ifdef CONFIG_CIFS_STATS2
 GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 65d58b4e6a61..0f327c224da3 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -79,6 +79,19 @@
 #define TRANS2_GET_DFS_REFERRAL       0x10
 #define TRANS2_REPORT_DFS_INCOSISTENCY 0x11
+/* SMB Transact (Named Pipe) subcommand codes */
+#define TRANS_SET_NMPIPE_STATE      0x0001
+#define TRANS_RAW_READ_NMPIPE       0x0011
+#define TRANS_QUERY_NMPIPE_STATE    0x0021
+#define TRANS_QUERY_NMPIPE_INFO     0x0022
+#define TRANS_PEEK_NMPIPE           0x0023
+#define TRANS_TRANSACT_NMPIPE       0x0026
+#define TRANS_RAW_WRITE_NMPIPE      0x0031
+#define TRANS_READ_NMPIPE           0x0036
+#define TRANS_WRITE_NMPIPE          0x0037
+#define TRANS_WAIT_NMPIPE           0x0053
+#define TRANS_CALL_NMPIPE           0x0054
 /* NT Transact subcommand codes */
 #define NT_TRANSACT_CREATE            0x01
 #define NT_TRANSACT_IOCTL             0x02
@@ -328,12 +341,13 @@
 #define CREATE_COMPLETE_IF_OPLK 0x00000100      /* should be zero */
 #define CREATE_NO_EA_KNOWLEDGE  0x00000200
 #define CREATE_EIGHT_DOT_THREE  0x00000400      /* doc says this is obsolete
-                                                 open for recovery flag - should
+                                                 "open for recovery" flag - should
-                                                 be zero */
+                                                 be zero in any case */
+#define CREATE_OPEN_FOR_RECOVERY 0x00000400
 #define CREATE_RANDOM_ACCESS    0x00000800
 #define CREATE_DELETE_ON_CLOSE  0x00001000
 #define CREATE_OPEN_BY_ID       0x00002000
-#define CREATE_OPEN_BACKUP_INTN 0x00004000
+#define CREATE_OPEN_BACKUP_INTENT 0x00004000
 #define CREATE_NO_COMPRESSION   0x00008000
 #define CREATE_RESERVE_OPFILTER 0x00100000      /* should be zero */
 #define OPEN_REPARSE_POINT      0x00200000
@@ -722,7 +736,6 @@ typedef struct smb_com_tconx_rsp_ext {
 #define SMB_CSC_CACHE_AUTO_REINT   0x0004
 #define SMB_CSC_CACHE_VDO          0x0008
 #define SMB_CSC_NO_CACHING         0x000C
 #define SMB_UNIQUE_FILE_NAME    0x0010
 #define SMB_EXTENDED_SIGNATURES 0x0020
@@ -806,7 +819,7 @@ typedef struct smb_com_findclose_req {
 #define ICOUNT_MASK             0x00FF
 #define PIPE_READ_MODE          0x0100
 #define NAMED_PIPE_TYPE         0x0400
-#define PIPE_END_POINT          0x0800
+#define PIPE_END_POINT          0x4000
 #define BLOCKING_NAMED_PIPE     0x8000
 typedef struct smb_com_open_req {       /* also handles create */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 9b8b4cfdf993..4511b708f0f3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1728,7 +1728,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
 {
        int rc = 0;
        LOCK_REQ *pSMB = NULL;
-        LOCK_RSP *pSMBr = NULL;
+/*      LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */
        int bytes_returned;
        int timeout = 0;
        __u16 count;
@@ -1739,8 +1739,6 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
        if (rc)
                return rc;
-        pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */
        if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) {
                timeout = CIFS_ASYNC_OP; /* no response expected */
                pSMB->Timeout = 0;
@@ -1774,7 +1772,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
        if (waitFlag) {
                rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
-                        (struct smb_hdr *) pSMBr, &bytes_returned);
+                        (struct smb_hdr *) pSMB, &bytes_returned);
                cifs_small_buf_release(pSMB);
        } else {
                rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB,
@@ -3927,9 +3925,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
        }
        ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals);
-        if (ref->VersionNumber != 3) {
+        if (ref->VersionNumber != cpu_to_le16(3)) {
                cERROR(1, ("Referrals of V%d version are not supported,"
-                        "should be V3", ref->VersionNumber));
+                        "should be V3", le16_to_cpu(ref->VersionNumber)));
                rc = -EINVAL;
                goto parse_DFS_referrals_exit;
        }
@@ -3977,7 +3975,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
                if (rc)
                        goto parse_DFS_referrals_exit;
-                ref += ref->Size;
+                ref += le16_to_cpu(ref->Size);
        }
 parse_DFS_referrals_exit:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 023434f72c15..e8fa46c7cff2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -653,6 +653,7 @@ multi_t2_fnd:
        spin_lock(&GlobalMid_Lock);
        server->tcpStatus = CifsExiting;
        spin_unlock(&GlobalMid_Lock);
+        wake_up_all(&server->response_q);
        /* don't exit until kthread_stop is called */
        set_current_state(TASK_UNINTERRUPTIBLE);
@@ -2120,6 +2121,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
                }
+                if ((volume_info.cifs_acl) && (volume_info.dynperm))
+                        cERROR(1, ("mount option dynperm ignored if cifsacl "
+                                   "mount option supported"));
                tcon =
                    find_unc(sin_server.sin_addr.s_addr, volume_info.UNC,
                             volume_info.username);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index f0b5b5f3dd2e..fb69c1fa85c9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -260,7 +260,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                                                 buf, inode->i_sb, xid,
                                                 &fileHandle);
                        if (newinode) {
-                                newinode->i_mode = mode;
+                                if (cifs_sb->mnt_cifs_flags &
+                                    CIFS_MOUNT_DYNPERM)
+                                        newinode->i_mode = mode;
                                if ((oplock & CIFS_CREATE_ACTION) &&
                                    (cifs_sb->mnt_cifs_flags &
                                     CIFS_MOUNT_SET_UID)) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8636cec2642c..0aac824371a5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -546,7 +546,6 @@ int cifs_close(struct inode *inode, struct file *file)
                        msleep(timeout);
                        timeout *= 8;
                }
-                kfree(pSMBFile->search_resume_name);
                kfree(file->private_data);
                file->private_data = NULL;
        } else
@@ -605,12 +604,6 @@ int cifs_closedir(struct inode *inode, struct file *file)
                        else
                                cifs_buf_release(ptmp);
                }
-                ptmp = pCFileStruct->search_resume_name;
-                if (ptmp) {
-                        cFYI(1, ("closedir free resume name"));
-                        pCFileStruct->search_resume_name = NULL;
-                        kfree(ptmp);
-                }
                kfree(file->private_data);
                file->private_data = NULL;
        }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 129dbfe4dca7..722be543ceec 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -418,6 +418,7 @@ int cifs_get_inode_info(struct inode **pinode,
        char *buf = NULL;
        bool adjustTZ = false;
        bool is_dfs_referral = false;
+        umode_t default_mode;
        pTcon = cifs_sb->tcon;
        cFYI(1, ("Getting info on %s", full_path));
@@ -530,47 +531,42 @@ int cifs_get_inode_info(struct inode **pinode,
                inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
        }
-        /* set default mode. will override for dirs below */
+        /* get default inode mode */
-        if (atomic_read(&cifsInfo->inUse) == 0)
+        if (attr & ATTR_DIRECTORY)
-                /* new inode, can safely set these fields */
+                default_mode = cifs_sb->mnt_dir_mode;
-                inode->i_mode = cifs_sb->mnt_file_mode;
+        else
-        else /* since we set the inode type below we need to mask off
+                default_mode = cifs_sb->mnt_file_mode;
-             to avoid strange results if type changes and both
-             get orred in */
+        /* set permission bits */
-                inode->i_mode &= ~S_IFMT;
+        if (atomic_read(&cifsInfo->inUse) == 0 ||
-/*      if (attr & ATTR_REPARSE)  */
+            (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
-        /* We no longer handle these as symlinks because we could not
+                inode->i_mode = default_mode;
-           follow them due to the absolute path with drive letter */
+        else {
-        if (attr & ATTR_DIRECTORY) {
+                /* just reenable write bits if !ATTR_READONLY */
-        /* override default perms since we do not do byte range locking
+                if ((inode->i_mode & S_IWUGO) == 0 &&
-           on dirs */
+                    (attr & ATTR_READONLY) == 0)
-                inode->i_mode = cifs_sb->mnt_dir_mode;
+                        inode->i_mode |= (S_IWUGO & default_mode);
-                inode->i_mode |= S_IFDIR;
+                        inode->i_mode &= ~S_IFMT;
-        } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
+        }
-                   (cifsInfo->cifsAttrs & ATTR_SYSTEM) &&
+        /* clear write bits if ATTR_READONLY is set */
-                   /* No need to le64 convert size of zero */
+        if (attr & ATTR_READONLY)
-                   (pfindData->EndOfFile == 0)) {
+                inode->i_mode &= ~S_IWUGO;
-                inode->i_mode = cifs_sb->mnt_file_mode;
-                inode->i_mode |= S_IFIFO;
+        /* set inode type */
-/* BB Finish for SFU style symlinks and devices */
+        if ((attr & ATTR_SYSTEM) &&
-        } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
+            (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
-                   (cifsInfo->cifsAttrs & ATTR_SYSTEM)) {
+                /* no need to fix endianness on 0 */
-                if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile),
+                if (pfindData->EndOfFile == 0)
-                                     full_path, cifs_sb, xid))
+                        inode->i_mode |= S_IFIFO;
-                        cFYI(1, ("Unrecognized sfu inode type"));
+                else if (decode_sfu_inode(inode,
+                                le64_to_cpu(pfindData->EndOfFile),
-                cFYI(1, ("sfu mode 0%o", inode->i_mode));
+                                full_path, cifs_sb, xid))
+                        cFYI(1, ("unknown SFU file type\n"));
        } else {
-                inode->i_mode |= S_IFREG;
+                if (attr & ATTR_DIRECTORY)
-                /* treat dos attribute of read-only as read-only mode eg 555 */
+                        inode->i_mode |= S_IFDIR;
-                if (cifsInfo->cifsAttrs & ATTR_READONLY)
+                else
-                        inode->i_mode &= ~(S_IWUGO);
+                        inode->i_mode |= S_IFREG;
-                else if ((inode->i_mode & S_IWUGO) == 0)
-                        /* the ATTR_READONLY flag may have been */
-                        /* changed on server -- set any w bits  */
-                        /* allowed by mnt_file_mode             */
-                        inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode);
-        /* BB add code to validate if device or weird share or device type? */
        }
        spin_lock(&inode->i_lock);
@@ -1019,8 +1015,11 @@ mkdir_get_info:
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                        }
                        if (direntry->d_inode) {
-                                direntry->d_inode->i_mode = mode;
+                                if (cifs_sb->mnt_cifs_flags &
-                                direntry->d_inode->i_mode |= S_IFDIR;
+                                     CIFS_MOUNT_DYNPERM)
+                                        direntry->d_inode->i_mode =
+                                                (mode | S_IFDIR);
                                if (cifs_sb->mnt_cifs_flags &
                                     CIFS_MOUNT_SET_UID) {
                                        direntry->d_inode->i_uid =
@@ -1547,13 +1546,26 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                } else
                        goto cifs_setattr_exit;
        }
-        if (attrs->ia_valid & ATTR_UID) {
-                cFYI(1, ("UID changed to %d", attrs->ia_uid));
+        /*
-                uid = attrs->ia_uid;
+         * Without unix extensions we can't send ownership changes to the
-        }
+         * server, so silently ignore them. This is consistent with how
-        if (attrs->ia_valid & ATTR_GID) {
+         * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With
-                cFYI(1, ("GID changed to %d", attrs->ia_gid));
+         * CIFSACL support + proper Windows to Unix idmapping, we may be
-                gid = attrs->ia_gid;
+         * able to support this in the future.
+         */
+        if (!pTcon->unix_ext &&
+            !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
+                attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
+        } else {
+                if (attrs->ia_valid & ATTR_UID) {
+                        cFYI(1, ("UID changed to %d", attrs->ia_uid));
+                        uid = attrs->ia_uid;
+                }
+                if (attrs->ia_valid & ATTR_GID) {
+                        cFYI(1, ("GID changed to %d", attrs->ia_gid));
+                        gid = attrs->ia_gid;
+                }
        }
        time_buf.Attributes = 0;
@@ -1563,7 +1575,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                attrs->ia_valid &= ~ATTR_MODE;
        if (attrs->ia_valid & ATTR_MODE) {
-                cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode));
+                cFYI(1, ("Mode changed to 0%o", attrs->ia_mode));
                mode = attrs->ia_mode;
        }
@@ -1578,18 +1590,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 #ifdef CONFIG_CIFS_EXPERIMENTAL
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
                        rc = mode_to_acl(inode, full_path, mode);
-                else if ((mode & S_IWUGO) == 0) {
+                else
-#else
-                if ((mode & S_IWUGO) == 0) {
 #endif
-                        /* not writeable */
+                if (((mode & S_IWUGO) == 0) &&
-                        if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
+                    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
-                                set_dosattr = true;
+                        set_dosattr = true;
-                                time_buf.Attributes =
+                        time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs |
-                                        cpu_to_le32(cifsInode->cifsAttrs |
+                                                          ATTR_READONLY);
-                                                    ATTR_READONLY);
+                        /* fix up mode if we're not using dynperm */
-                        }
+                        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
-                } else if (cifsInode->cifsAttrs & ATTR_READONLY) {
+                                attrs->ia_mode = inode->i_mode & ~S_IWUGO;
+                } else if ((mode & S_IWUGO) &&
+                           (cifsInode->cifsAttrs & ATTR_READONLY)) {
                        /* If file is readonly on server, we would
                        not be able to write to it - so if any write
                        bit is enabled for user or group or other we
@@ -1600,6 +1612,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                        /* Windows ignores set to zero */
                        if (time_buf.Attributes == 0)
                                time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
+                        /* reset local inode permissions to normal */
+                        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
+                                attrs->ia_mode &= ~(S_IALLUGO);
+                                if (S_ISDIR(inode->i_mode))
+                                        attrs->ia_mode |=
+                                                cifs_sb->mnt_dir_mode;
+                                else
+                                        attrs->ia_mode |=
+                                                cifs_sb->mnt_file_mode;
+                        }
+                } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
+                        /* ignore mode change - ATTR_READONLY hasn't changed */
+                        attrs->ia_valid &= ~ATTR_MODE;
                }
        }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1d69b8014e0b..4b17f8fe3157 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -519,8 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
                        pnotify = (struct file_notify_information *)
                                ((char *)&pSMBr->hdr.Protocol + data_offset);
                        cFYI(1, ("dnotify on %s Action: 0x%x",
-                                 pnotify->FileName,
+                                 pnotify->FileName, pnotify->Action));
-                                pnotify->Action));  /* BB removeme BB */
                        /*   cifs_dump_mem("Rcvd notify Data: ",buf,
                                sizeof(struct smb_hdr)+60); */
                        return true;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 713c25110197..83f306954883 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -132,6 +132,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
        __u32 attr;
        __u64 allocation_size;
        __u64 end_of_file;
+        umode_t default_mode;
        /* save mtime and size */
        local_mtime = tmp_inode->i_mtime;
@@ -187,48 +188,54 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
        if (atomic_read(&cifsInfo->inUse) == 0) {
                tmp_inode->i_uid = cifs_sb->mnt_uid;
                tmp_inode->i_gid = cifs_sb->mnt_gid;
-                /* set default mode. will override for dirs below */
+        }
-                tmp_inode->i_mode = cifs_sb->mnt_file_mode;
-        } else {
+        if (attr & ATTR_DIRECTORY)
-                /* mask off the type bits since it gets set
+                default_mode = cifs_sb->mnt_dir_mode;
-                below and we do not want to get two type
+        else
-                bits set */
+                default_mode = cifs_sb->mnt_file_mode;
+        /* set initial permissions */
+        if ((atomic_read(&cifsInfo->inUse) == 0) ||
+            (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
+                tmp_inode->i_mode = default_mode;
+        else {
+                /* just reenable write bits if !ATTR_READONLY */
+                if ((tmp_inode->i_mode & S_IWUGO) == 0 &&
+                    (attr & ATTR_READONLY) == 0)
+                        tmp_inode->i_mode |= (S_IWUGO & default_mode);
                tmp_inode->i_mode &= ~S_IFMT;
        }
-        if (attr & ATTR_DIRECTORY) {
+        /* clear write bits if ATTR_READONLY is set */
-                *pobject_type = DT_DIR;
+        if (attr & ATTR_READONLY)
-                /* override default perms since we do not lock dirs */
+                tmp_inode->i_mode &= ~S_IWUGO;
-                if (atomic_read(&cifsInfo->inUse) == 0)
-                        tmp_inode->i_mode = cifs_sb->mnt_dir_mode;
+        /* set inode type */
-                tmp_inode->i_mode |= S_IFDIR;
+        if ((attr & ATTR_SYSTEM) &&
-        } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) &&
+            (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
-                   (attr & ATTR_SYSTEM)) {
                if (end_of_file == 0)  {
-                        *pobject_type = DT_FIFO;
                        tmp_inode->i_mode |= S_IFIFO;
+                        *pobject_type = DT_FIFO;
                } else {
-                        /* rather than get the type here, we mark the
+                        /*
-                        inode as needing revalidate and get the real type
+                         * trying to get the type can be slow, so just call
-                        (blk vs chr vs. symlink) later ie in lookup */
+                         * this a regular file for now, and mark for reval
-                        *pobject_type = DT_REG;
+                         */
                        tmp_inode->i_mode |= S_IFREG;
+                        *pobject_type = DT_REG;
                        cifsInfo->time = 0;
                }
-/* we no longer mark these because we could not follow them */
-/*        } else if (attr & ATTR_REPARSE) {
-                *pobject_type = DT_LNK;
-                tmp_inode->i_mode |= S_IFLNK; */
        } else {
-                *pobject_type = DT_REG;
+                if (attr & ATTR_DIRECTORY) {
-                tmp_inode->i_mode |= S_IFREG;
+                        tmp_inode->i_mode |= S_IFDIR;
-                if (attr & ATTR_READONLY)
+                        *pobject_type = DT_DIR;
-                        tmp_inode->i_mode &= ~(S_IWUGO);
+                } else {
-                else if ((tmp_inode->i_mode & S_IWUGO) == 0)
+                        tmp_inode->i_mode |= S_IFREG;
-                        /* the ATTR_READONLY flag may have been changed on   */
+                        *pobject_type = DT_REG;
-                        /* server -- set any w bits allowed by mnt_file_mode */
+                }
-                        tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode);
+        }
-        } /* could add code here - to validate if device or weird share type? */
        /* can not fill in nlink here as in qpathinfo version and Unx search */
        if (atomic_read(&cifsInfo->inUse) == 0)
@@ -675,8 +682,6 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
                        cifsFile->invalidHandle = true;
                        CIFSFindClose(xid, pTcon, cifsFile->netfid);
                }
-                kfree(cifsFile->search_resume_name);
-                cifsFile->search_resume_name = NULL;
                if (cifsFile->srch_inf.ntwrk_buf_start) {
                        cFYI(1, ("freeing SMB ff cache buf on search rewind"));
                        if (cifsFile->srch_inf.smallBuf)
@@ -1043,9 +1048,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
                } /* else {
                        cifsFile->invalidHandle = true;
                        CIFSFindClose(xid, pTcon, cifsFile->netfid);
-                }
+                } */
-                kfree(cifsFile->search_resume_name);
-                cifsFile->search_resume_name = NULL; */
                rc = find_cifs_entry(xid, pTcon, file,
                                &current_entry, &num_to_fill);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index cd62d75b2cc0..e2832bc7869a 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1906,9 +1906,9 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
                        goto out;
                }
        }
-        mutex_unlock(&key_tfm_list_mutex);
        (*tfm) = key_tfm->key_tfm;
        (*tfm_mutex) = &key_tfm->key_tfm_mutex;
 out:
+        mutex_unlock(&key_tfm_list_mutex);
        return rc;
 }
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 951ee33a022d..c15c25745e05 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -660,8 +660,6 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
 int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
                                      struct ecryptfs_auth_tok **auth_tok,
                                      char *sig);
-int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
-                         int num_zeros);
 int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
                         loff_t offset, size_t size);
 int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index ebf55150be56..75c2ea9fee35 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -157,20 +157,6 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
                               ecryptfs_page_idx, rc);
                        goto out;
                }
-                if (start_offset_in_page) {
-                        /* Read in the page from the lower
-                         * into the eCryptfs inode page cache,
-                         * decrypting */
-                        rc = ecryptfs_decrypt_page(ecryptfs_page);
-                        if (rc) {
-                                printk(KERN_ERR "%s: Error decrypting "
-                                       "page; rc = [%d]\n",
-                                       __func__, rc);
-                                ClearPageUptodate(ecryptfs_page);
-                                page_cache_release(ecryptfs_page);
-                                goto out;
-                        }
-                }
                ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
                /*
@@ -349,14 +335,6 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
                               ecryptfs_page_idx, rc);
                        goto out;
                }
-                rc = ecryptfs_decrypt_page(ecryptfs_page);
-                if (rc) {
-                        printk(KERN_ERR "%s: Error decrypting "
-                               "page; rc = [%d]\n", __func__, rc);
-                        ClearPageUptodate(ecryptfs_page);
-                        page_cache_release(ecryptfs_page);
-                        goto out;
-                }
                ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
                memcpy((data + data_offset),
                       ((char *)ecryptfs_page_virt + start_offset_in_page),
diff --git a/fs/exec.c b/fs/exec.c
index 3c2ba7ce11d4..9448f1b50b4a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -860,6 +860,7 @@ static int de_thread(struct task_struct *tsk)
 no_thread_group:
        exit_itimers(sig);
+        flush_itimer_signals();
        if (leader)
                release_task(leader);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 28cfd0b40527..77278e947e94 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -580,7 +580,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        }
        blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
-        data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
+        data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count %
+                                         EXT3_ADDR_PER_BLOCK(sb));
        end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
        /* Get each reserved primary GDT block and verify it holds backups */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 30494c5da843..9cc80b9cc8d8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 }
+static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
+                        ext4_group_t block_group)
+{
+        ext4_group_t actual_group;
+        ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
+        if (actual_group == block_group)
+                return 1;
+        return 0;
+}
+static int ext4_group_used_meta_blocks(struct super_block *sb,
+                                ext4_group_t block_group)
+{
+        ext4_fsblk_t tmp;
+        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        /* block bitmap, inode bitmap, and inode table blocks */
+        int used_blocks = sbi->s_itb_per_group + 2;
+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
+                struct ext4_group_desc *gdp;
+                struct buffer_head *bh;
+                gdp = ext4_get_group_desc(sb, block_group, &bh);
+                if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
+                                        block_group))
+                        used_blocks--;
+                if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
+                                        block_group))
+                        used_blocks--;
+                tmp = ext4_inode_table(sb, gdp);
+                for (; tmp < ext4_inode_table(sb, gdp) +
+                                sbi->s_itb_per_group; tmp++) {
+                        if (!ext4_block_in_group(sb, tmp, block_group))
+                                used_blocks -= 1;
+                }
+        }
+        return used_blocks;
+}
 /* Initializes an uninitialized block bitmap if given, and returns the
 * number of blocks free in the group. */
 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
        free_blocks = group_blocks - bit_max;
        if (bh) {
-                ext4_fsblk_t start;
+                ext4_fsblk_t start, tmp;
+                int flex_bg = 0;
                for (bit = 0; bit < bit_max; bit++)
                        ext4_set_bit(bit, bh->b_data);
                start = ext4_group_first_block_no(sb, block_group);
-                /* Set bits for block and inode bitmaps, and inode table */
+                if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-                ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
+                                              EXT4_FEATURE_INCOMPAT_FLEX_BG))
-                ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data);
+                        flex_bg = 1;
-                for (bit = (ext4_inode_table(sb, gdp) - start),
-                     bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
-                        ext4_set_bit(bit, bh->b_data);
+                /* Set bits for block and inode bitmaps, and inode table */
+                tmp = ext4_block_bitmap(sb, gdp);
+                if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+                        ext4_set_bit(tmp - start, bh->b_data);
+                tmp = ext4_inode_bitmap(sb, gdp);
+                if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+                        ext4_set_bit(tmp - start, bh->b_data);
+                tmp = ext4_inode_table(sb, gdp);
+                for (; tmp < ext4_inode_table(sb, gdp) +
+                                sbi->s_itb_per_group; tmp++) {
+                        if (!flex_bg ||
+                                ext4_block_in_group(sb, tmp, block_group))
+                                ext4_set_bit(tmp - start, bh->b_data);
+                }
                /*
                 * Also if the number of blocks within the group is
                 * less than the blocksize * 8 ( which is the size
@@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                 */
                mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
        }
+        return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
-        return free_blocks - sbi->s_itb_per_group - 2;
 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 873ad9b3418c..c9900aade150 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        sbi = EXT4_SB(sb);
        es = sbi->s_es;
-        ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
-                        gdp->bg_free_blocks_count);
        err = -EIO;
        bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        if (!gdp)
                goto out_err;
+        ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
+                        gdp->bg_free_blocks_count);
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
                goto out_err;
@@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
                                struct ext4_prealloc_space *pa)
 {
-        unsigned len = ac->ac_o_ex.fe_len;
+        unsigned int len = ac->ac_o_ex.fe_len;
        ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
                                        &ac->ac_b_ex.fe_group,
                                        &ac->ac_b_ex.fe_start);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9f086a6a472b..9ecb92f68543 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        }
        blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
-        data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count;
+        data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
+                                         EXT4_ADDR_PER_BLOCK(sb));
        end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
        /* Get each reserved primary GDT block and verify it holds backups */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 09d9359c8055..cb96f127c366 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
        unsigned long def_mount_opts;
        struct super_block *sb = vfs->mnt_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        journal_t *journal = sbi->s_journal;
        struct ext4_super_block *es = sbi->s_es;
        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -729,8 +730,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_printf(seq, ",commit=%u",
                           (unsigned) (sbi->s_commit_interval / HZ));
        }
-        if (test_opt(sb, BARRIER))
+        /*
-                seq_puts(seq, ",barrier=1");
+         * We're changing the default of barrier mount option, so
+         * let's always display its mount state so it's clear what its
+         * status is.
+         */
+        seq_puts(seq, ",barrier=");
+        seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
+        if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
+                seq_puts(seq, ",journal_async_commit");
        if (test_opt(sb, NOBH))
                seq_puts(seq, ",nobh");
        if (!test_opt(sb, EXTENTS))
@@ -1907,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
        sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
        set_opt(sbi->s_mount_opt, RESERVATION);
+        set_opt(sbi->s_mount_opt, BARRIER);
        /*
         * turn on extents feature by default in ext4 filesystem
@@ -2189,6 +2198,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
                if (ext4_load_journal(sb, es, journal_devnum))
                        goto failed_mount3;
+                if (!(sb->s_flags & MS_RDONLY) &&
+                    EXT4_SB(sb)->s_journal->j_failed_commit) {
+                        printk(KERN_CRIT "EXT4-fs error (device %s): "
+                               "ext4_fill_super: Journal transaction "
+                               "%u is corrupt\n", sb->s_id, 
+                               EXT4_SB(sb)->s_journal->j_failed_commit);
+                        if (test_opt (sb, ERRORS_RO)) {
+                                printk (KERN_CRIT
+                                        "Mounting filesystem read-only\n");
+                                sb->s_flags |= MS_RDONLY;
+                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+                        }
+                        if (test_opt(sb, ERRORS_PANIC)) {
+                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+                                ext4_commit_super(sb, es, 1);
+                                printk(KERN_CRIT
+                                       "EXT4-fs (device %s): mount failed\n",
+                                      sb->s_id);
+                                goto failed_mount4;
+                        }
+                }
        } else if (journal_inum) {
                if (ext4_create_journal(sb, es, journal_inum))
                        goto failed_mount3;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 27cc1164ec36..771326b8047e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -257,26 +257,34 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 }
 EXPORT_SYMBOL_GPL(fat_getattr);
-static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode,
+static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
-                          mode_t mode)
+                             struct inode *inode, umode_t *mode_ptr)
 {
-        mode_t mask, req = mode & ~S_IFMT;
+        mode_t mask, perm;
-        if (S_ISREG(mode))
+        /*
+         * Note, the basic check is already done by a caller of
+         * (attr->ia_mode & ~MSDOS_VALID_MODE)
+         */
+        if (S_ISREG(inode->i_mode))
                mask = sbi->options.fs_fmask;
        else
                mask = sbi->options.fs_dmask;
+        perm = *mode_ptr & ~(S_IFMT | mask);
        /*
         * Of the r and x bits, all (subject to umask) must be present. Of the
         * w bits, either all (subject to umask) or none must be present.
         */
-        req &= ~mask;
+        if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
-        if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
                return -EPERM;
-        if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask)))
+        if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
                return -EPERM;
+        *mode_ptr &= S_IFMT | perm;
        return 0;
 }
@@ -299,7 +307,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
        struct inode *inode = dentry->d_inode;
-        int mask, error = 0;
+        int error = 0;
        unsigned int ia_valid;
        lock_kernel();
@@ -332,12 +340,13 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
                        error = 0;
                goto out;
        }
        if (((attr->ia_valid & ATTR_UID) &&
             (attr->ia_uid != sbi->options.fs_uid)) ||
            ((attr->ia_valid & ATTR_GID) &&
             (attr->ia_gid != sbi->options.fs_gid)) ||
            ((attr->ia_valid & ATTR_MODE) &&
-             fat_check_mode(sbi, inode, attr->ia_mode) < 0))
+             (attr->ia_mode & ~MSDOS_VALID_MODE)))
                error = -EPERM;
        if (error) {
@@ -346,15 +355,16 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
                goto out;
        }
-        error = inode_setattr(inode, attr);
+        /*
-        if (error)
+         * We don't return -EPERM here. Yes, strange, but this is too
-                goto out;
+         * old behavior.
+         */
+        if (attr->ia_valid & ATTR_MODE) {
+                if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
+                        attr->ia_valid &= ~ATTR_MODE;
+        }
-        if (S_ISDIR(inode->i_mode))
+        error = inode_setattr(inode, attr);
-                mask = sbi->options.fs_dmask;
-        else
-                mask = sbi->options.fs_fmask;
-        inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
 out:
        unlock_kernel();
        return error;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fb77e0962132..43e99513334a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -488,7 +488,12 @@ static struct fuse_conn *new_conn(struct super_block *sb)
                err = bdi_init(&fc->bdi);
                if (err)
                        goto error_kfree;
-                err = bdi_register_dev(&fc->bdi, fc->dev);
+                if (sb->s_bdev) {
+                        err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+                                           MAJOR(fc->dev), MINOR(fc->dev));
+                } else {
+                        err = bdi_register_dev(&fc->bdi, fc->dev);
+                }
                if (err)
                        goto error_bdi_destroy;
                /*
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 4d99685fdce4..a2ed72f7ceee 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal,
                spin_unlock(&journal->j_state_lock);
                /* And try again, without the barrier */
+                lock_buffer(bh);
                set_buffer_uptodate(bh);
                set_buffer_dirty(bh);
                ret = submit_bh(WRITE, bh);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 5d0405a9e7ca..058f50f65b76 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
                        *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
                                     obh->b_size);
                }
+                put_bh(obh);
        }
        return 0;
 }
@@ -610,9 +611,8 @@ static int do_one_pass(journal_t *journal,
                                chksum_err = chksum_seen = 0;
                                if (info->end_transaction) {
-                                        printk(KERN_ERR "JBD: Transaction %u "
+                                        journal->j_failed_commit =
-                                                "found to be corrupt.\n",
+                                                info->end_transaction;
-                                                next_commit_ID - 1);
                                        brelse(bh);
                                        break;
                                }
@@ -643,10 +643,8 @@ static int do_one_pass(journal_t *journal,
                                        if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
                                           JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
-                                                printk(KERN_ERR
+                                                journal->j_failed_commit =
-                                                       "JBD: Transaction %u "
+                                                        next_commit_ID;
-                                                       "found to be corrupt.\n",
-                                                       next_commit_ID);
                                                brelse(bh);
                                                break;
                                        }
diff --git a/fs/libfs.c b/fs/libfs.c
index b004dfadd891..892d41cb3382 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -528,6 +528,23 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
        return count;
 }
+ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
+                                const void *from, size_t available)
+{
+        loff_t pos = *ppos;
+        if (pos < 0)
+                return -EINVAL;
+        if (pos >= available)
+                return 0;
+        if (count > available - pos)
+                count = available - pos;
+        memcpy(to, from + pos, count);
+        *ppos = pos + count;
+        return count;
+}
 /*
 * Transaction based IO.
 * The file expects a single write which triggers the transaction, and then
@@ -800,6 +817,7 @@ EXPORT_SYMBOL(simple_statfs);
 EXPORT_SYMBOL(simple_sync_file);
 EXPORT_SYMBOL(simple_unlink);
 EXPORT_SYMBOL(simple_read_from_buffer);
+EXPORT_SYMBOL(memory_read_from_buffer);
 EXPORT_SYMBOL(simple_transaction_get);
 EXPORT_SYMBOL(simple_transaction_read);
 EXPORT_SYMBOL(simple_transaction_release);
diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c
index 9101807dc81a..e2f72ca98037 100644
--- a/fs/ntfs/upcase.c
+++ b/fs/ntfs/upcase.c
@@ -77,11 +77,10 @@ ntfschar *generate_default_upcase(void)
                uc[i] = cpu_to_le16(i);
        for (r = 0; uc_run_table[r][0]; r++)
                for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
-                        uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) +
+                        le16_add_cpu(&uc[i], uc_run_table[r][2]);
-                                        uc_run_table[r][2]);
        for (r = 0; uc_dup_table[r][0]; r++)
                for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
-                        uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1);
+                        le16_add_cpu(&uc[i + 1], -1);
        for (r = 0; uc_word_table[r][0]; r++)
                uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]);
        return uc;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1e44ad14881a..a27d61581bd6 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -142,53 +142,43 @@ static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
 static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
-static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
-                           u32 msgkey, struct task_struct *task, u8 node)
-{
 #ifdef CONFIG_DEBUG_FS
+void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+                    u32 msgkey, struct task_struct *task, u8 node)
+{
        INIT_LIST_HEAD(&nst->st_net_debug_item);
        nst->st_task = task;
        nst->st_msg_type = msgtype;
        nst->st_msg_key = msgkey;
        nst->st_node = node;
-#endif
 }
-static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
 {
-#ifdef CONFIG_DEBUG_FS
        do_gettimeofday(&nst->st_sock_time);
-#endif
 }
-static void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
 {
-#ifdef CONFIG_DEBUG_FS
        do_gettimeofday(&nst->st_send_time);
-#endif
 }
-static void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
 {
-#ifdef CONFIG_DEBUG_FS
        do_gettimeofday(&nst->st_status_time);
-#endif
 }
-static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
                                         struct o2net_sock_container *sc)
 {
-#ifdef CONFIG_DEBUG_FS
        nst->st_sc = sc;
-#endif
 }
-static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
+void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
 {
-#ifdef CONFIG_DEBUG_FS
        nst->st_id = msg_id;
-#endif
 }
+#endif /* CONFIG_DEBUG_FS */
 static inline int o2net_reconnect_delay(void)
 {
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index a705d5d19036..fd6179eb26d4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -128,23 +128,23 @@ void o2net_debug_del_nst(struct o2net_send_tracking *nst);
 void o2net_debug_add_sc(struct o2net_sock_container *sc);
 void o2net_debug_del_sc(struct o2net_sock_container *sc);
 #else
-static int o2net_debugfs_init(void)
+static inline int o2net_debugfs_init(void)
 {
        return 0;
 }
-static void o2net_debugfs_exit(void)
+static inline void o2net_debugfs_exit(void)
 {
 }
-static void o2net_debug_add_nst(struct o2net_send_tracking *nst)
+static inline void o2net_debug_add_nst(struct o2net_send_tracking *nst)
 {
 }
-static void o2net_debug_del_nst(struct o2net_send_tracking *nst)
+static inline void o2net_debug_del_nst(struct o2net_send_tracking *nst)
 {
 }
-static void o2net_debug_add_sc(struct o2net_sock_container *sc)
+static inline void o2net_debug_add_sc(struct o2net_sock_container *sc)
 {
 }
-static void o2net_debug_del_sc(struct o2net_sock_container *sc)
+static inline void o2net_debug_del_sc(struct o2net_sock_container *sc)
 {
 }
 #endif  /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 8d58cfe410b1..18307ff81b77 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,10 +224,42 @@ struct o2net_send_tracking {
        struct timeval                  st_send_time;
        struct timeval                  st_status_time;
 };
+void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+                    u32 msgkey, struct task_struct *task, u8 node);
+void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
+void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
+void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
+void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+                                  struct o2net_sock_container *sc);
+void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
 #else
 struct o2net_send_tracking {
        u32     dummy;
 };
+static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
+                                  u32 msgkey, struct task_struct *task, u8 node)
+{
+}
+static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
+{
+}
+static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
+{
+}
+static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
+{
+}
+static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
+                                                struct o2net_sock_container *sc)
+{
+}
+static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
+                                        u32 msg_id)
+{
+}
 #endif  /* CONFIG_DEBUG_FS */
 #endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index d34a62a3a625..8c686d22f9c7 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -60,25 +60,25 @@ void dlm_destroy_debugfs_root(void);
 #else
-static int dlm_debug_init(struct dlm_ctxt *dlm)
+static inline int dlm_debug_init(struct dlm_ctxt *dlm)
 {
        return 0;
 }
-static void dlm_debug_shutdown(struct dlm_ctxt *dlm)
+static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
 {
 }
-static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
+static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
 {
        return 0;
 }
-static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
+static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
 {
 }
-static int dlm_create_debugfs_root(void)
+static inline int dlm_create_debugfs_root(void)
 {
        return 0;
 }
-static void dlm_destroy_debugfs_root(void)
+static inline void dlm_destroy_debugfs_root(void)
 {
 }
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index b503772cd0ec..6b97d11f6bf8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -61,7 +61,7 @@
 * negotiated by the client.  The client negotiates based on the maximum
 * version advertised in /sys/fs/ocfs2/max_locking_protocol.  The major
 * number from the "SETV" message must match
- * user_stack.sp_proto->lp_max_version.pv_major, and the minor number
+ * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number
 * must be less than or equal to ...->lp_max_version.pv_minor.
 *
 * Once this information has been set, mounts will be allowed.  From this
@@ -153,7 +153,7 @@ union ocfs2_control_message {
        struct ocfs2_control_message_down       u_down;
 };
-static struct ocfs2_stack_plugin user_stack;
+static struct ocfs2_stack_plugin ocfs2_user_plugin;
 static atomic_t ocfs2_control_opened;
 static int ocfs2_control_this_node = -1;
@@ -399,7 +399,7 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
        char *ptr = NULL;
        struct ocfs2_control_private *p = file->private_data;
        struct ocfs2_protocol_version *max =
-                &user_stack.sp_proto->lp_max_version;
+                &ocfs2_user_plugin.sp_proto->lp_max_version;
        if (ocfs2_control_get_handshake_state(file) !=
            OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
@@ -680,7 +680,7 @@ static void fsdlm_lock_ast_wrapper(void *astarg)
        struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
        int status = lksb->sb_status;
-        BUG_ON(user_stack.sp_proto == NULL);
+        BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
        /*
         * For now we're punting on the issue of other non-standard errors
@@ -693,16 +693,16 @@ static void fsdlm_lock_ast_wrapper(void *astarg)
         */
        if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
-                user_stack.sp_proto->lp_unlock_ast(astarg, 0);
+                ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0);
        else
-                user_stack.sp_proto->lp_lock_ast(astarg);
+                ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg);
 }
 static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
 {
-        BUG_ON(user_stack.sp_proto == NULL);
+        BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
-        user_stack.sp_proto->lp_blocking_ast(astarg, level);
+        ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level);
 }
 static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
@@ -838,7 +838,7 @@ static int user_cluster_this_node(unsigned int *this_node)
        return 0;
 }
-static struct ocfs2_stack_operations user_stack_ops = {
+static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
        .connect        = user_cluster_connect,
        .disconnect     = user_cluster_disconnect,
        .this_node      = user_cluster_this_node,
@@ -849,20 +849,20 @@ static struct ocfs2_stack_operations user_stack_ops = {
        .dump_lksb      = user_dlm_dump_lksb,
 };
-static struct ocfs2_stack_plugin user_stack = {
+static struct ocfs2_stack_plugin ocfs2_user_plugin = {
        .sp_name        = "user",
-        .sp_ops         = &user_stack_ops,
+        .sp_ops         = &ocfs2_user_plugin_ops,
        .sp_owner       = THIS_MODULE,
 };
-static int __init user_stack_init(void)
+static int __init ocfs2_user_plugin_init(void)
 {
        int rc;
        rc = ocfs2_control_init();
        if (!rc) {
-                rc = ocfs2_stack_glue_register(&user_stack);
+                rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
                if (rc)
                        ocfs2_control_exit();
        }
@@ -870,14 +870,14 @@ static int __init user_stack_init(void)
        return rc;
 }
-static void __exit user_stack_exit(void)
+static void __exit ocfs2_user_plugin_exit(void)
 {
-        ocfs2_stack_glue_unregister(&user_stack);
+        ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
        ocfs2_control_exit();
 }
 MODULE_AUTHOR("Oracle");
 MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
 MODULE_LICENSE("GPL");
-module_init(user_stack_init);
+module_init(ocfs2_user_plugin_init);
-module_exit(user_stack_exit);
+module_exit(ocfs2_user_plugin_exit);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9e3b8c33c24b..797d775e0354 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -288,7 +288,7 @@ static void render_cap_t(struct seq_file *m, const char *header,
        seq_printf(m, "%s", header);
        CAP_FOR_EACH_U32(__capi) {
                seq_printf(m, "%08x",
-                           a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+                           a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
        }
        seq_printf(m, "\n");
 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c447e0743a3c..3b455371e7ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -127,6 +127,25 @@ struct pid_entry {
                NULL, &proc_single_file_operations,     \
                { .proc_show = &proc_##OTYPE } )
+/*
+ * Count the number of hardlinks for the pid_entry table, excluding the .
+ * and .. links.
+ */
+static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
+        unsigned int n)
+{
+        unsigned int i;
+        unsigned int count;
+        count = 0;
+        for (i = 0; i < n; ++i) {
+                if (S_ISDIR(entries[i].mode))
+                        ++count;
+        }
+        return count;
+}
 int maps_protect;
 EXPORT_SYMBOL(maps_protect);
@@ -2585,10 +2604,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
        inode->i_op = &proc_tgid_base_inode_operations;
        inode->i_fop = &proc_tgid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-        inode->i_nlink = 5;
-#ifdef CONFIG_SECURITY
+        inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
-        inode->i_nlink += 1;
+                ARRAY_SIZE(tgid_base_stuff));
-#endif
        dentry->d_op = &pid_dentry_operations;
@@ -2816,10 +2834,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
        inode->i_op = &proc_tid_base_inode_operations;
        inode->i_fop = &proc_tid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-        inode->i_nlink = 4;
-#ifdef CONFIG_SECURITY
+        inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
-        inode->i_nlink += 1;
+                ARRAY_SIZE(tid_base_stuff));
-#endif
        dentry->d_op = &pid_dentry_operations;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6f4e8dc97da1..b08d10017911 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -425,7 +425,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
                        }
                }
                unlock_new_inode(inode);
-        }
+        } else
+               module_put(de->owner);
        return inode;
 out_ino:
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 903e617bec58..53cb3f89f2c2 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
 #define K(x) ((x) << (PAGE_SHIFT - 10))
        si_meminfo(&i);
        si_swapinfo(&i);
-        committed = atomic_read(&vm_committed_space);
+        committed = atomic_long_read(&vm_committed_space);
        allowed = ((totalram_pages - hugetlb_total_pages())
                * sysctl_overcommit_ratio / 100) + total_swap_pages;
@@ -725,7 +725,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
        pfn = src / KPMSIZE;
        count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
        if (src & KPMMASK || count & KPMMASK)
-                return -EIO;
+                return -EINVAL;
        while (count > 0) {
                ppage = NULL;
@@ -735,7 +735,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
                if (!ppage)
                        pcount = 0;
                else
-                        pcount = atomic_read(&ppage->_count);
+                        pcount = page_mapcount(ppage);
                if (put_user(pcount, out++)) {
                        ret = -EFAULT;
@@ -791,7 +791,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
        pfn = src / KPMSIZE;
        count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
        if (src & KPMMASK || count & KPMMASK)
-                return -EIO;
+                return -EINVAL;
        while (count > 0) {
                ppage = NULL;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 88717c0f941b..ab8ccc9d14ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -315,9 +315,9 @@ struct mem_size_stats {
 };
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                           void *private)
+                           struct mm_walk *walk)
 {
-        struct mem_size_stats *mss = private;
+        struct mem_size_stats *mss = walk->private;
        struct vm_area_struct *vma = mss->vma;
        pte_t *pte, ptent;
        spinlock_t *ptl;
@@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        return 0;
 }
-static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
 static int show_smap(struct seq_file *m, void *v)
 {
        struct vm_area_struct *vma = v;
        struct mem_size_stats mss;
        int ret;
+        struct mm_walk smaps_walk = {
+                .pmd_entry = smaps_pte_range,
+                .mm = vma->vm_mm,
+                .private = &mss,
+        };
        memset(&mss, 0, sizeof mss);
        mss.vma = vma;
        if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-                walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
+                walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
-                                &smaps_walk, &mss);
        ret = show_map(m, v);
        if (ret)
@@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = {
 };
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
-                                unsigned long end, void *private)
+                                unsigned long end, struct mm_walk *walk)
 {
-        struct vm_area_struct *vma = private;
+        struct vm_area_struct *vma = walk->private;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;
@@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        return 0;
 }
-static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *ppos)
 {
@@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
                return -ESRCH;
        mm = get_task_mm(task);
        if (mm) {
+                static struct mm_walk clear_refs_walk;
+                memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
+                clear_refs_walk.pmd_entry = clear_refs_pte_range;
+                clear_refs_walk.mm = mm;
                down_read(&mm->mmap_sem);
-                for (vma = mm->mmap; vma; vma = vma->vm_next)
+                for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                        clear_refs_walk.private = vma;
                        if (!is_vm_hugetlb_page(vma))
-                                walk_page_range(mm, vma->vm_start, vma->vm_end,
+                                walk_page_range(vma->vm_start, vma->vm_end,
-                                                &clear_refs_walk, vma);
+                                                &clear_refs_walk);
+                }
                flush_tlb_mm(mm);
                up_read(&mm->mmap_sem);
                mmput(mm);
@@ -496,7 +502,7 @@ const struct file_operations proc_clear_refs_operations = {
 };
 struct pagemapread {
-        char __user *out, *end;
+        u64 __user *out, *end;
 };
 #define PM_ENTRY_BYTES      sizeof(u64)
@@ -519,28 +525,18 @@ struct pagemapread {
 static int add_to_pagemap(unsigned long addr, u64 pfn,
                          struct pagemapread *pm)
 {
-        /*
-         * Make sure there's room in the buffer for an
-         * entire entry.  Otherwise, only copy part of
-         * the pfn.
-         */
-        if (pm->out + PM_ENTRY_BYTES >= pm->end) {
-                if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
-                        return -EFAULT;
-                pm->out = pm->end;
-                return PM_END_OF_BUFFER;
-        }
        if (put_user(pfn, pm->out))
                return -EFAULT;
-        pm->out += PM_ENTRY_BYTES;
+        pm->out++;
+        if (pm->out >= pm->end)
+                return PM_END_OF_BUFFER;
        return 0;
 }
 static int pagemap_pte_hole(unsigned long start, unsigned long end,
-                                void *private)
+                                struct mm_walk *walk)
 {
-        struct pagemapread *pm = private;
+        struct pagemapread *pm = walk->private;
        unsigned long addr;
        int err = 0;
        for (addr = start; addr < end; addr += PAGE_SIZE) {
@@ -557,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
        return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
 }
+static unsigned long pte_to_pagemap_entry(pte_t pte)
+{
+        unsigned long pme = 0;
+        if (is_swap_pte(pte))
+                pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
+                        | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
+        else if (pte_present(pte))
+                pme = PM_PFRAME(pte_pfn(pte))
+                        | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+        return pme;
+}
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
-                             void *private)
+                             struct mm_walk *walk)
 {
-        struct pagemapread *pm = private;
+        struct vm_area_struct *vma;
+        struct pagemapread *pm = walk->private;
        pte_t *pte;
        int err = 0;
+        /* find the first VMA at or above 'addr' */
+        vma = find_vma(walk->mm, addr);
        for (; addr != end; addr += PAGE_SIZE) {
                u64 pfn = PM_NOT_PRESENT;
-                pte = pte_offset_map(pmd, addr);
-                if (is_swap_pte(*pte))
+                /* check to see if we've left 'vma' behind
-                        pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte))
+                 * and need a new, higher one */
-                                | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
+                if (vma && (addr >= vma->vm_end))
-                else if (pte_present(*pte))
+                        vma = find_vma(walk->mm, addr);
-                        pfn = PM_PFRAME(pte_pfn(*pte))
-                                | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+                /* check that 'vma' actually covers this address,
-                /* unmap so we're not in atomic when we copy to userspace */
+                 * and that it isn't a huge page vma */
-                pte_unmap(pte);
+                if (vma && (vma->vm_start <= addr) &&
+                    !is_vm_hugetlb_page(vma)) {
+                        pte = pte_offset_map(pmd, addr);
+                        pfn = pte_to_pagemap_entry(*pte);
+                        /* unmap before userspace copy */
+                        pte_unmap(pte);
+                }
                err = add_to_pagemap(addr, pfn, pm);
                if (err)
                        return err;
@@ -634,7 +651,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
        ret = -EINVAL;
        /* file position must be aligned */
-        if (*ppos % PM_ENTRY_BYTES)
+        if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
                goto out_task;
        ret = 0;
@@ -664,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                goto out_pages;
        }
-        pm.out = buf;
+        pm.out = (u64 *)buf;
-        pm.end = buf + count;
+        pm.end = (u64 *)(buf + count);
        if (!ptrace_may_attach(task)) {
                ret = -EIO;
@@ -685,14 +702,14 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                 * user buffer is tracked in "pm", and the walk
                 * will stop when we hit the end of the buffer.
                 */
-                ret = walk_page_range(mm, start_vaddr, end_vaddr,
+                ret = walk_page_range(start_vaddr, end_vaddr,
-                                        &pagemap_walk, &pm);
+                                        &pagemap_walk);
                if (ret == PM_END_OF_BUFFER)
                        ret = 0;
                /* don't need mmap_sem for these, but this looks cleaner */
-                *ppos += pm.out - buf;
+                *ppos += (char *)pm.out - buf;
                if (!ret)
-                        ret = pm.out - buf;
+                        ret = (char *)pm.out - buf;
        }
 out_pages:
diff --git a/fs/splice.c b/fs/splice.c
index 78150038b584..aa5f6f60b305 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
                 */
                wait_on_page_writeback(page);
-                if (PagePrivate(page))
+                if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
-                        try_to_release_page(page, GFP_KERNEL);
+                        goto out_unlock;
                /*
                 * If we succeeded in removing the mapping, set LRU flag
@@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
         * Raced with truncate or failed to remove page from current
         * address space, unlock and return failure.
         */
+out_unlock:
        unlock_page(page);
        return 1;
 }
@@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
        while (len) {
                size_t read_len;
-                loff_t pos = sd->pos;
+                loff_t pos = sd->pos, prev_pos = pos;
                ret = do_splice_to(in, &pos, pipe, len, flags);
                if (unlikely(ret <= 0))
@@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
                 * could get stuck data in the internal pipe:
                 */
                ret = actor(pipe, sd);
-                if (unlikely(ret <= 0))
+                if (unlikely(ret <= 0)) {
+                        sd->pos = prev_pos;
                        goto out_release;
+                }
                bytes += ret;
                len -= ret;
                sd->pos = pos;
-                if (ret < read_len)
+                if (ret < read_len) {
+                        sd->pos = prev_pos + ret;
                        goto out_release;
+                }
        }
 done:
@@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
        ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
        if (ret > 0)
-                *ppos += ret;
+                *ppos = sd.pos;
        return ret;
 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5105015a75ad..98e0e86093b4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages(
                if (unlikely(page == NULL)) {
                        if (flags & XBF_READ_AHEAD) {
                                bp->b_page_count = i;
+                                for (i = 0; i < bp->b_page_count; i++)
+                                        unlock_page(bp->b_pages[i]);
                                return -ENOMEM;
                        }
@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages(
                ASSERT(!PagePrivate(page));
                if (!PageUptodate(page)) {
                        page_count--;
-                        if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) {
+                        if (blocksize >= PAGE_CACHE_SIZE) {
+                                if (flags & XBF_READ)
+                                        bp->b_flags |= _XBF_PAGE_LOCKED;
+                        } else if (!PagePrivate(page)) {
                                if (test_page_region(page, offset, nbytes))
                                        page_count++;
                        }
                }
-                unlock_page(page);
                bp->b_pages[i] = page;
                offset = 0;
        }
+        if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
+                for (i = 0; i < bp->b_page_count; i++)
+                        unlock_page(bp->b_pages[i]);
+        }
        if (page_count == bp->b_page_count)
                bp->b_flags |= XBF_DONE;
@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
        bp->b_count_desired = len;
        bp->b_buffer_length = buflen;
        bp->b_flags |= XBF_MAPPED;
+        bp->b_flags &= ~_XBF_PAGE_LOCKED;
        return 0;
 }
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
        xfs_buf_t               *bp,
        int                     schedule)
 {
-        if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+        if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
+                bp->b_flags &= ~_XBF_PAGE_LOCKED;
                xfs_buf_ioend(bp, schedule);
+        }
 }
 STATIC void
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
+                if (bp->b_flags & _XBF_PAGE_LOCKED)
+                        unlock_page(page);
        } while (bvec >= bio->bi_io_vec);
        _xfs_buf_ioend(bp, 1);
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
         * filesystem block size is not smaller than the page size.
         */
        if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-            (bp->b_flags & XBF_READ) &&
+            ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
+              (XBF_READ|_XBF_PAGE_LOCKED)) &&
            (blocksize >= PAGE_CACHE_SIZE)) {
                bio = bio_alloc(GFP_NOIO, 1);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 841d7883528d..f948ec7ba9a4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -66,6 +66,25 @@ typedef enum {
        _XBF_PAGES = (1 << 18),     /* backed by refcounted pages          */
        _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
        _XBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
+        /*
+         * Special flag for supporting metadata blocks smaller than a FSB.
+         *
+         * In this case we can have multiple xfs_buf_t on a single page and
+         * need to lock out concurrent xfs_buf_t readers as they only
+         * serialise access to the buffer.
+         *
+         * In the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+         * between reads of the page. Hence we can have one thread read the
+         * page and modify it, but then race with another thread that thinks
+         * the page is not up-to-date and hence reads it again.
+         *
+         * The result is that the first modification to the page is lost.
+         * This sort of AGF/AGI reading race can happen when unlinking inodes
+         * that require truncation and results in the AGI unlinked list
+         * modifications being lost.
+         */
+        _XBF_PAGE_LOCKED = (1 << 22),
 } xfs_buf_flags_t;
 typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 65e78c13d4ae..5f60363b9343 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -184,19 +184,24 @@ xfs_file_release(
        return -xfs_release(XFS_I(inode));
 }
+/*
+ * We ignore the datasync flag here because a datasync is effectively
+ * identical to an fsync. That is, datasync implies that we need to write
+ * only the metadata needed to be able to access the data that is written
+ * if we crash after the call completes. Hence if we are writing beyond
+ * EOF we have to log the inode size change as well, which makes it a
+ * full fsync. If we don't write beyond EOF, the inode core will be
+ * clean in memory and so we don't need to log the inode, just like
+ * fsync.
+ */
 STATIC int
 xfs_file_fsync(
        struct file     *filp,
        struct dentry   *dentry,
        int             datasync)
 {
-        int             flags = FSYNC_WAIT;
-        if (datasync)
-                flags |= FSYNC_DATA;
        xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
-        return -xfs_fsync(XFS_I(dentry->d_inode), flags,
+        return -xfs_fsync(XFS_I(dentry->d_inode));
-                        (xfs_off_t)0, (xfs_off_t)-1);
 }
 /*
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 9d73cb5c0fc7..25eb2a9e8d9b 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -230,14 +230,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
 #define ATTR_NOSIZETOK  0x400   /* Don't get the SIZE token */
 /*
- * Flags to vop_fsync/reclaim.
- */
-#define FSYNC_NOWAIT    0       /* asynchronous flush */
-#define FSYNC_WAIT      0x1     /* synchronous fsync or forced reclaim */
-#define FSYNC_INVAL     0x2     /* flush and invalidate cached data */
-#define FSYNC_DATA      0x4     /* synchronous fsync of data only */
-/*
 * Tracking vnode activity.
 */
 #if defined(XFS_INODE_TRACE)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index cf0bb9c1d621..e569bf5d6cf0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2974,6 +2974,7 @@ xfs_iflush_cluster(
        xfs_mount_t             *mp = ip->i_mount;
        xfs_perag_t             *pag = xfs_get_perag(mp, ip->i_ino);
        unsigned long           first_index, mask;
+        unsigned long           inodes_per_cluster;
        int                     ilist_size;
        xfs_inode_t             **ilist;
        xfs_inode_t             *iq;
@@ -2985,8 +2986,9 @@ xfs_iflush_cluster(
        ASSERT(pag->pagi_inodeok);
        ASSERT(pag->pag_ici_init);
-        ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+        inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
-        ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+        ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+        ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
        if (!ilist)
                return 0;
@@ -2995,8 +2997,7 @@ xfs_iflush_cluster(
        read_lock(&pag->pag_ici_lock);
        /* really need a gang lookup range call here */
        nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
-                                        first_index,
+                                        first_index, inodes_per_cluster);
-                                        XFS_INODE_CLUSTER_SIZE(mp));
        if (nr_found == 0)
                goto out_free;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 70702a60b4bb..e475e3717eb3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -856,18 +856,14 @@ xfs_readlink(
 /*
 * xfs_fsync
 *
- * This is called to sync the inode and its data out to disk.
+ * This is called to sync the inode and its data out to disk.  We need to hold
- * We need to hold the I/O lock while flushing the data, and
+ * the I/O lock while flushing the data, and the inode lock while flushing the
- * the inode lock while flushing the inode.  The inode lock CANNOT
+ * inode.  The inode lock CANNOT be held while flushing the data, so acquire
- * be held while flushing the data, so acquire after we're done
+ * after we're done with that.
- * with that.
 */
 int
 xfs_fsync(
-        xfs_inode_t     *ip,
+        xfs_inode_t     *ip)
-        int             flag,
-        xfs_off_t       start,
-        xfs_off_t       stop)
 {
        xfs_trans_t     *tp;
        int             error;
@@ -875,103 +871,79 @@ xfs_fsync(
        xfs_itrace_entry(ip);
-        ASSERT(start >= 0 && stop >= -1);
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return XFS_ERROR(EIO);
-        if (flag & FSYNC_DATA)
+        /* capture size updates in I/O completion before writing the inode. */
-                filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+        error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+        if (error)
+                return XFS_ERROR(error);
        /*
-         * We always need to make sure that the required inode state
+         * We always need to make sure that the required inode state is safe on
-         * is safe on disk.  The vnode might be clean but because
+         * disk.  The vnode might be clean but we still might need to force the
-         * of committed transactions that haven't hit the disk yet.
+         * log because of committed transactions that haven't hit the disk yet.
-         * Likewise, there could be unflushed non-transactional
+         * Likewise, there could be unflushed non-transactional changes to the
-         * changes to the inode core that have to go to disk.
+         * inode core that have to go to disk and this requires us to issue
+         * a synchronous transaction to capture these changes correctly.
         *
-         * The following code depends on one assumption:  that
+         * This code relies on the assumption that if the update_* fields
-         * any transaction that changes an inode logs the core
+         * of the inode are clear and the inode is unpinned then it is clean
-         * because it has to change some field in the inode core
+         * and no action is required.
-         * (typically nextents or nblocks).  That assumption
-         * implies that any transactions against an inode will
-         * catch any non-transactional updates.  If inode-altering
-         * transactions exist that violate this assumption, the
-         * code breaks.  Right now, it figures that if the involved
-         * update_* field is clear and the inode is unpinned, the
-         * inode is clean.  Either it's been flushed or it's been
-         * committed and the commit has hit the disk unpinning the inode.
-         * (Note that xfs_inode_item_format() called at commit clears
-         * the update_* fields.)
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
-        /* If we are flushing data then we care about update_size
+        if (!(ip->i_update_size || ip->i_update_core)) {
-         * being set, otherwise we care about update_core
-         */
-        if ((flag & FSYNC_DATA) ?
-                        (ip->i_update_size == 0) :
-                        (ip->i_update_core == 0)) {
                /*
-                 * Timestamps/size haven't changed since last inode
+                 * Timestamps/size haven't changed since last inode flush or
-                 * flush or inode transaction commit.  That means
+                 * inode transaction commit.  That means either nothing got
-                 * either nothing got written or a transaction
+                 * written or a transaction committed which caught the updates.
-                 * committed which caught the updates.  If the
+                 * If the latter happened and the transaction hasn't hit the
-                 * latter happened and the transaction hasn't
+                 * disk yet, the inode will still be pinned.  If it is,
-                 * hit the disk yet, the inode will be still
+                 * force the log.
-                 * be pinned.  If it is, force the log.
                 */
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
                if (xfs_ipincount(ip)) {
-                        _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+                        error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
-                                      XFS_LOG_FORCE |
+                                      XFS_LOG_FORCE | XFS_LOG_SYNC,
-                                      ((flag & FSYNC_WAIT)
-                                       ? XFS_LOG_SYNC : 0),
                                      &log_flushed);
                } else {
                        /*
-                         * If the inode is not pinned and nothing
+                         * If the inode is not pinned and nothing has changed
-                         * has changed we don't need to flush the
+                         * we don't need to flush the cache.
-                         * cache.
                         */
                        changed = 0;
                }
-                error = 0;
        } else  {
                /*
-                 * Kick off a transaction to log the inode
+                 * Kick off a transaction to log the inode core to get the
-                 * core to get the updates.  Make it
+                 * updates.  The sync transaction will also force the log.
-                 * sync if FSYNC_WAIT is passed in (which
-                 * is done by everybody but specfs).  The
-                 * sync transaction will also force the log.
                 */
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
                tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
-                if ((error = xfs_trans_reserve(tp, 0,
+                error = xfs_trans_reserve(tp, 0,
-                                XFS_FSYNC_TS_LOG_RES(ip->i_mount),
+                                XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
-                                0, 0, 0)))  {
+                if (error) {
                        xfs_trans_cancel(tp, 0);
                        return error;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                /*
-                 * Note - it's possible that we might have pushed
+                 * Note - it's possible that we might have pushed ourselves out
-                 * ourselves out of the way during trans_reserve
+                 * of the way during trans_reserve which would flush the inode.
-                 * which would flush the inode.  But there's no
+                 * But there's no guarantee that the inode buffer has actually
-                 * guarantee that the inode buffer has actually
+                 * gone out yet (it's delwri).  Plus the buffer could be pinned
-                 * gone out yet (it's delwri).  Plus the buffer
+                 * anyway if it's part of an inode in another recent
-                 * could be pinned anyway if it's part of an
+                 * transaction.  So we play it safe and fire off the
-                 * inode in another recent transaction.  So we
+                 * transaction anyway.
-                 * play it safe and fire off the transaction anyway.
                 */
                xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
                xfs_trans_ihold(tp, ip);
                xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-                if (flag & FSYNC_WAIT)
+                xfs_trans_set_sync(tp);
-                        xfs_trans_set_sync(tp);
                error = _xfs_trans_commit(tp, 0, &log_flushed);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 8abe8f186e20..57335ba4ce53 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip);
 int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
                struct cred *credp);
 int xfs_readlink(struct xfs_inode *ip, char *link);
-int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
+int xfs_fsync(struct xfs_inode *ip);
-                xfs_off_t stop);
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
author	Ingo Molnar <mingo@elte.hu>	2008-06-16 05:27:53 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-06-16 05:27:53 -0400
commit	c54f9da1c8ceee19436430afac0798a989eb886d (patch)
tree	412f51c3f2641e4205b767cec95ce6107cd39d36 /fs
parent	a2eddfa95919a730e0e5ed17e9c303fe5ba249cd (diff)
parent	066519068ad2fbe98c7f45552b1f592903a9c8c8 (diff)