Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
  truncate: use new helpers
  truncate: new helpers
  fs: fix overflow in sys_mount() for in-kernel calls
  fs: Make unload_nls() NULL pointer safe
  freeze_bdev: grab active reference to frozen superblocks
  freeze_bdev: kill bd_mount_sem
  exofs: remove BKL from super operations
  fs/romfs: correct error-handling code
  vfs: seq_file: add helpers for data filling
  vfs: remove redundant position check in do_sendfile
  vfs: change sb->s_maxbytes to a loff_t
  vfs: explicitly cast s_maxbytes in fiemap_check_ranges
  libfs: return error code on failed attr set
  seq_file: return a negative error code when seq_path_root() fails.
  vfs: optimize touch_time() too
  vfs: optimization for touch_atime()
  vfs: split generic_forget_inode() so that hugetlbfs does not have to copy it
  fs/inode.c: add dev-id and inode number for debugging in init_special_inode()
  libfs: make simple_read_from_buffer conventional
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-24 11:32:11 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-24 11:32:11 -0400
commit: 6c5daf012c9155aafd2c7973e4278766c30dfad0 (patch)
tree: 33959d7b36d03e1610615641a2940cb2de5e8603
parent: 6d39b27f0ac7e805ae3bd9efa51d7da04bec0360 (diff)
parent: c08d3b0e33edce28e9cfa7b64f7fe5bdeeb29248 (diff)
41 files changed, 532 insertions, 533 deletions
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f366fa956179..25fadb448760 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
 mm start up ... this is a loose form of stability on mm_users. For
 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
 single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might 
+truncate) does not lose sending ipi's to cloned threads that might
 be spawned underneath it and go to user mode to drag in pte's into tlbs.
 swap_lock
diff --git a/fs/attr.c b/fs/attr.c
index 9fe1b1bd30a8..96d394bdaddf 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
 /* Taken over from the old code... */
 /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
        int retval = -EPERM;
        unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
 error:
        return retval;
 }
 EXPORT_SYMBOL(inode_change_ok);
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode:      the inode to be truncated
+ * @offset:     the new size to assign to the inode
+ * @Returns:    0 on success, -ve errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+        if (inode->i_size < offset) {
+                unsigned long limit;
+                limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+                if (limit != RLIM_INFINITY && offset > limit)
+                        goto out_sig;
+                if (offset > inode->i_sb->s_maxbytes)
+                        goto out_big;
+        } else {
+                /*
+                 * truncation of in-use swapfiles is disallowed - it would
+                 * cause subsequent swapout to scribble on the now-freed
+                 * blocks.
+                 */
+                if (IS_SWAPFILE(inode))
+                        return -ETXTBSY;
+        }
+        return 0;
+out_sig:
+        send_sig(SIGXFSZ, current, 0);
+out_big:
+        return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
        unsigned int ia_valid = attr->ia_valid;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dd376c124e71..33baf27fac78 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
 {
        kfree(BEFS_SB(sb)->mount_opts.iocharset);
        BEFS_SB(sb)->mount_opts.iocharset = NULL;
+        unload_nls(BEFS_SB(sb)->nls);
-        if (BEFS_SB(sb)->nls) {
-                unload_nls(BEFS_SB(sb)->nls);
-                BEFS_SB(sb)->nls = NULL;
-        }
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5d1ed50bd46c..9cf4b926f8e4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:       blockdevice to lock
 *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
        int error = 0;
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-        if (bdev->bd_fsfreeze_count > 0) {
+        if (++bdev->bd_fsfreeze_count > 1) {
-                bdev->bd_fsfreeze_count++;
+                /*
+                 * We don't even need to grab a reference - the first call
+                 * to freeze_bdev grabs an active reference and only the last
+                 * thaw_bdev drops it.
+                 */
                sb = get_super(bdev);
+                drop_super(sb);
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                return sb;
        }
-        bdev->bd_fsfreeze_count++;
+        sb = get_active_super(bdev);
-        down(&bdev->bd_mount_sem);
+        if (!sb)
-        sb = get_super(bdev);
+                goto out;
-        if (sb && !(sb->s_flags & MS_RDONLY)) {
+        if (sb->s_flags & MS_RDONLY) {
-                sb->s_frozen = SB_FREEZE_WRITE;
+                deactivate_locked_super(sb);
-                smp_wmb();
+                mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                return sb;
-                sync_filesystem(sb);
+        }
-                sb->s_frozen = SB_FREEZE_TRANS;
+        sb->s_frozen = SB_FREEZE_WRITE;
-                smp_wmb();
+        smp_wmb();
-                sync_blockdev(sb->s_bdev);
+        sync_filesystem(sb);
-                if (sb->s_op->freeze_fs) {
+        sb->s_frozen = SB_FREEZE_TRANS;
-                        error = sb->s_op->freeze_fs(sb);
+        smp_wmb();
-                        if (error) {
-                                printk(KERN_ERR
+        sync_blockdev(sb->s_bdev);
-                                        "VFS:Filesystem freeze failed\n");
-                                sb->s_frozen = SB_UNFROZEN;
+        if (sb->s_op->freeze_fs) {
-                                drop_super(sb);
+                error = sb->s_op->freeze_fs(sb);
-                                up(&bdev->bd_mount_sem);
+                if (error) {
-                                bdev->bd_fsfreeze_count--;
+                        printk(KERN_ERR
-                                mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                                "VFS:Filesystem freeze failed\n");
-                                return ERR_PTR(error);
+                        sb->s_frozen = SB_UNFROZEN;
-                        }
+                        deactivate_locked_super(sb);
+                        bdev->bd_fsfreeze_count--;
+                        mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                        return ERR_PTR(error);
                }
        }
+        up_write(&sb->s_umount);
+ out:
        sync_blockdev(bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
+        return sb;      /* thaw_bdev releases s->s_umount */
-        return sb;      /* thaw_bdev releases s->s_umount and bd_mount_sem */
 }
 EXPORT_SYMBOL(freeze_bdev);
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
 */
 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 {
-        int error = 0;
+        int error = -EINVAL;
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-        if (!bdev->bd_fsfreeze_count) {
+        if (!bdev->bd_fsfreeze_count)
-                mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                goto out_unlock;
-                return -EINVAL;
-        }
+        error = 0;
+        if (--bdev->bd_fsfreeze_count > 0)
-        bdev->bd_fsfreeze_count--;
+                goto out_unlock;
-        if (bdev->bd_fsfreeze_count > 0) {
-                if (sb)
+        if (!sb)
-                        drop_super(sb);
+                goto out_unlock;
-                mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                return 0;
+        BUG_ON(sb->s_bdev != bdev);
-        }
+        down_write(&sb->s_umount);
+        if (sb->s_flags & MS_RDONLY)
-        if (sb) {
+                goto out_deactivate;
-                BUG_ON(sb->s_bdev != bdev);
-                if (!(sb->s_flags & MS_RDONLY)) {
+        if (sb->s_op->unfreeze_fs) {
-                        if (sb->s_op->unfreeze_fs) {
+                error = sb->s_op->unfreeze_fs(sb);
-                                error = sb->s_op->unfreeze_fs(sb);
+                if (error) {
-                                if (error) {
+                        printk(KERN_ERR
-                                        printk(KERN_ERR
+                                "VFS:Filesystem thaw failed\n");
-                                                "VFS:Filesystem thaw failed\n");
+                        sb->s_frozen = SB_FREEZE_TRANS;
-                                        sb->s_frozen = SB_FREEZE_TRANS;
+                        bdev->bd_fsfreeze_count++;
-                                        bdev->bd_fsfreeze_count++;
+                        mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                                        mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                        return error;
-                                        return error;
-                                }
-                        }
-                        sb->s_frozen = SB_UNFROZEN;
-                        smp_wmb();
-                        wake_up(&sb->s_wait_unfrozen);
                }
-                drop_super(sb);
        }
-        up(&bdev->bd_mount_sem);
+        sb->s_frozen = SB_UNFROZEN;
+        smp_wmb();
+        wake_up(&sb->s_wait_unfrozen);
+out_deactivate:
+        if (sb)
+                deactivate_locked_super(sb);
+out_unlock:
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return 0;
 }
@@ -430,7 +437,6 @@ static void init_once(void *foo)
        memset(bdev, 0, sizeof(*bdev));
        mutex_init(&bdev->bd_mutex);
-        sema_init(&bdev->bd_mount_sem, 1);
        INIT_LIST_HEAD(&bdev->bd_inodes);
        INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
diff --git a/fs/buffer.c b/fs/buffer.c
index 209f7f15f5f8..24afd7422ae8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        void *fsdata;
-        unsigned long limit;
        int err;
-        err = -EFBIG;
+        err = inode_newsize_ok(inode, size);
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+        if (err)
-        if (limit != RLIM_INFINITY && size > (loff_t)limit) {
-                send_sig(SIGXFSZ, current, 0);
-                goto out;
-        }
-        if (size > inode->i_sb->s_maxbytes)
                goto out;
        err = pagecache_write_begin(NULL, mapping, size, 0,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d79ce2e95c23..90c5b39f0313 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -185,8 +185,7 @@ out_mount_failed:
                        cifs_sb->mountdata = NULL;
                }
 #endif
-                if (cifs_sb->local_nls)
+                unload_nls(cifs_sb->local_nls);
-                        unload_nls(cifs_sb->local_nls);
                kfree(cifs_sb);
        }
        return rc;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1f09c7619319..5e2492535daa 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 static int cifs_vmtruncate(struct inode *inode, loff_t offset)
 {
-        struct address_space *mapping = inode->i_mapping;
+        loff_t oldsize;
-        unsigned long limit;
+        int err;
        spin_lock(&inode->i_lock);
-        if (inode->i_size < offset)
+        err = inode_newsize_ok(inode, offset);
-                goto do_expand;
+        if (err) {
-        /*
-         * truncation of in-use swapfiles is disallowed - it would cause
-         * subsequent swapout to scribble on the now-freed blocks.
-         */
-        if (IS_SWAPFILE(inode)) {
-                spin_unlock(&inode->i_lock);
-                goto out_busy;
-        }
-        i_size_write(inode, offset);
-        spin_unlock(&inode->i_lock);
-        /*
-         * unmap_mapping_range is called twice, first simply for efficiency
-         * so that truncate_inode_pages does fewer single-page unmaps. However
-         * after this first call, and before truncate_inode_pages finishes,
-         * it is possible for private pages to be COWed, which remain after
-         * truncate_inode_pages finishes, hence the second unmap_mapping_range
-         * call must be made for correctness.
-         */
-        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-        truncate_inode_pages(mapping, offset);
-        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-        goto out_truncate;
-do_expand:
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-        if (limit != RLIM_INFINITY && offset > limit) {
                spin_unlock(&inode->i_lock);
-                goto out_sig;
+                goto out;
-        }
-        if (offset > inode->i_sb->s_maxbytes) {
-                spin_unlock(&inode->i_lock);
-                goto out_big;
        }
+        oldsize = inode->i_size;
        i_size_write(inode, offset);
        spin_unlock(&inode->i_lock);
-out_truncate:
+        truncate_pagecache(inode, oldsize, offset);
        if (inode->i_op->truncate)
                inode->i_op->truncate(inode);
-        return 0;
+out:
-out_sig:
+        return err;
-        send_sig(SIGXFSZ, current, 0);
-out_big:
-        return -EFBIG;
-out_busy:
-        return -ETXTBSY;
 }
 static int
diff --git a/fs/compat.c b/fs/compat.c
index 3aa48834a222..d576b552e8e2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
                                 char __user * type, unsigned long flags,
                                 void __user * data)
 {
-        unsigned long type_page;
+        char *kernel_type;
        unsigned long data_page;
-        unsigned long dev_page;
+        char *kernel_dev;
        char *dir_page;
        int retval;
-        retval = copy_mount_options (type, &type_page);
+        retval = copy_mount_string(type, &kernel_type);
        if (retval < 0)
                goto out;
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
        if (IS_ERR(dir_page))
                goto out1;
-        retval = copy_mount_options (dev_name, &dev_page);
+        retval = copy_mount_string(dev_name, &kernel_dev);
        if (retval < 0)
                goto out2;
-        retval = copy_mount_options (data, &data_page);
+        retval = copy_mount_options(data, &data_page);
        if (retval < 0)
                goto out3;
        retval = -EINVAL;
-        if (type_page && data_page) {
+        if (kernel_type && data_page) {
-                if (!strcmp((char *)type_page, SMBFS_NAME)) {
+                if (!strcmp(kernel_type, SMBFS_NAME)) {
                        do_smb_super_data_conv((void *)data_page);
-                } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
+                } else if (!strcmp(kernel_type, NCPFS_NAME)) {
                        do_ncp_super_data_conv((void *)data_page);
-                } else if (!strcmp((char *)type_page, NFS4_NAME)) {
+                } else if (!strcmp(kernel_type, NFS4_NAME)) {
                        if (do_nfs4_super_data_conv((void *) data_page))
                                goto out4;
                }
        }
-        retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
+        retval = do_mount(kernel_dev, dir_page, kernel_type,
                        flags, (void*)data_page);
 out4:
        free_page(data_page);
 out3:
-        free_page(dev_page);
+        kfree(kernel_dev);
 out2:
        putname(dir_page);
 out1:
-        free_page(type_page);
+        kfree(kernel_type);
 out:
        return retval;
 }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 5ab10c3bbebe..9f500dec3b59 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
        }
        lock_super(sb);
-        lock_kernel();
        sbi = sb->s_fs_info;
        fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
        fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 out:
        if (or)
                osd_end_request(or);
-        unlock_kernel();
        unlock_super(sb);
        kfree(fscb);
        return ret;
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
        int num_pend;
        struct exofs_sb_info *sbi = sb->s_fs_info;
-        lock_kernel();
        if (sb->s_dirt)
                exofs_write_super(sb);
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
        osduld_put_device(sbi->s_dev);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
-        unlock_kernel();
 }
 /*
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8970d8c49bb0..04629d1302fc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
        iput(sbi->fat_inode);
-        if (sbi->nls_disk) {
+        unload_nls(sbi->nls_disk);
-                unload_nls(sbi->nls_disk);
+        unload_nls(sbi->nls_io);
-                sbi->nls_disk = NULL;
-                sbi->options.codepage = fat_default_codepage;
+        if (sbi->options.iocharset != fat_default_iocharset)
-        }
-        if (sbi->nls_io) {
-                unload_nls(sbi->nls_io);
-                sbi->nls_io = NULL;
-        }
-        if (sbi->options.iocharset != fat_default_iocharset) {
                kfree(sbi->options.iocharset);
-                sbi->options.iocharset = fat_default_iocharset;
-        }
        sb->s_fs_info = NULL;
        kfree(sbi);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e703654e7f40..992f6c9410bb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
                return 0;
        if (attr->ia_valid & ATTR_SIZE) {
-                unsigned long limit;
+                err = inode_newsize_ok(inode, attr->ia_size);
-                if (IS_SWAPFILE(inode))
+                if (err)
-                        return -ETXTBSY;
+                        return err;
-                limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-                if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
-                        send_sig(SIGXFSZ, current, 0);
-                        return -EFBIG;
-                }
                is_truncate = true;
        }
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
         * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
         */
        if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-                if (outarg.attr.size < oldsize)
+                truncate_pagecache(inode, oldsize, outarg.attr.size);
-                        fuse_truncate(inode->i_mapping, outarg.attr.size);
                invalidate_inode_pages2(inode->i_mapping);
        }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fc9c79feb5f7..01cc462ff45d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid);
-void fuse_truncate(struct address_space *mapping, loff_t offset);
 /**
 * Initialize the client device
 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6da947daabda..1a822ce2b24b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
        return 0;
 }
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
-        /* See vmtruncate() */
-        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-        truncate_inode_pages(mapping, offset);
-        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid)
 {
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
        spin_unlock(&fc->lock);
        if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
-                if (attr->size < oldsize)
+                truncate_pagecache(inode, oldsize, attr->size);
-                        fuse_truncate(inode->i_mapping, attr->size);
                invalidate_inode_pages2(inode->i_mapping);
        }
 }
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 7b6165f25fbe..8bbe03c3f6d5 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
        brelse(HFS_SB(sb)->mdb_bh);
        brelse(HFS_SB(sb)->alt_mdb_bh);
-        if (HFS_SB(sb)->nls_io)
+        unload_nls(HFS_SB(sb)->nls_io);
-                unload_nls(HFS_SB(sb)->nls_io);
+        unload_nls(HFS_SB(sb)->nls_disk);
-        if (HFS_SB(sb)->nls_disk)
-                unload_nls(HFS_SB(sb)->nls_disk);
        free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
        kfree(HFS_SB(sb));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index c0759fe0855b..43022f3d5148 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
        iput(HFSPLUS_SB(sb).alloc_file);
        iput(HFSPLUS_SB(sb).hidden_dir);
        brelse(HFSPLUS_SB(sb).s_vhbh);
-        if (HFSPLUS_SB(sb).nls)
+        unload_nls(HFSPLUS_SB(sb).nls);
-                unload_nls(HFSPLUS_SB(sb).nls);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
@@ -464,8 +463,7 @@ out:
 cleanup:
        hfsplus_put_super(sb);
-        if (nls)
+        unload_nls(nls);
-                unload_nls(nls);
        return err;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 133335479c24..87a1258953b8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
 {
-        struct super_block *sb = inode->i_sb;
+        if (generic_detach_inode(inode)) {
+                truncate_hugepages(inode, 0);
-        if (!hlist_unhashed(&inode->i_hash)) {
+                clear_inode(inode);
-                if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+                destroy_inode(inode);
-                        list_move(&inode->i_list, &inode_unused);
-                inodes_stat.nr_unused++;
-                if (!sb || (sb->s_flags & MS_ACTIVE)) {
-                        spin_unlock(&inode_lock);
-                        return;
-                }
-                inode->i_state |= I_WILL_FREE;
-                spin_unlock(&inode_lock);
-                /*
-                 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
-                 * in our backing_dev_info.
-                 */
-                write_inode_now(inode, 1);
-                spin_lock(&inode_lock);
-                inode->i_state &= ~I_WILL_FREE;
-                inodes_stat.nr_unused--;
-                hlist_del_init(&inode->i_hash);
        }
-        list_del_init(&inode->i_list);
-        list_del_init(&inode->i_sb_list);
-        inode->i_state |= I_FREEING;
-        inodes_stat.nr_inodes--;
-        spin_unlock(&inode_lock);
-        truncate_hugepages(inode, 0);
-        clear_inode(inode);
-        destroy_inode(inode);
 }
 static void hugetlbfs_drop_inode(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c
index 76582b06ab97..4d8e3be55976 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(generic_delete_inode);
-static void generic_forget_inode(struct inode *inode)
+/**
+ *      generic_detach_inode - remove inode from inode lists
+ *      @inode: inode to remove
+ *
+ *      Remove inode from inode lists, write it if it's dirty. This is just an
+ *      internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ *      Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
 {
        struct super_block *sb = inode->i_sb;
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
                inodes_stat.nr_unused++;
                if (sb->s_flags & MS_ACTIVE) {
                        spin_unlock(&inode_lock);
-                        return;
+                        return 0;
                }
                WARN_ON(inode->i_state & I_NEW);
                inode->i_state |= I_WILL_FREE;
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
        inode->i_state |= I_FREEING;
        inodes_stat.nr_inodes--;
        spin_unlock(&inode_lock);
+        return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+static void generic_forget_inode(struct inode *inode)
+{
+        if (!generic_detach_inode(inode))
+                return;
        if (inode->i_data.nrpages)
                truncate_inode_pages(&inode->i_data, 0);
        clear_inode(inode);
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
        struct inode *inode = dentry->d_inode;
        struct timespec now;
-        if (mnt_want_write(mnt))
-                return;
        if (inode->i_flags & S_NOATIME)
-                goto out;
+                return;
        if (IS_NOATIME(inode))
-                goto out;
+                return;
        if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
-                goto out;
+                return;
        if (mnt->mnt_flags & MNT_NOATIME)
-                goto out;
+                return;
        if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
-                goto out;
+                return;
        now = current_fs_time(inode->i_sb);
        if (!relatime_need_update(mnt, inode, now))
-                goto out;
+                return;
        if (timespec_equal(&inode->i_atime, &now))
-                goto out;
+                return;
+        if (mnt_want_write(mnt))
+                return;
        inode->i_atime = now;
        mark_inode_dirty_sync(inode);
-out:
        mnt_drop_write(mnt);
 }
 EXPORT_SYMBOL(touch_atime);
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct timespec now;
-        int sync_it = 0;
+        enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
-        int err;
+        /* First try to exhaust all avenues to not sync */
        if (IS_NOCMTIME(inode))
                return;
-        err = mnt_want_write_file(file);
-        if (err)
-                return;
        now = current_fs_time(inode->i_sb);
-        if (!timespec_equal(&inode->i_mtime, &now)) {
+        if (!timespec_equal(&inode->i_mtime, &now))
-                inode->i_mtime = now;
+                sync_it = S_MTIME;
-                sync_it = 1;
-        }
-        if (!timespec_equal(&inode->i_ctime, &now)) {
+        if (!timespec_equal(&inode->i_ctime, &now))
-                inode->i_ctime = now;
+                sync_it |= S_CTIME;
-                sync_it = 1;
-        }
-        if (IS_I_VERSION(inode)) {
+        if (IS_I_VERSION(inode))
-                inode_inc_iversion(inode);
+                sync_it |= S_VERSION;
-                sync_it = 1;
-        }
+        if (!sync_it)
+                return;
-        if (sync_it)
+        /* Finally allowed to write? Takes lock. */
-                mark_inode_dirty_sync(inode);
+        if (mnt_want_write_file(file))
+                return;
+        /* Only change inode inside the lock region */
+        if (sync_it & S_VERSION)
+                inode_inc_iversion(inode);
+        if (sync_it & S_CTIME)
+                inode->i_ctime = now;
+        if (sync_it & S_MTIME)
+                inode->i_mtime = now;
+        mark_inode_dirty_sync(inode);
        mnt_drop_write(file->f_path.mnt);
 }
 EXPORT_SYMBOL(file_update_time);
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
        else if (S_ISSOCK(mode))
                inode->i_fop = &bad_sock_fops;
        else
-                printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
+                printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
-                       mode);
+                                  " inode %s:%lu\n", mode, inode->i_sb->s_id,
+                                  inode->i_ino);
 }
 EXPORT_SYMBOL(init_special_inode);
diff --git a/fs/internal.h b/fs/internal.h
index d55ef562f0bb..515175b8b72e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
 * namespace.c
 */
 extern int copy_mount_options(const void __user *, unsigned long *);
+extern int copy_mount_string(const void __user *, char **);
 extern void free_vfsmnt(struct vfsmount *);
 extern struct vfsmount *alloc_vfsmnt(const char *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5612880fcbe7..7b17a14396ff 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
 static int fiemap_check_ranges(struct super_block *sb,
                               u64 start, u64 len, u64 *new_len)
 {
+        u64 maxbytes = (u64) sb->s_maxbytes;
        *new_len = len;
        if (len == 0)
                return -EINVAL;
-        if (start > sb->s_maxbytes)
+        if (start > maxbytes)
                return -EFBIG;
        /*
         * Shrink request scope to what the fs can actually handle.
         */
-        if ((len > sb->s_maxbytes) ||
+        if (len > maxbytes || (maxbytes - len) < start)
-            (sb->s_maxbytes - len) < start)
+                *new_len = maxbytes - start;
-                *new_len = sb->s_maxbytes - start;
        return 0;
 }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 85f96bc651c7..6b4dcd4f2943 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
 #ifdef CONFIG_JOLIET
        lock_kernel();
-        if (sbi->s_nls_iocharset) {
+        unload_nls(sbi->s_nls_iocharset);
-                unload_nls(sbi->s_nls_iocharset);
-                sbi->s_nls_iocharset = NULL;
-        }
        unlock_kernel();
 #endif
@@ -912,8 +909,7 @@ out_no_root:
                printk(KERN_WARNING "%s: get root inode failed\n", __func__);
 out_no_inode:
 #ifdef CONFIG_JOLIET
-        if (sbi->s_nls_iocharset)
+        unload_nls(sbi->s_nls_iocharset);
-                unload_nls(sbi->s_nls_iocharset);
 #endif
        goto out_freesbi;
 out_no_read:
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 37e6dcda8fc8..2234c73fc577 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
        rc = jfs_umount(sb);
        if (rc)
                jfs_err("jfs_umount failed with return code %d", rc);
-        if (sbi->nls_tab)
-                unload_nls(sbi->nls_tab);
+        unload_nls(sbi->nls_tab);
-        sbi->nls_tab = NULL;
        truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
        iput(sbi->direct_inode);
-        sbi->direct_inode = NULL;
        kfree(sbi);
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
        if (nls_map != (void *) -1) {
                /* Discard old (if remount) */
-                if (sbi->nls_tab)
+                unload_nls(sbi->nls_tab);
-                        unload_nls(sbi->nls_tab);
                sbi->nls_tab = nls_map;
        }
        return 1;
diff --git a/fs/libfs.c b/fs/libfs.c
index dcec3d3ea64f..219576c52d80 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
                                const void *from, size_t available)
 {
        loff_t pos = *ppos;
+        size_t ret;
        if (pos < 0)
                return -EINVAL;
-        if (pos >= available)
+        if (pos >= available || !count)
                return 0;
        if (count > available - pos)
                count = available - pos;
-        if (copy_to_user(to, from + pos, count))
+        ret = copy_to_user(to, from + pos, count);
+        if (ret == count)
                return -EFAULT;
+        count -= ret;
        *ppos = pos + count;
        return count;
 }
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
        if (copy_from_user(attr->set_buf, buf, size))
                goto out;
-        ret = len; /* claim we got the whole input */
        attr->set_buf[size] = '\0';
        val = simple_strtol(attr->set_buf, NULL, 0);
-        attr->set(attr->data, val);
+        ret = attr->set(attr->data, val);
+        if (ret == 0)
+                ret = len; /* on success, claim we got the whole input */
 out:
        mutex_unlock(&attr->mutex);
        return ret;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7230787d18b0..bdc3cb4fd222 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
 {
        struct vfsmount *mnt;
-        if (!type || !memchr(type, 0, PAGE_SIZE))
+        if (!type)
                return -EINVAL;
        /* we need capabilities... */
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
        return 0;
 }
+int copy_mount_string(const void __user *data, char **where)
+{
+        char *tmp;
+        if (!data) {
+                *where = NULL;
+                return 0;
+        }
+        tmp = strndup_user(data, PAGE_SIZE);
+        if (IS_ERR(tmp))
+                return PTR_ERR(tmp);
+        *where = tmp;
+        return 0;
+}
 /*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
        if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                return -EINVAL;
-        if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
-                return -EINVAL;
        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
                char __user *, type, unsigned long, flags, void __user *, data)
 {
-        int retval;
+        int ret;
+        char *kernel_type;
+        char *kernel_dir;
+        char *kernel_dev;
        unsigned long data_page;
-        unsigned long type_page;
-        unsigned long dev_page;
-        char *dir_page;
-        retval = copy_mount_options(type, &type_page);
+        ret = copy_mount_string(type, &kernel_type);
-        if (retval < 0)
+        if (ret < 0)
-                return retval;
+                goto out_type;
-        dir_page = getname(dir_name);
+        kernel_dir = getname(dir_name);
-        retval = PTR_ERR(dir_page);
+        if (IS_ERR(kernel_dir)) {
-        if (IS_ERR(dir_page))
+                ret = PTR_ERR(kernel_dir);
-                goto out1;
+                goto out_dir;
+        }
-        retval = copy_mount_options(dev_name, &dev_page);
+        ret = copy_mount_string(dev_name, &kernel_dev);
-        if (retval < 0)
+        if (ret < 0)
-                goto out2;
+                goto out_dev;
-        retval = copy_mount_options(data, &data_page);
+        ret = copy_mount_options(data, &data_page);
-        if (retval < 0)
+        if (ret < 0)
-                goto out3;
+                goto out_data;
-        retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
+        ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
-                          flags, (void *)data_page);
+                (void *) data_page);
-        free_page(data_page);
-out3:
+        free_page(data_page);
-        free_page(dev_page);
+out_data:
-out2:
+        kfree(kernel_dev);
-        putname(dir_page);
+out_dev:
-out1:
+        putname(kernel_dir);
-        free_page(type_page);
+out_dir:
-        return retval;
+        kfree(kernel_type);
+out_type:
+        return ret;
 }
 /*
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index b99ce205b1bd..cf98da1be23e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
 #ifdef CONFIG_NCPFS_NLS
        /* unload the NLS charsets */
-        if (server->nls_vol)
+        unload_nls(server->nls_vol);
-        {
+        unload_nls(server->nls_io);
-                unload_nls(server->nls_vol);
-                server->nls_vol = NULL;
-        }
-        if (server->nls_io)
-        {
-                unload_nls(server->nls_io);
-                server->nls_io = NULL;
-        }
 #endif /* CONFIG_NCPFS_NLS */
        if (server->info_filp)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 53a7ed7eb9c6..0d58caf4a6e1 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
        oldset_io = server->nls_io;
        server->nls_io = iocharset;
-        if (oldset_cp)
+        unload_nls(oldset_cp);
-                unload_nls(oldset_cp);
+        unload_nls(oldset_io);
-        if (oldset_io)
-                unload_nls(oldset_io);
        return 0;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 060022b4651c..faa091865ad0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
-        if (i_size_read(inode) < offset) {
+        loff_t oldsize;
-                unsigned long limit;
+        int err;
-                limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-                if (limit != RLIM_INFINITY && offset > limit)
-                        goto out_sig;
-                if (offset > inode->i_sb->s_maxbytes)
-                        goto out_big;
-                spin_lock(&inode->i_lock);
-                i_size_write(inode, offset);
-                spin_unlock(&inode->i_lock);
-        } else {
-                struct address_space *mapping = inode->i_mapping;
-                /*
+        err = inode_newsize_ok(inode, offset);
-                 * truncation of in-use swapfiles is disallowed - it would
+        if (err)
-                 * cause subsequent swapout to scribble on the now-freed
+                goto out;
-                 * blocks.
-                 */
-                if (IS_SWAPFILE(inode))
-                        return -ETXTBSY;
-                spin_lock(&inode->i_lock);
-                i_size_write(inode, offset);
-                spin_unlock(&inode->i_lock);
-                /*
+        spin_lock(&inode->i_lock);
-                 * unmap_mapping_range is called twice, first simply for
+        oldsize = inode->i_size;
-                 * efficiency so that truncate_inode_pages does fewer
+        i_size_write(inode, offset);
-                 * single-page unmaps.  However after this first call, and
+        spin_unlock(&inode->i_lock);
-                 * before truncate_inode_pages finishes, it is possible for
-                 * private pages to be COWed, which remain after
+        truncate_pagecache(inode, oldsize, offset);
-                 * truncate_inode_pages finishes, hence the second
+out:
-                 * unmap_mapping_range call must be made for correctness.
+        return err;
-                 */
-                unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-                truncate_inode_pages(mapping, offset);
-                unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-        }
-        return 0;
-out_sig:
-        send_sig(SIGXFSZ, current, 0);
-out_big:
-        return -EFBIG;
 }
 /**
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 477d37d83b31..2224b4d07bf0 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
 void unload_nls(struct nls_table *nls)
 {
-        module_put(nls->owner);
+        if (nls)
+                module_put(nls->owner);
 }
 static const wchar_t charset2uni[256] = {
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index abaaa1cbf8de..80b04770e8e9 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -201,8 +201,7 @@ use_utf8:
                                                v, old_nls->charset);
                                nls_map = old_nls;
                        } else /* nls_map */ {
-                                if (old_nls)
+                                unload_nls(old_nls);
-                                        unload_nls(old_nls);
                        }
                } else if (!strcmp(p, "utf8")) {
                        bool val = false;
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
                ntfs_free(vol->upcase);
                vol->upcase = NULL;
        }
-        if (vol->nls_map) {
-                unload_nls(vol->nls_map);
+        unload_nls(vol->nls_map);
-                vol->nls_map = NULL;
-        }
        sb->s_fs_info = NULL;
        kfree(vol);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 11f0c06316de..32fae4040ebf 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
        /* make various checks */
        order = get_order(newsize);
        if (unlikely(order >= MAX_ORDER))
-                goto too_big;
+                return -EFBIG;
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+        ret = inode_newsize_ok(inode, newsize);
-        if (limit != RLIM_INFINITY && newsize > limit)
+        if (ret)
-                goto fsize_exceeded;
+                return ret;
-        if (newsize > inode->i_sb->s_maxbytes)
-                goto too_big;
        i_size_write(inode, newsize);
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
        return 0;
- fsize_exceeded:
+add_error:
-        send_sig(SIGXFSZ, current, 0);
- too_big:
-        return -EFBIG;
- add_error:
        while (loop < npages)
                __free_page(pages + loop++);
        return ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 6c8c55dec2bc..3ac28987f22a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
        pos = *ppos;
-        retval = -EINVAL;
-        if (unlikely(pos < 0))
-                goto fput_out;
        if (unlikely(pos + count > max)) {
                retval = -EOVERFLOW;
                if (pos >= max)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 6c959275f2d0..eae7d9dbf3ff 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
 */
 int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
-        if (m->count < m->size) {
+        char *buf;
-                char *s = m->buf + m->count;
+        size_t size = seq_get_buf(m, &buf);
-                char *p = d_path(path, s, m->size - m->count);
+        int res = -1;
+        if (size) {
+                char *p = d_path(path, buf, size);
                if (!IS_ERR(p)) {
-                        s = mangle_path(s, p, esc);
+                        char *end = mangle_path(buf, p, esc);
-                        if (s) {
+                        if (end)
-                                p = m->buf + m->count;
+                                res = end - buf;
-                                m->count = s - m->buf;
-                                return s - p;
-                        }
                }
        }
-        m->count = m->size;
+        seq_commit(m, res);
-        return -1;
+        return res;
 }
 EXPORT_SYMBOL(seq_path);
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
                  char *esc)
 {
-        int err = -ENAMETOOLONG;
+        char *buf;
-        if (m->count < m->size) {
+        size_t size = seq_get_buf(m, &buf);
-                char *s = m->buf + m->count;
+        int res = -ENAMETOOLONG;
+        if (size) {
                char *p;
                spin_lock(&dcache_lock);
-                p = __d_path(path, root, s, m->size - m->count);
+                p = __d_path(path, root, buf, size);
                spin_unlock(&dcache_lock);
-                err = PTR_ERR(p);
+                res = PTR_ERR(p);
                if (!IS_ERR(p)) {
-                        s = mangle_path(s, p, esc);
+                        char *end = mangle_path(buf, p, esc);
-                        if (s) {
+                        if (end)
-                                p = m->buf + m->count;
+                                res = end - buf;
-                                m->count = s - m->buf;
+                        else
-                                return 0;
+                                res = -ENAMETOOLONG;
-                        }
                }
        }
-        m->count = m->size;
+        seq_commit(m, res);
-        return err;
+        return res < 0 ? res : 0;
 }
 /*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 */
 int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 {
-        if (m->count < m->size) {
+        char *buf;
-                char *s = m->buf + m->count;
+        size_t size = seq_get_buf(m, &buf);
-                char *p = dentry_path(dentry, s, m->size - m->count);
+        int res = -1;
+        if (size) {
+                char *p = dentry_path(dentry, buf, size);
                if (!IS_ERR(p)) {
-                        s = mangle_path(s, p, esc);
+                        char *end = mangle_path(buf, p, esc);
-                        if (s) {
+                        if (end)
-                                p = m->buf + m->count;
+                                res = end - buf;
-                                m->count = s - m->buf;
-                                return s - p;
-                        }
                }
        }
-        m->count = m->size;
+        seq_commit(m, res);
-        return -1;
+        return res;
 }
 int seq_bitmap(struct seq_file *m, const unsigned long *bits,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 1402d2d54f52..1c4c8f089970 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
 static void
 smb_unload_nls(struct smb_sb_info *server)
 {
-        if (server->remote_nls) {
+        unload_nls(server->remote_nls);
-                unload_nls(server->remote_nls);
+        unload_nls(server->local_nls);
-                server->remote_nls = NULL;
-        }
-        if (server->local_nls) {
-                unload_nls(server->local_nls);
-                server->local_nls = NULL;
-        }
 }
 static void
diff --git a/fs/super.c b/fs/super.c
index 0e7207b9815c..19eb70b374bc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -465,6 +465,48 @@ rescan:
 }
 EXPORT_SYMBOL(get_super);
+/**
+ * get_active_super - get an active reference to the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given.  Returns the superblock with an active
+ * reference and s_umount held exclusively or %NULL if none was found.
+ */
+struct super_block *get_active_super(struct block_device *bdev)
+{
+        struct super_block *sb;
+        if (!bdev)
+                return NULL;
+        spin_lock(&sb_lock);
+        list_for_each_entry(sb, &super_blocks, s_list) {
+                if (sb->s_bdev != bdev)
+                        continue;
+                sb->s_count++;
+                spin_unlock(&sb_lock);
+                down_write(&sb->s_umount);
+                if (sb->s_root) {
+                        spin_lock(&sb_lock);
+                        if (sb->s_count > S_BIAS) {
+                                atomic_inc(&sb->s_active);
+                                sb->s_count--;
+                                spin_unlock(&sb_lock);
+                                return sb;
+                        }
+                        spin_unlock(&sb_lock);
+                }
+                up_write(&sb->s_umount);
+                put_super(sb);
+                yield();
+                spin_lock(&sb_lock);
+        }
+        spin_unlock(&sb_lock);
+        return NULL;
+}
 
 struct super_block * user_get_super(dev_t dev)
 {
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
        int retval;
        int remount_rw;
-        
+        if (sb->s_frozen != SB_UNFROZEN)
+                return -EBUSY;
 #ifdef CONFIG_BLOCK
        if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
                return -EACCES;
 #endif
        if (flags & MS_RDONLY)
                acct_auto_close(sb);
        shrink_dcache_sb(sb);
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
         * will protect the lockfs code from trying to start a snapshot
         * while we are mounting
         */
-        down(&bdev->bd_mount_sem);
+        mutex_lock(&bdev->bd_fsfreeze_mutex);
+        if (bdev->bd_fsfreeze_count > 0) {
+                mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                error = -EBUSY;
+                goto error_bdev;
+        }
        s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-        up(&bdev->bd_mount_sem);
+        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        if (IS_ERR(s))
                goto error_s;
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
        if (error)
                goto out_sb;
+        /*
+         * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+         * but s_maxbytes was an unsigned long long for many releases. Throw
+         * this warning for a little while to try and catch filesystems that
+         * violate this rule. This warning should be either removed or
+         * converted to a BUG() in 2.6.34.
+         */
+        WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
+                "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
        mnt->mnt_mountpoint = mnt->mnt_root;
        mnt->mnt_parent = mnt;
        up_write(&mnt->mnt_sb->s_umount);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 78e95b8b66d4..2adaa2529f18 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -641,7 +641,6 @@ struct block_device {
        struct super_block *    bd_super;
        int                     bd_openers;
        struct mutex            bd_mutex;       /* open/close mutex */
-        struct semaphore        bd_mount_sem;
        struct list_head        bd_inodes;
        void *                  bd_holder;
        int                     bd_holders;
@@ -1316,7 +1315,7 @@ struct super_block {
        unsigned long           s_blocksize;
        unsigned char           s_blocksize_bits;
        unsigned char           s_dirt;
-        unsigned long long      s_maxbytes;     /* Max file size */
+        loff_t                  s_maxbytes;     /* Max file size */
        struct file_system_type *s_type;
        const struct super_operations   *s_op;
        const struct dquot_operations   *dq_op;
@@ -2157,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);
 extern void generic_delete_inode(struct inode *inode);
 extern void generic_drop_inode(struct inode *inode);
+extern int generic_detach_inode(struct inode *inode);
 extern struct inode *ilookup5_nowait(struct super_block *sb,
                unsigned long hashval, int (*test)(struct inode *, void *),
@@ -2335,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_active_super(struct block_device *bdev);
 extern struct super_block *user_get_super(dev_t);
 extern void drop_super(struct super_block *sb);
@@ -2382,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
 #define buffer_migrate_page NULL
 #endif
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 extern void file_update_time(struct file *file);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6953a5a53e44..df08551cb0ad 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -792,8 +792,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
        unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
-extern int vmtruncate(struct inode * inode, loff_t offset);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 0c6a86b79596..8366d8f12e53 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -35,6 +35,44 @@ struct seq_operations {
 #define SEQ_SKIP 1
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+        BUG_ON(m->count > m->size);
+        if (m->count < m->size)
+                *bufp = m->buf + m->count;
+        else
+                *bufp = NULL;
+        return m->size - m->count;
+}
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_get_buf.  To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+        if (num < 0) {
+                m->count = m->size;
+        } else {
+                BUG_ON(m->count + num > m->size);
+                m->count += num;
+        }
+}
 char *mangle_path(char *s, char *p, char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
diff --git a/mm/filemap.c b/mm/filemap.c
index c1fc205a92c6..6c84e598b4a9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,7 +58,7 @@
 /*
 * Lock ordering:
 *
- *  ->i_mmap_lock               (vmtruncate)
+ *  ->i_mmap_lock               (truncate_pagecache)
 *    ->private_lock            (__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock             (exclusive_swap_page, others)
 *        ->mapping->tree_lock
diff --git a/mm/memory.c b/mm/memory.c
index 987389a809e7..7e91b5f9f690 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                unsigned long addr = vma->vm_start;
                /*
-                 * Hide vma from rmap and vmtruncate before freeing pgtables
+                 * Hide vma from rmap and truncate_pagecache before freeing
+                 * pgtables
                 */
                anon_vma_unlink(vma);
                unlink_file_vma(vma);
@@ -2408,7 +2409,7 @@ restart:
 * @mapping: the address space containing mmaps to be unmapped.
 * @holebegin: byte in first page to unmap, relative to the start of
 * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
 * must keep the partial page.  In contrast, we must get rid of
 * partial pages.
 * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2459,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-        if (inode->i_size < offset) {
-                unsigned long limit;
-                limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-                if (limit != RLIM_INFINITY && offset > limit)
-                        goto out_sig;
-                if (offset > inode->i_sb->s_maxbytes)
-                        goto out_big;
-                i_size_write(inode, offset);
-        } else {
-                struct address_space *mapping = inode->i_mapping;
-                /*
-                 * truncation of in-use swapfiles is disallowed - it would
-                 * cause subsequent swapout to scribble on the now-freed
-                 * blocks.
-                 */
-                if (IS_SWAPFILE(inode))
-                        return -ETXTBSY;
-                i_size_write(inode, offset);
-                /*
-                 * unmap_mapping_range is called twice, first simply for
-                 * efficiency so that truncate_inode_pages does fewer
-                 * single-page unmaps.  However after this first call, and
-                 * before truncate_inode_pages finishes, it is possible for
-                 * private pages to be COWed, which remain after
-                 * truncate_inode_pages finishes, hence the second
-                 * unmap_mapping_range call must be made for correctness.
-                 */
-                unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-                truncate_inode_pages(mapping, offset);
-                unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-        }
-        if (inode->i_op->truncate)
-                inode->i_op->truncate(inode);
-        return 0;
-out_sig:
-        send_sig(SIGXFSZ, current, 0);
-out_big:
-        return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
        struct address_space *mapping = inode->i_mapping;
diff --git a/mm/mremap.c b/mm/mremap.c
index 20a07dba6be0..97bff2547719 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
        if (vma->vm_file) {
                /*
                 * Subtle point from Rajesh Venkatasubramanian: before
-                 * moving file-based ptes, we must lock vmtruncate out,
+                 * moving file-based ptes, we must lock truncate_pagecache
-                 * since it might clean the dst vma before the src vma,
+                 * out, since it might clean the dst vma before the src vma,
                 * and we propagate stale pages into the dst afterward.
                 */
                mapping = vma->vm_file->f_mapping;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8d484241d034..56a446f05971 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = {
 };
 /*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
-        struct address_space *mapping = inode->i_mapping;
-        unsigned long limit;
-        if (inode->i_size < offset)
-                goto do_expand;
-        i_size_write(inode, offset);
-        truncate_inode_pages(mapping, offset);
-        goto out_truncate;
-do_expand:
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-        if (limit != RLIM_INFINITY && offset > limit)
-                goto out_sig;
-        if (offset > inode->i_sb->s_maxbytes)
-                goto out;
-        i_size_write(inode, offset);
-out_truncate:
-        if (inode->i_op->truncate)
-                inode->i_op->truncate(inode);
-        return 0;
-out_sig:
-        send_sig(SIGXFSZ, current, 0);
-out:
-        return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
diff --git a/mm/truncate.c b/mm/truncate.c
index a17b3977cfdf..450cebdabfc0 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -497,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
        return invalidate_inode_pages2_range(mapping, 0, -1);
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode whose mapping (inode->i_mapping) is unmapped and truncated
+ * @old: old file size, i.e. i_size as it was before the truncate
+ * @new: new file size; pagecache is only touched when @new < @old
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * If @new is not smaller than @old (the file is not shrinking) this
+ * function does nothing.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+        if (new < old) {
+                struct address_space *mapping = inode->i_mapping;
+                /*
+                 * unmap_mapping_range is called twice, first simply for
+                 * efficiency so that truncate_inode_pages does fewer
+                 * single-page unmaps.  However after this first call, and
+                 * before truncate_inode_pages finishes, it is possible for
+                 * private pages to be COWed, which remain after
+                 * truncate_inode_pages finishes, hence the second
+                 * unmap_mapping_range call must be made for correctness.
+                 *
+                 * Both calls pass the same arguments; the start is @new
+                 * rounded up by PAGE_SIZE - 1.
+                 * NOTE(review): the trailing 0 and 1 presumably mean "to
+                 * end of file" and "also unmap private COWed pages" --
+                 * confirm against unmap_mapping_range().
+                 */
+                unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+                truncate_inode_pages(mapping, new);
+                unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+        }
+}
+EXPORT_SYMBOL(truncate_pagecache);
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: new file size; file offset to start truncating
+ *
+ * The new size is first checked with inode_newsize_ok().  If that check
+ * passes, @offset is written as the inode's i_size, truncate_pagecache()
+ * is called with the old and new sizes, and finally the inode's
+ * ->truncate operation is invoked if the inode provides one.
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ *
+ * Returns 0 on success.  If inode_newsize_ok() reports an error, that
+ * error is returned immediately, before i_size is written and before
+ * any pagecache is touched.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+        loff_t oldsize;
+        int error;
+        error = inode_newsize_ok(inode, offset);
+        if (error)
+                return error;
+        oldsize = inode->i_size;
+        i_size_write(inode, offset);
+        truncate_pagecache(inode, oldsize, offset);
+        if (inode->i_op->truncate)
+                inode->i_op->truncate(inode);
+        return error;
+}
+EXPORT_SYMBOL(vmtruncate);
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-24 11:32:11 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-24 11:32:11 -0400
commit	6c5daf012c9155aafd2c7973e4278766c30dfad0 (patch)
tree	33959d7b36d03e1610615641a2940cb2de5e8603
parent	6d39b27f0ac7e805ae3bd9efa51d7da04bec0360 (diff)
parent	c08d3b0e33edce28e9cfa7b64f7fe5bdeeb29248 (diff)