aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_super.c1
-rw-r--r--fs/adfs/super.c1
-rw-r--r--fs/affs/super.c1
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/autofs4/init.c1
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/binfmt_misc.c1
-rw-r--r--fs/btrfs/ctree.c30
-rw-r--r--fs/btrfs/delayed-inode.c151
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/disk-io.c30
-rw-r--r--fs/btrfs/extent-tree.c89
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file-item.c6
-rw-r--r--fs/btrfs/file.c10
-rw-r--r--fs/btrfs/inode.c31
-rw-r--r--fs/btrfs/ioctl.c18
-rw-r--r--fs/btrfs/locking.h1
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/qgroup.c13
-rw-r--r--fs/btrfs/relocation.c74
-rw-r--r--fs/btrfs/scrub.c3
-rw-r--r--fs/btrfs/send.c10
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/transaction.c76
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/btrfs/volumes.c33
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/cifs/asn1.c53
-rw-r--r--fs/cifs/cifsfs.c25
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/cifs/connect.c16
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/inode.c21
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/smb2ops.c1
-rw-r--r--fs/coda/inode.c1
-rw-r--r--fs/compat.c15
-rw-r--r--fs/configfs/mount.c1
-rw-r--r--fs/cramfs/inode.c1
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/ecryptfs/Kconfig8
-rw-r--r--fs/ecryptfs/Makefile7
-rw-r--r--fs/ecryptfs/crypto.c9
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h40
-rw-r--r--fs/ecryptfs/file.c2
-rw-r--r--fs/ecryptfs/inode.c8
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/ecryptfs/main.c1
-rw-r--r--fs/ecryptfs/messaging.c5
-rw-r--r--fs/efs/super.c1
-rw-r--r--fs/exofs/super.c1
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext3/super.c5
-rw-r--r--fs/ext4/ext4.h8
-rw-r--r--fs/ext4/extents.c105
-rw-r--r--fs/ext4/extents_status.c212
-rw-r--r--fs/ext4/extents_status.h9
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c182
-rw-r--r--fs/ext4/mballoc.c23
-rw-r--r--fs/ext4/move_extent.c43
-rw-r--r--fs/ext4/page-io.c12
-rw-r--r--fs/ext4/resize.c4
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/f2fs/super.c1
-rw-r--r--fs/fat/namei_msdos.c1
-rw-r--r--fs/fat/namei_vfat.c1
-rw-r--r--fs/filesystems.c2
-rw-r--r--fs/freevxfs/vxfs_super.c3
-rw-r--r--fs/fuse/control.c1
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/super.c1
-rw-r--r--fs/hostfs/hostfs_kern.c10
-rw-r--r--fs/hpfs/super.c1
-rw-r--r--fs/hppfs/hppfs.c1
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/internal.h5
-rw-r--r--fs/isofs/inode.c4
-rw-r--r--fs/jbd2/transaction.c15
-rw-r--r--fs/jffs2/super.c1
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/logfs/super.c1
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c54
-rw-r--r--fs/ncpfs/inode.c1
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c4
-rw-r--r--fs/nfs/idmap.c13
-rw-r--r--fs/nfs/nfs4filelayout.c1
-rw-r--r--fs/nfs/nfs4proc.c16
-rw-r--r--fs/nfs/pnfs.c81
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfsd/nfs4state.c36
-rw-r--r--fs/nfsd/nfscache.c11
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/super.c1
-rw-r--r--fs/ntfs/super.c1
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c1
-rw-r--r--fs/ocfs2/super.c1
-rw-r--r--fs/omfs/inode.c1
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/pnode.c6
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/namespaces.c12
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/qnx6/inode.c1
-rw-r--r--fs/quota/dquot.c5
-rw-r--r--fs/read_write.c28
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/splice.c4
-rw-r--r--fs/squashfs/super.c1
-rw-r--r--fs/sysfs/dir.c17
-rw-r--r--fs/sysfs/mount.c4
-rw-r--r--fs/sysv/super.c4
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/udf/super.c1
-rw-r--r--fs/ufs/super.c1
-rw-r--r--fs/xfs/xfs_buf.c6
-rw-r--r--fs/xfs/xfs_iomap.c4
-rw-r--r--fs/xfs/xfs_super.c1
136 files changed, 1433 insertions, 490 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 91dad63e5a2d..2756dcd5de6e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -365,3 +365,4 @@ struct file_system_type v9fs_fs_type = {
365 .owner = THIS_MODULE, 365 .owner = THIS_MODULE,
366 .fs_flags = FS_RENAME_DOES_D_MOVE, 366 .fs_flags = FS_RENAME_DOES_D_MOVE,
367}; 367};
368MODULE_ALIAS_FS("9p");
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d57122935793..0ff4bae2c2a2 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -524,6 +524,7 @@ static struct file_system_type adfs_fs_type = {
524 .kill_sb = kill_block_super, 524 .kill_sb = kill_block_super,
525 .fs_flags = FS_REQUIRES_DEV, 525 .fs_flags = FS_REQUIRES_DEV,
526}; 526};
527MODULE_ALIAS_FS("adfs");
527 528
528static int __init init_adfs_fs(void) 529static int __init init_adfs_fs(void)
529{ 530{
diff --git a/fs/affs/super.c b/fs/affs/super.c
index b84dc7352502..45161a832bbc 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -622,6 +622,7 @@ static struct file_system_type affs_fs_type = {
622 .kill_sb = kill_block_super, 622 .kill_sb = kill_block_super,
623 .fs_flags = FS_REQUIRES_DEV, 623 .fs_flags = FS_REQUIRES_DEV,
624}; 624};
625MODULE_ALIAS_FS("affs");
625 626
626static int __init init_affs_fs(void) 627static int __init init_affs_fs(void)
627{ 628{
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7c31ec399575..c4861557e385 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -45,6 +45,7 @@ struct file_system_type afs_fs_type = {
45 .kill_sb = afs_kill_super, 45 .kill_sb = afs_kill_super,
46 .fs_flags = 0, 46 .fs_flags = 0,
47}; 47};
48MODULE_ALIAS_FS("afs");
48 49
49static const struct super_operations afs_super_ops = { 50static const struct super_operations afs_super_ops = {
50 .statfs = afs_statfs, 51 .statfs = afs_statfs,
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index cddc74b9cdb2..b3db517e89ec 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -26,6 +26,7 @@ static struct file_system_type autofs_fs_type = {
26 .mount = autofs_mount, 26 .mount = autofs_mount,
27 .kill_sb = autofs4_kill_sb, 27 .kill_sb = autofs4_kill_sb,
28}; 28};
29MODULE_ALIAS_FS("autofs");
29 30
30static int __init init_autofs4_fs(void) 31static int __init init_autofs4_fs(void)
31{ 32{
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index c8f4e25eb9e2..8615ee89ab55 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -951,6 +951,7 @@ static struct file_system_type befs_fs_type = {
951 .kill_sb = kill_block_super, 951 .kill_sb = kill_block_super,
952 .fs_flags = FS_REQUIRES_DEV, 952 .fs_flags = FS_REQUIRES_DEV,
953}; 953};
954MODULE_ALIAS_FS("befs");
954 955
955static int __init 956static int __init
956init_befs_fs(void) 957init_befs_fs(void)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 737aaa3f7090..5e376bb93419 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -473,6 +473,7 @@ static struct file_system_type bfs_fs_type = {
473 .kill_sb = kill_block_super, 473 .kill_sb = kill_block_super,
474 .fs_flags = FS_REQUIRES_DEV, 474 .fs_flags = FS_REQUIRES_DEV,
475}; 475};
476MODULE_ALIAS_FS("bfs");
476 477
477static int __init init_bfs_fs(void) 478static int __init init_bfs_fs(void)
478{ 479{
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index fecbbf3f8ff2..751df5e4f61a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -720,6 +720,7 @@ static struct file_system_type bm_fs_type = {
720 .mount = bm_mount, 720 .mount = bm_mount,
721 .kill_sb = kill_litter_super, 721 .kill_sb = kill_litter_super,
722}; 722};
723MODULE_ALIAS_FS("binfmt_misc");
723 724
724static int __init init_misc_binfmt(void) 725static int __init init_misc_binfmt(void)
725{ 726{
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ecd25a1b4e51..ca9d8f1a3bb6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
651 if (tree_mod_dont_log(fs_info, NULL)) 651 if (tree_mod_dont_log(fs_info, NULL))
652 return 0; 652 return 0;
653 653
654 __tree_mod_log_free_eb(fs_info, old_root);
655
654 ret = tree_mod_alloc(fs_info, flags, &tm); 656 ret = tree_mod_alloc(fs_info, flags, &tm);
655 if (ret < 0) 657 if (ret < 0)
656 goto out; 658 goto out;
@@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
736static noinline void 738static noinline void
737tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, 739tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
738 struct extent_buffer *src, unsigned long dst_offset, 740 struct extent_buffer *src, unsigned long dst_offset,
739 unsigned long src_offset, int nr_items) 741 unsigned long src_offset, int nr_items, int log_removal)
740{ 742{
741 int ret; 743 int ret;
742 int i; 744 int i;
@@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
750 } 752 }
751 753
752 for (i = 0; i < nr_items; i++) { 754 for (i = 0; i < nr_items; i++) {
753 ret = tree_mod_log_insert_key_locked(fs_info, src, 755 if (log_removal) {
754 i + src_offset, 756 ret = tree_mod_log_insert_key_locked(fs_info, src,
755 MOD_LOG_KEY_REMOVE); 757 i + src_offset,
756 BUG_ON(ret < 0); 758 MOD_LOG_KEY_REMOVE);
759 BUG_ON(ret < 0);
760 }
757 ret = tree_mod_log_insert_key_locked(fs_info, dst, 761 ret = tree_mod_log_insert_key_locked(fs_info, dst,
758 i + dst_offset, 762 i + dst_offset,
759 MOD_LOG_KEY_ADD); 763 MOD_LOG_KEY_ADD);
@@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
927 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 931 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
928 BUG_ON(ret); /* -ENOMEM */ 932 BUG_ON(ret); /* -ENOMEM */
929 } 933 }
930 tree_mod_log_free_eb(root->fs_info, buf);
931 clean_tree_block(trans, root, buf); 934 clean_tree_block(trans, root, buf);
932 *last_ref = 1; 935 *last_ref = 1;
933 } 936 }
@@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1046 btrfs_set_node_ptr_generation(parent, parent_slot, 1049 btrfs_set_node_ptr_generation(parent, parent_slot,
1047 trans->transid); 1050 trans->transid);
1048 btrfs_mark_buffer_dirty(parent); 1051 btrfs_mark_buffer_dirty(parent);
1052 tree_mod_log_free_eb(root->fs_info, buf);
1049 btrfs_free_tree_block(trans, root, buf, parent_start, 1053 btrfs_free_tree_block(trans, root, buf, parent_start,
1050 last_ref); 1054 last_ref);
1051 } 1055 }
@@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1750 goto enospc; 1754 goto enospc;
1751 } 1755 }
1752 1756
1753 tree_mod_log_free_eb(root->fs_info, root->node);
1754 tree_mod_log_set_root_pointer(root, child); 1757 tree_mod_log_set_root_pointer(root, child);
1755 rcu_assign_pointer(root->node, child); 1758 rcu_assign_pointer(root->node, child);
1756 1759
@@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2995 push_items = min(src_nritems - 8, push_items); 2998 push_items = min(src_nritems - 8, push_items);
2996 2999
2997 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, 3000 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
2998 push_items); 3001 push_items, 1);
2999 copy_extent_buffer(dst, src, 3002 copy_extent_buffer(dst, src,
3000 btrfs_node_key_ptr_offset(dst_nritems), 3003 btrfs_node_key_ptr_offset(dst_nritems),
3001 btrfs_node_key_ptr_offset(0), 3004 btrfs_node_key_ptr_offset(0),
@@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3066 sizeof(struct btrfs_key_ptr)); 3069 sizeof(struct btrfs_key_ptr));
3067 3070
3068 tree_mod_log_eb_copy(root->fs_info, dst, src, 0, 3071 tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3069 src_nritems - push_items, push_items); 3072 src_nritems - push_items, push_items, 1);
3070 copy_extent_buffer(dst, src, 3073 copy_extent_buffer(dst, src,
3071 btrfs_node_key_ptr_offset(0), 3074 btrfs_node_key_ptr_offset(0),
3072 btrfs_node_key_ptr_offset(src_nritems - push_items), 3075 btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3218 int mid; 3221 int mid;
3219 int ret; 3222 int ret;
3220 u32 c_nritems; 3223 u32 c_nritems;
3224 int tree_mod_log_removal = 1;
3221 3225
3222 c = path->nodes[level]; 3226 c = path->nodes[level];
3223 WARN_ON(btrfs_header_generation(c) != trans->transid); 3227 WARN_ON(btrfs_header_generation(c) != trans->transid);
3224 if (c == root->node) { 3228 if (c == root->node) {
3225 /* trying to split the root, lets make a new one */ 3229 /* trying to split the root, lets make a new one */
3226 ret = insert_new_root(trans, root, path, level + 1); 3230 ret = insert_new_root(trans, root, path, level + 1);
3231 /*
3232 * removal of root nodes has been logged by
3233 * tree_mod_log_set_root_pointer due to locking
3234 */
3235 tree_mod_log_removal = 0;
3227 if (ret) 3236 if (ret)
3228 return ret; 3237 return ret;
3229 } else { 3238 } else {
@@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3261 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3270 (unsigned long)btrfs_header_chunk_tree_uuid(split),
3262 BTRFS_UUID_SIZE); 3271 BTRFS_UUID_SIZE);
3263 3272
3264 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3273 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid,
3274 tree_mod_log_removal);
3265 copy_extent_buffer(split, c, 3275 copy_extent_buffer(split, c,
3266 btrfs_node_key_ptr_offset(0), 3276 btrfs_node_key_ptr_offset(0),
3267 btrfs_node_key_ptr_offset(mid), 3277 btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0b278b117cbe..14fce27b4780 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -22,8 +22,9 @@
22#include "disk-io.h" 22#include "disk-io.h"
23#include "transaction.h" 23#include "transaction.h"
24 24
25#define BTRFS_DELAYED_WRITEBACK 400 25#define BTRFS_DELAYED_WRITEBACK 512
26#define BTRFS_DELAYED_BACKGROUND 100 26#define BTRFS_DELAYED_BACKGROUND 128
27#define BTRFS_DELAYED_BATCH 16
27 28
28static struct kmem_cache *delayed_node_cache; 29static struct kmem_cache *delayed_node_cache;
29 30
@@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
494 BTRFS_DELAYED_DELETION_ITEM); 495 BTRFS_DELAYED_DELETION_ITEM);
495} 496}
496 497
498static void finish_one_item(struct btrfs_delayed_root *delayed_root)
499{
500 int seq = atomic_inc_return(&delayed_root->items_seq);
501 if ((atomic_dec_return(&delayed_root->items) <
502 BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
503 waitqueue_active(&delayed_root->wait))
504 wake_up(&delayed_root->wait);
505}
506
497static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) 507static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
498{ 508{
499 struct rb_root *root; 509 struct rb_root *root;
@@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
512 522
513 rb_erase(&delayed_item->rb_node, root); 523 rb_erase(&delayed_item->rb_node, root);
514 delayed_item->delayed_node->count--; 524 delayed_item->delayed_node->count--;
515 if (atomic_dec_return(&delayed_root->items) < 525
516 BTRFS_DELAYED_BACKGROUND && 526 finish_one_item(delayed_root);
517 waitqueue_active(&delayed_root->wait))
518 wake_up(&delayed_root->wait);
519} 527}
520 528
521static void btrfs_release_delayed_item(struct btrfs_delayed_item *item) 529static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
@@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1056 delayed_node->count--; 1064 delayed_node->count--;
1057 1065
1058 delayed_root = delayed_node->root->fs_info->delayed_root; 1066 delayed_root = delayed_node->root->fs_info->delayed_root;
1059 if (atomic_dec_return(&delayed_root->items) < 1067 finish_one_item(delayed_root);
1060 BTRFS_DELAYED_BACKGROUND &&
1061 waitqueue_active(&delayed_root->wait))
1062 wake_up(&delayed_root->wait);
1063 } 1068 }
1064} 1069}
1065 1070
@@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode)
1304 btrfs_release_delayed_node(delayed_node); 1309 btrfs_release_delayed_node(delayed_node);
1305} 1310}
1306 1311
1307struct btrfs_async_delayed_node { 1312struct btrfs_async_delayed_work {
1308 struct btrfs_root *root; 1313 struct btrfs_delayed_root *delayed_root;
1309 struct btrfs_delayed_node *delayed_node; 1314 int nr;
1310 struct btrfs_work work; 1315 struct btrfs_work work;
1311}; 1316};
1312 1317
1313static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) 1318static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1314{ 1319{
1315 struct btrfs_async_delayed_node *async_node; 1320 struct btrfs_async_delayed_work *async_work;
1321 struct btrfs_delayed_root *delayed_root;
1316 struct btrfs_trans_handle *trans; 1322 struct btrfs_trans_handle *trans;
1317 struct btrfs_path *path; 1323 struct btrfs_path *path;
1318 struct btrfs_delayed_node *delayed_node = NULL; 1324 struct btrfs_delayed_node *delayed_node = NULL;
1319 struct btrfs_root *root; 1325 struct btrfs_root *root;
1320 struct btrfs_block_rsv *block_rsv; 1326 struct btrfs_block_rsv *block_rsv;
1321 int need_requeue = 0; 1327 int total_done = 0;
1322 1328
1323 async_node = container_of(work, struct btrfs_async_delayed_node, work); 1329 async_work = container_of(work, struct btrfs_async_delayed_work, work);
1330 delayed_root = async_work->delayed_root;
1324 1331
1325 path = btrfs_alloc_path(); 1332 path = btrfs_alloc_path();
1326 if (!path) 1333 if (!path)
1327 goto out; 1334 goto out;
1328 path->leave_spinning = 1;
1329 1335
1330 delayed_node = async_node->delayed_node; 1336again:
1337 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1338 goto free_path;
1339
1340 delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1341 if (!delayed_node)
1342 goto free_path;
1343
1344 path->leave_spinning = 1;
1331 root = delayed_node->root; 1345 root = delayed_node->root;
1332 1346
1333 trans = btrfs_join_transaction(root); 1347 trans = btrfs_join_transaction(root);
1334 if (IS_ERR(trans)) 1348 if (IS_ERR(trans))
1335 goto free_path; 1349 goto release_path;
1336 1350
1337 block_rsv = trans->block_rsv; 1351 block_rsv = trans->block_rsv;
1338 trans->block_rsv = &root->fs_info->delayed_block_rsv; 1352 trans->block_rsv = &root->fs_info->delayed_block_rsv;
@@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1363 * Task1 will sleep until the transaction is commited. 1377 * Task1 will sleep until the transaction is commited.
1364 */ 1378 */
1365 mutex_lock(&delayed_node->mutex); 1379 mutex_lock(&delayed_node->mutex);
1366 if (delayed_node->count) 1380 btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
1367 need_requeue = 1;
1368 else
1369 btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1370 delayed_node);
1371 mutex_unlock(&delayed_node->mutex); 1381 mutex_unlock(&delayed_node->mutex);
1372 1382
1373 trans->block_rsv = block_rsv; 1383 trans->block_rsv = block_rsv;
1374 btrfs_end_transaction_dmeta(trans, root); 1384 btrfs_end_transaction_dmeta(trans, root);
1375 btrfs_btree_balance_dirty_nodelay(root); 1385 btrfs_btree_balance_dirty_nodelay(root);
1386
1387release_path:
1388 btrfs_release_path(path);
1389 total_done++;
1390
1391 btrfs_release_prepared_delayed_node(delayed_node);
1392 if (async_work->nr == 0 || total_done < async_work->nr)
1393 goto again;
1394
1376free_path: 1395free_path:
1377 btrfs_free_path(path); 1396 btrfs_free_path(path);
1378out: 1397out:
1379 if (need_requeue) 1398 wake_up(&delayed_root->wait);
1380 btrfs_requeue_work(&async_node->work); 1399 kfree(async_work);
1381 else {
1382 btrfs_release_prepared_delayed_node(delayed_node);
1383 kfree(async_node);
1384 }
1385} 1400}
1386 1401
1402
1387static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, 1403static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1388 struct btrfs_root *root, int all) 1404 struct btrfs_root *root, int nr)
1389{ 1405{
1390 struct btrfs_async_delayed_node *async_node; 1406 struct btrfs_async_delayed_work *async_work;
1391 struct btrfs_delayed_node *curr;
1392 int count = 0;
1393 1407
1394again: 1408 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1395 curr = btrfs_first_prepared_delayed_node(delayed_root);
1396 if (!curr)
1397 return 0; 1409 return 0;
1398 1410
1399 async_node = kmalloc(sizeof(*async_node), GFP_NOFS); 1411 async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1400 if (!async_node) { 1412 if (!async_work)
1401 btrfs_release_prepared_delayed_node(curr);
1402 return -ENOMEM; 1413 return -ENOMEM;
1403 }
1404
1405 async_node->root = root;
1406 async_node->delayed_node = curr;
1407
1408 async_node->work.func = btrfs_async_run_delayed_node_done;
1409 async_node->work.flags = 0;
1410 1414
1411 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work); 1415 async_work->delayed_root = delayed_root;
1412 count++; 1416 async_work->work.func = btrfs_async_run_delayed_root;
1413 1417 async_work->work.flags = 0;
1414 if (all || count < 4) 1418 async_work->nr = nr;
1415 goto again;
1416 1419
1420 btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
1417 return 0; 1421 return 0;
1418} 1422}
1419 1423
@@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1424 WARN_ON(btrfs_first_delayed_node(delayed_root)); 1428 WARN_ON(btrfs_first_delayed_node(delayed_root));
1425} 1429}
1426 1430
1431static int refs_newer(struct btrfs_delayed_root *delayed_root,
1432 int seq, int count)
1433{
1434 int val = atomic_read(&delayed_root->items_seq);
1435
1436 if (val < seq || val >= seq + count)
1437 return 1;
1438 return 0;
1439}
1440
1427void btrfs_balance_delayed_items(struct btrfs_root *root) 1441void btrfs_balance_delayed_items(struct btrfs_root *root)
1428{ 1442{
1429 struct btrfs_delayed_root *delayed_root; 1443 struct btrfs_delayed_root *delayed_root;
1444 int seq;
1430 1445
1431 delayed_root = btrfs_get_delayed_root(root); 1446 delayed_root = btrfs_get_delayed_root(root);
1432 1447
1433 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) 1448 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1434 return; 1449 return;
1435 1450
1451 seq = atomic_read(&delayed_root->items_seq);
1452
1436 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { 1453 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1437 int ret; 1454 int ret;
1438 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1); 1455 DEFINE_WAIT(__wait);
1456
1457 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1439 if (ret) 1458 if (ret)
1440 return; 1459 return;
1441 1460
1442 wait_event_interruptible_timeout( 1461 while (1) {
1443 delayed_root->wait, 1462 prepare_to_wait(&delayed_root->wait, &__wait,
1444 (atomic_read(&delayed_root->items) < 1463 TASK_INTERRUPTIBLE);
1445 BTRFS_DELAYED_BACKGROUND), 1464
1446 HZ); 1465 if (refs_newer(delayed_root, seq,
1447 return; 1466 BTRFS_DELAYED_BATCH) ||
1467 atomic_read(&delayed_root->items) <
1468 BTRFS_DELAYED_BACKGROUND) {
1469 break;
1470 }
1471 if (!signal_pending(current))
1472 schedule();
1473 else
1474 break;
1475 }
1476 finish_wait(&delayed_root->wait, &__wait);
1448 } 1477 }
1449 1478
1450 btrfs_wq_run_delayed_node(delayed_root, root, 0); 1479 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
1451} 1480}
1452 1481
1453/* Will return 0 or -ENOMEM */ 1482/* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 78b6ad0fc669..1d5c5f7abe3e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -43,6 +43,7 @@ struct btrfs_delayed_root {
43 */ 43 */
44 struct list_head prepare_list; 44 struct list_head prepare_list;
45 atomic_t items; /* for delayed items */ 45 atomic_t items; /* for delayed items */
46 atomic_t items_seq; /* for delayed items */
46 int nodes; /* for delayed nodes */ 47 int nodes; /* for delayed nodes */
47 wait_queue_head_t wait; 48 wait_queue_head_t wait;
48}; 49};
@@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root(
86 struct btrfs_delayed_root *delayed_root) 87 struct btrfs_delayed_root *delayed_root)
87{ 88{
88 atomic_set(&delayed_root->items, 0); 89 atomic_set(&delayed_root->items, 0);
90 atomic_set(&delayed_root->items_seq, 0);
89 delayed_root->nodes = 0; 91 delayed_root->nodes = 0;
90 spin_lock_init(&delayed_root->lock); 92 spin_lock_init(&delayed_root->lock);
91 init_waitqueue_head(&delayed_root->wait); 93 init_waitqueue_head(&delayed_root->wait);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02369a3c162e..6d19a0a554aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -62,7 +62,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
62static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 62static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
63static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 63static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
64 struct btrfs_root *root); 64 struct btrfs_root *root);
65static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); 65static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
66static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); 66static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
67static int btrfs_destroy_marked_extents(struct btrfs_root *root, 67static int btrfs_destroy_marked_extents(struct btrfs_root *root,
68 struct extent_io_tree *dirty_pages, 68 struct extent_io_tree *dirty_pages,
@@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1291 0, objectid, NULL, 0, 0, 0); 1291 0, objectid, NULL, 0, 0, 0);
1292 if (IS_ERR(leaf)) { 1292 if (IS_ERR(leaf)) {
1293 ret = PTR_ERR(leaf); 1293 ret = PTR_ERR(leaf);
1294 leaf = NULL;
1294 goto fail; 1295 goto fail;
1295 } 1296 }
1296 1297
@@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1334 1335
1335 btrfs_tree_unlock(leaf); 1336 btrfs_tree_unlock(leaf);
1336 1337
1338 return root;
1339
1337fail: 1340fail:
1338 if (ret) 1341 if (leaf) {
1339 return ERR_PTR(ret); 1342 btrfs_tree_unlock(leaf);
1343 free_extent_buffer(leaf);
1344 }
1345 kfree(root);
1340 1346
1341 return root; 1347 return ERR_PTR(ret);
1342} 1348}
1343 1349
1344static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, 1350static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3253 if (btrfs_root_refs(&root->root_item) == 0) 3259 if (btrfs_root_refs(&root->root_item) == 0)
3254 synchronize_srcu(&fs_info->subvol_srcu); 3260 synchronize_srcu(&fs_info->subvol_srcu);
3255 3261
3256 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 3262 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
3257 btrfs_free_log(NULL, root); 3263 btrfs_free_log(NULL, root);
3258 btrfs_free_log_root_tree(NULL, fs_info); 3264 btrfs_free_log_root_tree(NULL, fs_info);
3259 } 3265 }
@@ -3687,7 +3693,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3687 return ret; 3693 return ret;
3688} 3694}
3689 3695
3690static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) 3696static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
3691{ 3697{
3692 struct btrfs_pending_snapshot *snapshot; 3698 struct btrfs_pending_snapshot *snapshot;
3693 struct list_head splice; 3699 struct list_head splice;
@@ -3700,10 +3706,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
3700 snapshot = list_entry(splice.next, 3706 snapshot = list_entry(splice.next,
3701 struct btrfs_pending_snapshot, 3707 struct btrfs_pending_snapshot,
3702 list); 3708 list);
3703 3709 snapshot->error = -ECANCELED;
3704 list_del_init(&snapshot->list); 3710 list_del_init(&snapshot->list);
3705
3706 kfree(snapshot);
3707 } 3711 }
3708} 3712}
3709 3713
@@ -3840,6 +3844,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3840 cur_trans->blocked = 1; 3844 cur_trans->blocked = 1;
3841 wake_up(&root->fs_info->transaction_blocked_wait); 3845 wake_up(&root->fs_info->transaction_blocked_wait);
3842 3846
3847 btrfs_evict_pending_snapshots(cur_trans);
3848
3843 cur_trans->blocked = 0; 3849 cur_trans->blocked = 0;
3844 wake_up(&root->fs_info->transaction_wait); 3850 wake_up(&root->fs_info->transaction_wait);
3845 3851
@@ -3849,8 +3855,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3849 btrfs_destroy_delayed_inodes(root); 3855 btrfs_destroy_delayed_inodes(root);
3850 btrfs_assert_delayed_root_empty(root); 3856 btrfs_assert_delayed_root_empty(root);
3851 3857
3852 btrfs_destroy_pending_snapshots(cur_trans);
3853
3854 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, 3858 btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
3855 EXTENT_DIRTY); 3859 EXTENT_DIRTY);
3856 btrfs_destroy_pinned_extent(root, 3860 btrfs_destroy_pinned_extent(root,
@@ -3894,6 +3898,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3894 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3898 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3895 wake_up(&root->fs_info->transaction_blocked_wait); 3899 wake_up(&root->fs_info->transaction_blocked_wait);
3896 3900
3901 btrfs_evict_pending_snapshots(t);
3902
3897 t->blocked = 0; 3903 t->blocked = 0;
3898 smp_mb(); 3904 smp_mb();
3899 if (waitqueue_active(&root->fs_info->transaction_wait)) 3905 if (waitqueue_active(&root->fs_info->transaction_wait))
@@ -3907,8 +3913,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3907 btrfs_destroy_delayed_inodes(root); 3913 btrfs_destroy_delayed_inodes(root);
3908 btrfs_assert_delayed_root_empty(root); 3914 btrfs_assert_delayed_root_empty(root);
3909 3915
3910 btrfs_destroy_pending_snapshots(t);
3911
3912 btrfs_destroy_delalloc_inodes(root); 3916 btrfs_destroy_delalloc_inodes(root);
3913 3917
3914 spin_lock(&root->fs_info->trans_lock); 3918 spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e074dab2d57..3d551231caba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
257 cache->bytes_super += stripe_len; 257 cache->bytes_super += stripe_len;
258 ret = add_excluded_extent(root, cache->key.objectid, 258 ret = add_excluded_extent(root, cache->key.objectid,
259 stripe_len); 259 stripe_len);
260 BUG_ON(ret); /* -ENOMEM */ 260 if (ret)
261 return ret;
261 } 262 }
262 263
263 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 264 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root,
265 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 266 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
266 cache->key.objectid, bytenr, 267 cache->key.objectid, bytenr,
267 0, &logical, &nr, &stripe_len); 268 0, &logical, &nr, &stripe_len);
268 BUG_ON(ret); /* -ENOMEM */ 269 if (ret)
270 return ret;
269 271
270 while (nr--) { 272 while (nr--) {
271 cache->bytes_super += stripe_len; 273 cache->bytes_super += stripe_len;
272 ret = add_excluded_extent(root, logical[nr], 274 ret = add_excluded_extent(root, logical[nr],
273 stripe_len); 275 stripe_len);
274 BUG_ON(ret); /* -ENOMEM */ 276 if (ret) {
277 kfree(logical);
278 return ret;
279 }
275 } 280 }
276 281
277 kfree(logical); 282 kfree(logical);
@@ -1467,8 +1472,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1467 if (ret && !insert) { 1472 if (ret && !insert) {
1468 err = -ENOENT; 1473 err = -ENOENT;
1469 goto out; 1474 goto out;
1475 } else if (ret) {
1476 err = -EIO;
1477 WARN_ON(1);
1478 goto out;
1470 } 1479 }
1471 BUG_ON(ret); /* Corruption */
1472 1480
1473 leaf = path->nodes[0]; 1481 leaf = path->nodes[0];
1474 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1482 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -4435,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4435 spin_lock(&sinfo->lock); 4443 spin_lock(&sinfo->lock);
4436 spin_lock(&block_rsv->lock); 4444 spin_lock(&block_rsv->lock);
4437 4445
4438 block_rsv->size = num_bytes; 4446 block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
4439 4447
4440 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 4448 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
4441 sinfo->bytes_reserved + sinfo->bytes_readonly + 4449 sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -4790,14 +4798,49 @@ out_fail:
4790 * If the inodes csum_bytes is the same as the original 4798 * If the inodes csum_bytes is the same as the original
4791 * csum_bytes then we know we haven't raced with any free()ers 4799 * csum_bytes then we know we haven't raced with any free()ers
4792 * so we can just reduce our inodes csum bytes and carry on. 4800 * so we can just reduce our inodes csum bytes and carry on.
4793 * Otherwise we have to do the normal free thing to account for
4794 * the case that the free side didn't free up its reserve
4795 * because of this outstanding reservation.
4796 */ 4801 */
4797 if (BTRFS_I(inode)->csum_bytes == csum_bytes) 4802 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
4798 calc_csum_metadata_size(inode, num_bytes, 0); 4803 calc_csum_metadata_size(inode, num_bytes, 0);
4799 else 4804 } else {
4800 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 4805 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
4806 u64 bytes;
4807
4808 /*
4809 * This is tricky, but first we need to figure out how much we
4810 * free'd from any free-ers that occurred during this
4811 * reservation, so we reset ->csum_bytes to the csum_bytes
4812 * before we dropped our lock, and then call the free for the
4813 * number of bytes that were freed while we were trying our
4814 * reservation.
4815 */
4816 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
4817 BTRFS_I(inode)->csum_bytes = csum_bytes;
4818 to_free = calc_csum_metadata_size(inode, bytes, 0);
4819
4820
4821 /*
4822 * Now we need to see how much we would have freed had we not
4823 * been making this reservation and our ->csum_bytes were not
4824 * artificially inflated.
4825 */
4826 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
4827 bytes = csum_bytes - orig_csum_bytes;
4828 bytes = calc_csum_metadata_size(inode, bytes, 0);
4829
4830 /*
4831 * Now reset ->csum_bytes to what it should be. If bytes is
4832 * more than to_free then we would have free'd more space had we
4833 * not had an artificially high ->csum_bytes, so we need to free
4834 * the remainder. If bytes is the same or less then we don't
4835 * need to do anything, the other free-ers did the correct
4836 * thing.
4837 */
4838 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
4839 if (bytes > to_free)
4840 to_free = bytes - to_free;
4841 else
4842 to_free = 0;
4843 }
4801 spin_unlock(&BTRFS_I(inode)->lock); 4844 spin_unlock(&BTRFS_I(inode)->lock);
4802 if (dropped) 4845 if (dropped)
4803 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4846 to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -7944,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7944 * info has super bytes accounted for, otherwise we'll think 7987 * info has super bytes accounted for, otherwise we'll think
7945 * we have more space than we actually do. 7988 * we have more space than we actually do.
7946 */ 7989 */
7947 exclude_super_stripes(root, cache); 7990 ret = exclude_super_stripes(root, cache);
7991 if (ret) {
7992 /*
7993 * We may have excluded something, so call this just in
7994 * case.
7995 */
7996 free_excluded_extents(root, cache);
7997 kfree(cache->free_space_ctl);
7998 kfree(cache);
7999 goto error;
8000 }
7948 8001
7949 /* 8002 /*
7950 * check for two cases, either we are full, and therefore 8003 * check for two cases, either we are full, and therefore
@@ -8086,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8086 8139
8087 cache->last_byte_to_unpin = (u64)-1; 8140 cache->last_byte_to_unpin = (u64)-1;
8088 cache->cached = BTRFS_CACHE_FINISHED; 8141 cache->cached = BTRFS_CACHE_FINISHED;
8089 exclude_super_stripes(root, cache); 8142 ret = exclude_super_stripes(root, cache);
8143 if (ret) {
8144 /*
8145 * We may have excluded something, so call this just in
8146 * case.
8147 */
8148 free_excluded_extents(root, cache);
8149 kfree(cache->free_space_ctl);
8150 kfree(cache);
8151 return ret;
8152 }
8090 8153
8091 add_new_free_space(cache, root->fs_info, chunk_offset, 8154 add_new_free_space(cache, root->fs_info, chunk_offset,
8092 chunk_offset + size); 8155 chunk_offset + size);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..cdee391fc7bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1257 GFP_NOFS); 1257 GFP_NOFS);
1258} 1258}
1259 1259
1260int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1261{
1262 unsigned long index = start >> PAGE_CACHE_SHIFT;
1263 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1264 struct page *page;
1265
1266 while (index <= end_index) {
1267 page = find_get_page(inode->i_mapping, index);
1268 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1269 clear_page_dirty_for_io(page);
1270 page_cache_release(page);
1271 index++;
1272 }
1273 return 0;
1274}
1275
1276int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1277{
1278 unsigned long index = start >> PAGE_CACHE_SHIFT;
1279 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1280 struct page *page;
1281
1282 while (index <= end_index) {
1283 page = find_get_page(inode->i_mapping, index);
1284 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1285 account_page_redirty(page);
1286 __set_page_dirty_nobuffers(page);
1287 page_cache_release(page);
1288 index++;
1289 }
1290 return 0;
1291}
1292
1260/* 1293/*
1261 * helper function to set both pages and extents in the tree writeback 1294 * helper function to set both pages and extents in the tree writeback
1262 */ 1295 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..258c92156857 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
325 unsigned long *map_len); 325 unsigned long *map_len);
326int extent_range_uptodate(struct extent_io_tree *tree, 326int extent_range_uptodate(struct extent_io_tree *tree,
327 u64 start, u64 end); 327 u64 start, u64 end);
328int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
329int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
328int extent_clear_unlock_delalloc(struct inode *inode, 330int extent_clear_unlock_delalloc(struct inode *inode,
329 struct extent_io_tree *tree, 331 struct extent_io_tree *tree,
330 u64 start, u64 end, struct page *locked_page, 332 u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ec160202be3e..c4628a201cb3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
119 csums_in_item /= csum_size; 119 csums_in_item /= csum_size;
120 120
121 if (csum_offset >= csums_in_item) { 121 if (csum_offset == csums_in_item) {
122 ret = -EFBIG; 122 ret = -EFBIG;
123 goto fail; 123 goto fail;
124 } else if (csum_offset > csums_in_item) {
125 goto fail;
124 } 126 }
125 } 127 }
126 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 128 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
728 return -ENOMEM; 730 return -ENOMEM;
729 731
730 sector_sum = sums->sums; 732 sector_sum = sums->sums;
731 trans->adding_csums = 1;
732again: 733again:
733 next_offset = (u64)-1; 734 next_offset = (u64)-1;
734 found_next = 0; 735 found_next = 0;
@@ -899,7 +900,6 @@ next_sector:
899 goto again; 900 goto again;
900 } 901 }
901out: 902out:
902 trans->adding_csums = 0;
903 btrfs_free_path(path); 903 btrfs_free_path(path);
904 return ret; 904 return ret;
905 905
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index af1d0605a5c1..ade03e6f7bd2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -591,6 +591,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
591 } 591 }
592 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 592 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
593 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 593 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
594 clear_bit(EXTENT_FLAG_LOGGING, &flags);
594 remove_extent_mapping(em_tree, em); 595 remove_extent_mapping(em_tree, em);
595 if (no_splits) 596 if (no_splits)
596 goto next; 597 goto next;
@@ -2141,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2141{ 2142{
2142 struct inode *inode = file_inode(file); 2143 struct inode *inode = file_inode(file);
2143 struct extent_state *cached_state = NULL; 2144 struct extent_state *cached_state = NULL;
2145 struct btrfs_root *root = BTRFS_I(inode)->root;
2144 u64 cur_offset; 2146 u64 cur_offset;
2145 u64 last_byte; 2147 u64 last_byte;
2146 u64 alloc_start; 2148 u64 alloc_start;
@@ -2168,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode,
2168 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); 2170 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
2169 if (ret) 2171 if (ret)
2170 return ret; 2172 return ret;
2173 if (root->fs_info->quota_enabled) {
2174 ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
2175 if (ret)
2176 goto out_reserve_fail;
2177 }
2171 2178
2172 /* 2179 /*
2173 * wait for ordered IO before we have any locks. We'll loop again 2180 * wait for ordered IO before we have any locks. We'll loop again
@@ -2271,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2271 &cached_state, GFP_NOFS); 2278 &cached_state, GFP_NOFS);
2272out: 2279out:
2273 mutex_unlock(&inode->i_mutex); 2280 mutex_unlock(&inode->i_mutex);
2281 if (root->fs_info->quota_enabled)
2282 btrfs_qgroup_free(root, alloc_end - alloc_start);
2283out_reserve_fail:
2274 /* Let go of our reservation. */ 2284 /* Let go of our reservation. */
2275 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); 2285 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
2276 return ret; 2286 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c226daefd65d..09c58a35b429 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
353 int i; 353 int i;
354 int will_compress; 354 int will_compress;
355 int compress_type = root->fs_info->compress_type; 355 int compress_type = root->fs_info->compress_type;
356 int redirty = 0;
356 357
357 /* if this is a small write inside eof, kick off a defrag */ 358 /* if this is a small write inside eof, kick off a defrag */
358 if ((end - start + 1) < 16 * 1024 && 359 if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
415 if (BTRFS_I(inode)->force_compress) 416 if (BTRFS_I(inode)->force_compress)
416 compress_type = BTRFS_I(inode)->force_compress; 417 compress_type = BTRFS_I(inode)->force_compress;
417 418
419 /*
420 * we need to call clear_page_dirty_for_io on each
421 * page in the range. Otherwise applications with the file
422 * mmap'd can wander in and change the page contents while
423 * we are compressing them.
424 *
425 * If the compression fails for any reason, we set the pages
426 * dirty again later on.
427 */
428 extent_range_clear_dirty_for_io(inode, start, end);
429 redirty = 1;
418 ret = btrfs_compress_pages(compress_type, 430 ret = btrfs_compress_pages(compress_type,
419 inode->i_mapping, start, 431 inode->i_mapping, start,
420 total_compressed, pages, 432 total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
554 __set_page_dirty_nobuffers(locked_page); 566 __set_page_dirty_nobuffers(locked_page);
555 /* unlocked later on in the async handlers */ 567 /* unlocked later on in the async handlers */
556 } 568 }
569 if (redirty)
570 extent_range_redirty_for_io(inode, start, end);
557 add_async_extent(async_cow, start, end - start + 1, 571 add_async_extent(async_cow, start, end - start + 1,
558 0, NULL, 0, BTRFS_COMPRESS_NONE); 572 0, NULL, 0, BTRFS_COMPRESS_NONE);
559 *num_added += 1; 573 *num_added += 1;
@@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1743 struct btrfs_ordered_sum *sum; 1757 struct btrfs_ordered_sum *sum;
1744 1758
1745 list_for_each_entry(sum, list, list) { 1759 list_for_each_entry(sum, list, list) {
1760 trans->adding_csums = 1;
1746 btrfs_csum_file_blocks(trans, 1761 btrfs_csum_file_blocks(trans,
1747 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1762 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1763 trans->adding_csums = 0;
1748 } 1764 }
1749 return 0; 1765 return 0;
1750} 1766}
@@ -2312,6 +2328,7 @@ again:
2312 key.type = BTRFS_EXTENT_DATA_KEY; 2328 key.type = BTRFS_EXTENT_DATA_KEY;
2313 key.offset = start; 2329 key.offset = start;
2314 2330
2331 path->leave_spinning = 1;
2315 if (merge) { 2332 if (merge) {
2316 struct btrfs_file_extent_item *fi; 2333 struct btrfs_file_extent_item *fi;
2317 u64 extent_len; 2334 u64 extent_len;
@@ -2368,6 +2385,7 @@ again:
2368 2385
2369 btrfs_mark_buffer_dirty(leaf); 2386 btrfs_mark_buffer_dirty(leaf);
2370 inode_add_bytes(inode, len); 2387 inode_add_bytes(inode, len);
2388 btrfs_release_path(path);
2371 2389
2372 ret = btrfs_inc_extent_ref(trans, root, new->bytenr, 2390 ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
2373 new->disk_len, 0, 2391 new->disk_len, 0,
@@ -2381,6 +2399,7 @@ again:
2381 ret = 1; 2399 ret = 1;
2382out_free_path: 2400out_free_path:
2383 btrfs_release_path(path); 2401 btrfs_release_path(path);
2402 path->leave_spinning = 0;
2384 btrfs_end_transaction(trans, root); 2403 btrfs_end_transaction(trans, root);
2385out_unlock: 2404out_unlock:
2386 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, 2405 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
@@ -3676,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
3676 * 1 for the dir item 3695 * 1 for the dir item
3677 * 1 for the dir index 3696 * 1 for the dir index
3678 * 1 for the inode ref 3697 * 1 for the inode ref
3679 * 1 for the inode ref in the tree log
3680 * 2 for the dir entries in the log
3681 * 1 for the inode 3698 * 1 for the inode
3682 */ 3699 */
3683 trans = btrfs_start_transaction(root, 8); 3700 trans = btrfs_start_transaction(root, 5);
3684 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3701 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3685 return trans; 3702 return trans;
3686 3703
@@ -8124,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8124 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items 8141 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
8125 * should cover the worst case number of items we'll modify. 8142 * should cover the worst case number of items we'll modify.
8126 */ 8143 */
8127 trans = btrfs_start_transaction(root, 20); 8144 trans = btrfs_start_transaction(root, 11);
8128 if (IS_ERR(trans)) { 8145 if (IS_ERR(trans)) {
8129 ret = PTR_ERR(trans); 8146 ret = PTR_ERR(trans);
8130 goto out_notrans; 8147 goto out_notrans;
@@ -8502,6 +8519,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8502 struct btrfs_key ins; 8519 struct btrfs_key ins;
8503 u64 cur_offset = start; 8520 u64 cur_offset = start;
8504 u64 i_size; 8521 u64 i_size;
8522 u64 cur_bytes;
8505 int ret = 0; 8523 int ret = 0;
8506 bool own_trans = true; 8524 bool own_trans = true;
8507 8525
@@ -8516,8 +8534,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8516 } 8534 }
8517 } 8535 }
8518 8536
8519 ret = btrfs_reserve_extent(trans, root, 8537 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8520 min(num_bytes, 256ULL * 1024 * 1024), 8538 cur_bytes = max(cur_bytes, min_size);
8539 ret = btrfs_reserve_extent(trans, root, cur_bytes,
8521 min_size, 0, *alloc_hint, &ins, 1); 8540 min_size, 0, *alloc_hint, &ins, 1);
8522 if (ret) { 8541 if (ret) {
8523 if (own_trans) 8542 if (own_trans)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c83086fdda05..2c02310ff2d9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -527,6 +527,8 @@ fail:
527 if (async_transid) { 527 if (async_transid) {
528 *async_transid = trans->transid; 528 *async_transid = trans->transid;
529 err = btrfs_commit_transaction_async(trans, root, 1); 529 err = btrfs_commit_transaction_async(trans, root, 1);
530 if (err)
531 err = btrfs_commit_transaction(trans, root);
530 } else { 532 } else {
531 err = btrfs_commit_transaction(trans, root); 533 err = btrfs_commit_transaction(trans, root);
532 } 534 }
@@ -592,16 +594,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
592 *async_transid = trans->transid; 594 *async_transid = trans->transid;
593 ret = btrfs_commit_transaction_async(trans, 595 ret = btrfs_commit_transaction_async(trans,
594 root->fs_info->extent_root, 1); 596 root->fs_info->extent_root, 1);
597 if (ret)
598 ret = btrfs_commit_transaction(trans, root);
595 } else { 599 } else {
596 ret = btrfs_commit_transaction(trans, 600 ret = btrfs_commit_transaction(trans,
597 root->fs_info->extent_root); 601 root->fs_info->extent_root);
598 } 602 }
599 if (ret) { 603 if (ret)
600 /* cleanup_transaction has freed this for us */
601 if (trans->aborted)
602 pending_snapshot = NULL;
603 goto fail; 604 goto fail;
604 }
605 605
606 ret = pending_snapshot->error; 606 ret = pending_snapshot->error;
607 if (ret) 607 if (ret)
@@ -2245,13 +2245,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2245 if (ret) 2245 if (ret)
2246 return ret; 2246 return ret;
2247 2247
2248 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2249 1)) {
2250 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2251 mnt_drop_write_file(file);
2252 return -EINVAL;
2253 }
2254
2255 if (btrfs_root_readonly(root)) { 2248 if (btrfs_root_readonly(root)) {
2256 ret = -EROFS; 2249 ret = -EROFS;
2257 goto out; 2250 goto out;
@@ -2306,7 +2299,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2306 ret = -EINVAL; 2299 ret = -EINVAL;
2307 } 2300 }
2308out: 2301out:
2309 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2310 mnt_drop_write_file(file); 2302 mnt_drop_write_file(file);
2311 return ret; 2303 return ret;
2312} 2304}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index ca52681e5f40..b81e0e9a4894 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -26,7 +26,6 @@
26 26
27void btrfs_tree_lock(struct extent_buffer *eb); 27void btrfs_tree_lock(struct extent_buffer *eb);
28void btrfs_tree_unlock(struct extent_buffer *eb); 28void btrfs_tree_unlock(struct extent_buffer *eb);
29int btrfs_try_spin_lock(struct extent_buffer *eb);
30 29
31void btrfs_tree_read_lock(struct extent_buffer *eb); 30void btrfs_tree_read_lock(struct extent_buffer *eb);
32void btrfs_tree_read_unlock(struct extent_buffer *eb); 31void btrfs_tree_read_unlock(struct extent_buffer *eb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dc08d77b717e..005c45db699e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
557 INIT_LIST_HEAD(&splice); 557 INIT_LIST_HEAD(&splice);
558 INIT_LIST_HEAD(&works); 558 INIT_LIST_HEAD(&works);
559 559
560 mutex_lock(&root->fs_info->ordered_operations_mutex);
560 spin_lock(&root->fs_info->ordered_extent_lock); 561 spin_lock(&root->fs_info->ordered_extent_lock);
561 list_splice_init(&root->fs_info->ordered_extents, &splice); 562 list_splice_init(&root->fs_info->ordered_extents, &splice);
562 while (!list_empty(&splice)) { 563 while (!list_empty(&splice)) {
@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
600 601
601 cond_resched(); 602 cond_resched();
602 } 603 }
604 mutex_unlock(&root->fs_info->ordered_operations_mutex);
603} 605}
604 606
605/* 607/*
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aee4b1cc3d98..b44124dd2370 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, 1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
1154 sgn > 0 ? node->seq - 1 : node->seq, &roots); 1154 sgn > 0 ? node->seq - 1 : node->seq, &roots);
1155 if (ret < 0) 1155 if (ret < 0)
1156 goto out; 1156 return ret;
1157 1157
1158 spin_lock(&fs_info->qgroup_lock); 1158 spin_lock(&fs_info->qgroup_lock);
1159 quota_root = fs_info->quota_root; 1159 quota_root = fs_info->quota_root;
@@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1275 ret = 0; 1275 ret = 0;
1276unlock: 1276unlock:
1277 spin_unlock(&fs_info->qgroup_lock); 1277 spin_unlock(&fs_info->qgroup_lock);
1278out:
1279 ulist_free(roots); 1278 ulist_free(roots);
1280 ulist_free(tmp); 1279 ulist_free(tmp);
1281 1280
@@ -1525,21 +1524,23 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1525 1524
1526 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 1525 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1527 qg->reserved + qg->rfer + num_bytes > 1526 qg->reserved + qg->rfer + num_bytes >
1528 qg->max_rfer) 1527 qg->max_rfer) {
1529 ret = -EDQUOT; 1528 ret = -EDQUOT;
1529 goto out;
1530 }
1530 1531
1531 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 1532 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1532 qg->reserved + qg->excl + num_bytes > 1533 qg->reserved + qg->excl + num_bytes >
1533 qg->max_excl) 1534 qg->max_excl) {
1534 ret = -EDQUOT; 1535 ret = -EDQUOT;
1536 goto out;
1537 }
1535 1538
1536 list_for_each_entry(glist, &qg->groups, next_group) { 1539 list_for_each_entry(glist, &qg->groups, next_group) {
1537 ulist_add(ulist, glist->group->qgroupid, 1540 ulist_add(ulist, glist->group->qgroupid,
1538 (uintptr_t)glist->group, GFP_ATOMIC); 1541 (uintptr_t)glist->group, GFP_ATOMIC);
1539 } 1542 }
1540 } 1543 }
1541 if (ret)
1542 goto out;
1543 1544
1544 /* 1545 /*
1545 * no limits exceeded, now record the reservation into all qgroups 1546 * no limits exceeded, now record the reservation into all qgroups
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 50695dc5e2ab..b67171e6d688 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1269,6 +1269,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
1269 } 1269 }
1270 spin_unlock(&rc->reloc_root_tree.lock); 1270 spin_unlock(&rc->reloc_root_tree.lock);
1271 1271
1272 if (!node)
1273 return 0;
1272 BUG_ON((struct btrfs_root *)node->data != root); 1274 BUG_ON((struct btrfs_root *)node->data != root);
1273 1275
1274 if (!del) { 1276 if (!del) {
@@ -2238,13 +2240,28 @@ again:
2238} 2240}
2239 2241
2240static noinline_for_stack 2242static noinline_for_stack
2243void free_reloc_roots(struct list_head *list)
2244{
2245 struct btrfs_root *reloc_root;
2246
2247 while (!list_empty(list)) {
2248 reloc_root = list_entry(list->next, struct btrfs_root,
2249 root_list);
2250 __update_reloc_root(reloc_root, 1);
2251 free_extent_buffer(reloc_root->node);
2252 free_extent_buffer(reloc_root->commit_root);
2253 kfree(reloc_root);
2254 }
2255}
2256
2257static noinline_for_stack
2241int merge_reloc_roots(struct reloc_control *rc) 2258int merge_reloc_roots(struct reloc_control *rc)
2242{ 2259{
2243 struct btrfs_root *root; 2260 struct btrfs_root *root;
2244 struct btrfs_root *reloc_root; 2261 struct btrfs_root *reloc_root;
2245 LIST_HEAD(reloc_roots); 2262 LIST_HEAD(reloc_roots);
2246 int found = 0; 2263 int found = 0;
2247 int ret; 2264 int ret = 0;
2248again: 2265again:
2249 root = rc->extent_root; 2266 root = rc->extent_root;
2250 2267
@@ -2270,20 +2287,33 @@ again:
2270 BUG_ON(root->reloc_root != reloc_root); 2287 BUG_ON(root->reloc_root != reloc_root);
2271 2288
2272 ret = merge_reloc_root(rc, root); 2289 ret = merge_reloc_root(rc, root);
2273 BUG_ON(ret); 2290 if (ret)
2291 goto out;
2274 } else { 2292 } else {
2275 list_del_init(&reloc_root->root_list); 2293 list_del_init(&reloc_root->root_list);
2276 } 2294 }
2277 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); 2295 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
2278 BUG_ON(ret < 0); 2296 if (ret < 0) {
2297 if (list_empty(&reloc_root->root_list))
2298 list_add_tail(&reloc_root->root_list,
2299 &reloc_roots);
2300 goto out;
2301 }
2279 } 2302 }
2280 2303
2281 if (found) { 2304 if (found) {
2282 found = 0; 2305 found = 0;
2283 goto again; 2306 goto again;
2284 } 2307 }
2308out:
2309 if (ret) {
2310 btrfs_std_error(root->fs_info, ret);
2311 if (!list_empty(&reloc_roots))
2312 free_reloc_roots(&reloc_roots);
2313 }
2314
2285 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 2315 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
2286 return 0; 2316 return ret;
2287} 2317}
2288 2318
2289static void free_block_list(struct rb_root *blocks) 2319static void free_block_list(struct rb_root *blocks)
@@ -2818,8 +2848,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2818 int err = 0; 2848 int err = 0;
2819 2849
2820 path = btrfs_alloc_path(); 2850 path = btrfs_alloc_path();
2821 if (!path) 2851 if (!path) {
2822 return -ENOMEM; 2852 err = -ENOMEM;
2853 goto out_path;
2854 }
2823 2855
2824 rb_node = rb_first(blocks); 2856 rb_node = rb_first(blocks);
2825 while (rb_node) { 2857 while (rb_node) {
@@ -2858,10 +2890,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2858 rb_node = rb_next(rb_node); 2890 rb_node = rb_next(rb_node);
2859 } 2891 }
2860out: 2892out:
2861 free_block_list(blocks);
2862 err = finish_pending_nodes(trans, rc, path, err); 2893 err = finish_pending_nodes(trans, rc, path, err);
2863 2894
2864 btrfs_free_path(path); 2895 btrfs_free_path(path);
2896out_path:
2897 free_block_list(blocks);
2865 return err; 2898 return err;
2866} 2899}
2867 2900
@@ -3698,7 +3731,15 @@ int prepare_to_relocate(struct reloc_control *rc)
3698 set_reloc_control(rc); 3731 set_reloc_control(rc);
3699 3732
3700 trans = btrfs_join_transaction(rc->extent_root); 3733 trans = btrfs_join_transaction(rc->extent_root);
3701 BUG_ON(IS_ERR(trans)); 3734 if (IS_ERR(trans)) {
3735 unset_reloc_control(rc);
3736 /*
3737 * extent tree is not a ref_cow tree and has no reloc_root to
3738 * cleanup. And callers are responsible to free the above
3739 * block rsv.
3740 */
3741 return PTR_ERR(trans);
3742 }
3702 btrfs_commit_transaction(trans, rc->extent_root); 3743 btrfs_commit_transaction(trans, rc->extent_root);
3703 return 0; 3744 return 0;
3704} 3745}
@@ -3730,7 +3771,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3730 while (1) { 3771 while (1) {
3731 progress++; 3772 progress++;
3732 trans = btrfs_start_transaction(rc->extent_root, 0); 3773 trans = btrfs_start_transaction(rc->extent_root, 0);
3733 BUG_ON(IS_ERR(trans)); 3774 if (IS_ERR(trans)) {
3775 err = PTR_ERR(trans);
3776 trans = NULL;
3777 break;
3778 }
3734restart: 3779restart:
3735 if (update_backref_cache(trans, &rc->backref_cache)) { 3780 if (update_backref_cache(trans, &rc->backref_cache)) {
3736 btrfs_end_transaction(trans, rc->extent_root); 3781 btrfs_end_transaction(trans, rc->extent_root);
@@ -4264,14 +4309,9 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4264out_free: 4309out_free:
4265 kfree(rc); 4310 kfree(rc);
4266out: 4311out:
4267 while (!list_empty(&reloc_roots)) { 4312 if (!list_empty(&reloc_roots))
4268 reloc_root = list_entry(reloc_roots.next, 4313 free_reloc_roots(&reloc_roots);
4269 struct btrfs_root, root_list); 4314
4270 list_del(&reloc_root->root_list);
4271 free_extent_buffer(reloc_root->node);
4272 free_extent_buffer(reloc_root->commit_root);
4273 kfree(reloc_root);
4274 }
4275 btrfs_free_path(path); 4315 btrfs_free_path(path);
4276 4316
4277 if (err == 0) { 4317 if (err == 0) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 53c3501fa4ca..85e072b956d5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
542 eb = path->nodes[0]; 542 eb = path->nodes[0];
543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
544 item_size = btrfs_item_size_nr(eb, path->slots[0]); 544 item_size = btrfs_item_size_nr(eb, path->slots[0]);
545 btrfs_release_path(path);
546 545
547 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
548 do { 547 do {
@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
558 ret < 0 ? -1 : ref_level, 557 ret < 0 ? -1 : ref_level,
559 ret < 0 ? -1 : ref_root); 558 ret < 0 ? -1 : ref_root);
560 } while (ret != 1); 559 } while (ret != 1);
560 btrfs_release_path(path);
561 } else { 561 } else {
562 btrfs_release_path(path);
562 swarn.path = path; 563 swarn.path = path;
563 swarn.dev = dev; 564 swarn.dev = dev;
564 iterate_extent_inodes(fs_info, found_key.objectid, 565 iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7a8b861058b..c85e7c6b4598 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3945 found_key.type != key.type) { 3945 found_key.type != key.type) {
3946 key.offset += right_len; 3946 key.offset += right_len;
3947 break; 3947 break;
3948 } else { 3948 }
3949 if (found_key.offset != key.offset + right_len) { 3949 if (found_key.offset != key.offset + right_len) {
3950 /* Should really not happen */ 3950 ret = 0;
3951 ret = -EIO; 3951 goto out;
3952 goto out;
3953 }
3954 } 3952 }
3955 key = found_key; 3953 key = found_key;
3956 } 3954 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 68a29a1ea068..f6b88595f858 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1558,6 +1558,7 @@ static struct file_system_type btrfs_fs_type = {
1558 .kill_sb = btrfs_kill_super, 1558 .kill_sb = btrfs_kill_super,
1559 .fs_flags = FS_REQUIRES_DEV, 1559 .fs_flags = FS_REQUIRES_DEV,
1560}; 1560};
1561MODULE_ALIAS_FS("btrfs");
1561 1562
1562/* 1563/*
1563 * used by btrfsctl to scan devices when no FS is mounted 1564 * used by btrfsctl to scan devices when no FS is mounted
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e52da6fb1165..50767bbaad6c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -625,14 +625,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
625 625
626 btrfs_trans_release_metadata(trans, root); 626 btrfs_trans_release_metadata(trans, root);
627 trans->block_rsv = NULL; 627 trans->block_rsv = NULL;
628 /*
629 * the same root has to be passed to start_transaction and
630 * end_transaction. Subvolume quota depends on this.
631 */
632 WARN_ON(trans->root != root);
633 628
634 if (trans->qgroup_reserved) { 629 if (trans->qgroup_reserved) {
635 btrfs_qgroup_free(root, trans->qgroup_reserved); 630 /*
631 * the same root has to be passed here between start_transaction
632 * and end_transaction. Subvolume quota depends on this.
633 */
634 btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
636 trans->qgroup_reserved = 0; 635 trans->qgroup_reserved = 0;
637 } 636 }
638 637
@@ -1052,7 +1051,12 @@ int btrfs_defrag_root(struct btrfs_root *root)
1052 1051
1053/* 1052/*
1054 * new snapshots need to be created at a very specific time in the 1053 * new snapshots need to be created at a very specific time in the
1055 * transaction commit. This does the actual creation 1054 * transaction commit. This does the actual creation.
1055 *
1056 * Note:
1057 * If an error occurs that may affect the commit of the current transaction,
1058 * we should return the error number. If the error only affects the creation
1059 * of the pending snapshots, just return 0.
1056 */ 1060 */
1057static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 1061static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1058 struct btrfs_fs_info *fs_info, 1062 struct btrfs_fs_info *fs_info,
@@ -1071,7 +1075,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1071 struct extent_buffer *tmp; 1075 struct extent_buffer *tmp;
1072 struct extent_buffer *old; 1076 struct extent_buffer *old;
1073 struct timespec cur_time = CURRENT_TIME; 1077 struct timespec cur_time = CURRENT_TIME;
1074 int ret; 1078 int ret = 0;
1075 u64 to_reserve = 0; 1079 u64 to_reserve = 0;
1076 u64 index = 0; 1080 u64 index = 0;
1077 u64 objectid; 1081 u64 objectid;
@@ -1080,40 +1084,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1080 1084
1081 path = btrfs_alloc_path(); 1085 path = btrfs_alloc_path();
1082 if (!path) { 1086 if (!path) {
1083 ret = pending->error = -ENOMEM; 1087 pending->error = -ENOMEM;
1084 return ret; 1088 return 0;
1085 } 1089 }
1086 1090
1087 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 1091 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
1088 if (!new_root_item) { 1092 if (!new_root_item) {
1089 ret = pending->error = -ENOMEM; 1093 pending->error = -ENOMEM;
1090 goto root_item_alloc_fail; 1094 goto root_item_alloc_fail;
1091 } 1095 }
1092 1096
1093 ret = btrfs_find_free_objectid(tree_root, &objectid); 1097 pending->error = btrfs_find_free_objectid(tree_root, &objectid);
1094 if (ret) { 1098 if (pending->error)
1095 pending->error = ret;
1096 goto no_free_objectid; 1099 goto no_free_objectid;
1097 }
1098 1100
1099 btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); 1101 btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
1100 1102
1101 if (to_reserve > 0) { 1103 if (to_reserve > 0) {
1102 ret = btrfs_block_rsv_add(root, &pending->block_rsv, 1104 pending->error = btrfs_block_rsv_add(root,
1103 to_reserve, 1105 &pending->block_rsv,
1104 BTRFS_RESERVE_NO_FLUSH); 1106 to_reserve,
1105 if (ret) { 1107 BTRFS_RESERVE_NO_FLUSH);
1106 pending->error = ret; 1108 if (pending->error)
1107 goto no_free_objectid; 1109 goto no_free_objectid;
1108 }
1109 } 1110 }
1110 1111
1111 ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid, 1112 pending->error = btrfs_qgroup_inherit(trans, fs_info,
1112 objectid, pending->inherit); 1113 root->root_key.objectid,
1113 if (ret) { 1114 objectid, pending->inherit);
1114 pending->error = ret; 1115 if (pending->error)
1115 goto no_free_objectid; 1116 goto no_free_objectid;
1116 }
1117 1117
1118 key.objectid = objectid; 1118 key.objectid = objectid;
1119 key.offset = (u64)-1; 1119 key.offset = (u64)-1;
@@ -1141,7 +1141,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1141 dentry->d_name.len, 0); 1141 dentry->d_name.len, 0);
1142 if (dir_item != NULL && !IS_ERR(dir_item)) { 1142 if (dir_item != NULL && !IS_ERR(dir_item)) {
1143 pending->error = -EEXIST; 1143 pending->error = -EEXIST;
1144 goto fail; 1144 goto dir_item_existed;
1145 } else if (IS_ERR(dir_item)) { 1145 } else if (IS_ERR(dir_item)) {
1146 ret = PTR_ERR(dir_item); 1146 ret = PTR_ERR(dir_item);
1147 btrfs_abort_transaction(trans, root, ret); 1147 btrfs_abort_transaction(trans, root, ret);
@@ -1272,6 +1272,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1272 if (ret) 1272 if (ret)
1273 btrfs_abort_transaction(trans, root, ret); 1273 btrfs_abort_transaction(trans, root, ret);
1274fail: 1274fail:
1275 pending->error = ret;
1276dir_item_existed:
1275 trans->block_rsv = rsv; 1277 trans->block_rsv = rsv;
1276 trans->bytes_reserved = 0; 1278 trans->bytes_reserved = 0;
1277no_free_objectid: 1279no_free_objectid:
@@ -1287,12 +1289,17 @@ root_item_alloc_fail:
1287static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, 1289static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1288 struct btrfs_fs_info *fs_info) 1290 struct btrfs_fs_info *fs_info)
1289{ 1291{
1290 struct btrfs_pending_snapshot *pending; 1292 struct btrfs_pending_snapshot *pending, *next;
1291 struct list_head *head = &trans->transaction->pending_snapshots; 1293 struct list_head *head = &trans->transaction->pending_snapshots;
1294 int ret = 0;
1292 1295
1293 list_for_each_entry(pending, head, list) 1296 list_for_each_entry_safe(pending, next, head, list) {
1294 create_pending_snapshot(trans, fs_info, pending); 1297 list_del(&pending->list);
1295 return 0; 1298 ret = create_pending_snapshot(trans, fs_info, pending);
1299 if (ret)
1300 break;
1301 }
1302 return ret;
1296} 1303}
1297 1304
1298static void update_super_roots(struct btrfs_root *root) 1305static void update_super_roots(struct btrfs_root *root)
@@ -1448,6 +1455,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1448 btrfs_abort_transaction(trans, root, err); 1455 btrfs_abort_transaction(trans, root, err);
1449 1456
1450 spin_lock(&root->fs_info->trans_lock); 1457 spin_lock(&root->fs_info->trans_lock);
1458
1459 if (list_empty(&cur_trans->list)) {
1460 spin_unlock(&root->fs_info->trans_lock);
1461 btrfs_end_transaction(trans, root);
1462 return;
1463 }
1464
1451 list_del_init(&cur_trans->list); 1465 list_del_init(&cur_trans->list);
1452 if (cur_trans == root->fs_info->running_transaction) { 1466 if (cur_trans == root->fs_info->running_transaction) {
1453 root->fs_info->trans_no_join = 1; 1467 root->fs_info->trans_no_join = 1;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c7ef569eb22a..451fad96ecd1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1382,7 +1382,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1382 1382
1383 btrfs_release_path(path); 1383 btrfs_release_path(path);
1384 if (ret == 0) { 1384 if (ret == 0) {
1385 btrfs_inc_nlink(inode); 1385 if (!inode->i_nlink)
1386 set_nlink(inode, 1);
1387 else
1388 btrfs_inc_nlink(inode);
1386 ret = btrfs_update_inode(trans, root, inode); 1389 ret = btrfs_update_inode(trans, root, inode);
1387 } else if (ret == -EEXIST) { 1390 } else if (ret == -EEXIST) {
1388 ret = 0; 1391 ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 35bb2d4ed29f..2854c824ab64 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -684,6 +684,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
684 __btrfs_close_devices(fs_devices); 684 __btrfs_close_devices(fs_devices);
685 free_fs_devices(fs_devices); 685 free_fs_devices(fs_devices);
686 } 686 }
687 /*
688 * Wait for rcu kworkers under __btrfs_close_devices
689 * to finish all blkdev_puts so device is really
690 * free when umount is done.
691 */
692 rcu_barrier();
687 return ret; 693 return ret;
688} 694}
689 695
@@ -2379,7 +2385,11 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
2379 return ret; 2385 return ret;
2380 2386
2381 trans = btrfs_start_transaction(root, 0); 2387 trans = btrfs_start_transaction(root, 0);
2382 BUG_ON(IS_ERR(trans)); 2388 if (IS_ERR(trans)) {
2389 ret = PTR_ERR(trans);
2390 btrfs_std_error(root->fs_info, ret);
2391 return ret;
2392 }
2383 2393
2384 lock_chunks(root); 2394 lock_chunks(root);
2385 2395
@@ -3050,7 +3060,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
3050 3060
3051 unset_balance_control(fs_info); 3061 unset_balance_control(fs_info);
3052 ret = del_balance_item(fs_info->tree_root); 3062 ret = del_balance_item(fs_info->tree_root);
3053 BUG_ON(ret); 3063 if (ret)
3064 btrfs_std_error(fs_info, ret);
3054 3065
3055 atomic_set(&fs_info->mutually_exclusive_operation_running, 0); 3066 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3056} 3067}
@@ -3230,6 +3241,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3230 update_ioctl_balance_args(fs_info, 0, bargs); 3241 update_ioctl_balance_args(fs_info, 0, bargs);
3231 } 3242 }
3232 3243
3244 if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
3245 balance_need_close(fs_info)) {
3246 __cancel_balance(fs_info);
3247 }
3248
3233 wake_up(&fs_info->balance_wait_q); 3249 wake_up(&fs_info->balance_wait_q);
3234 3250
3235 return ret; 3251 return ret;
@@ -4919,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
4919 em = lookup_extent_mapping(em_tree, chunk_start, 1); 4935 em = lookup_extent_mapping(em_tree, chunk_start, 1);
4920 read_unlock(&em_tree->lock); 4936 read_unlock(&em_tree->lock);
4921 4937
4922 BUG_ON(!em || em->start != chunk_start); 4938 if (!em) {
4939 printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
4940 chunk_start);
4941 return -EIO;
4942 }
4943
4944 if (em->start != chunk_start) {
4945 printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
4946 em->start, chunk_start);
4947 free_extent_map(em);
4948 return -EIO;
4949 }
4923 map = (struct map_lookup *)em->bdev; 4950 map = (struct map_lookup *)em->bdev;
4924 4951
4925 length = em->len; 4952 length = em->len;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9fe17c6c2876..6ddc0bca56b2 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -952,6 +952,7 @@ static struct file_system_type ceph_fs_type = {
952 .kill_sb = ceph_kill_sb, 952 .kill_sb = ceph_kill_sb,
953 .fs_flags = FS_RENAME_DOES_D_MOVE, 953 .fs_flags = FS_RENAME_DOES_D_MOVE,
954}; 954};
955MODULE_ALIAS_FS("ceph");
955 956
956#define _STRINGIFY(x) #x 957#define _STRINGIFY(x) #x
957#define STRINGIFY(x) _STRINGIFY(x) 958#define STRINGIFY(x) _STRINGIFY(x)
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index cfd1ce34e0bc..1d36db114772 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length,
614 } 614 }
615 } 615 }
616 616
617 /* mechlistMIC */ 617 /*
618 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 618 * We currently ignore anything at the end of the SPNEGO blob after
619 /* Check if we have reached the end of the blob, but with 619 * the mechTypes have been parsed, since none of that info is
620 no mechListMic (e.g. NTLMSSP instead of KRB5) */ 620 * used at the moment.
621 if (ctx.error == ASN1_ERR_DEC_EMPTY) 621 */
622 goto decode_negtoken_exit;
623 cFYI(1, "Error decoding last part negTokenInit exit3");
624 return 0;
625 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
626 /* tag = 3 indicating mechListMIC */
627 cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
628 cls, con, tag, end, *end);
629 return 0;
630 }
631
632 /* sequence */
633 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
634 cFYI(1, "Error decoding last part negTokenInit exit5");
635 return 0;
636 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
637 || (tag != ASN1_SEQ)) {
638 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)",
639 cls, con, tag, end, *end);
640 }
641
642 /* sequence of */
643 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
644 cFYI(1, "Error decoding last part negTokenInit exit 7");
645 return 0;
646 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
647 cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
648 cls, con, tag, end, *end);
649 return 0;
650 }
651
652 /* general string */
653 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
654 cFYI(1, "Error decoding last part negTokenInit exit9");
655 return 0;
656 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
657 || (tag != ASN1_GENSTR)) {
658 cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)",
659 cls, con, tag, end, *end);
660 return 0;
661 }
662 cFYI(1, "Need to call asn1_octets_decode() function for %s",
663 ctx.pointer); /* is this UTF-8 or ASCII? */
664decode_negtoken_exit:
665 return 1; 622 return 1;
666} 623}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1a052c0eee8e..345fc89c4286 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -91,6 +91,30 @@ struct workqueue_struct *cifsiod_wq;
91__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; 91__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
92#endif 92#endif
93 93
94/*
95 * Bumps refcount for cifs super block.
96 * Note that it should be only called if a reference to VFS super block is
97 * already held, e.g. in open-type syscalls context. Otherwise it can race with
98 * atomic_dec_and_test in deactivate_locked_super.
99 */
100void
101cifs_sb_active(struct super_block *sb)
102{
103 struct cifs_sb_info *server = CIFS_SB(sb);
104
105 if (atomic_inc_return(&server->active) == 1)
106 atomic_inc(&sb->s_active);
107}
108
109void
110cifs_sb_deactive(struct super_block *sb)
111{
112 struct cifs_sb_info *server = CIFS_SB(sb);
113
114 if (atomic_dec_and_test(&server->active))
115 deactivate_super(sb);
116}
117
94static int 118static int
95cifs_read_super(struct super_block *sb) 119cifs_read_super(struct super_block *sb)
96{ 120{
@@ -777,6 +801,7 @@ struct file_system_type cifs_fs_type = {
777 .kill_sb = cifs_kill_sb, 801 .kill_sb = cifs_kill_sb,
778 /* .fs_flags */ 802 /* .fs_flags */
779}; 803};
804MODULE_ALIAS_FS("cifs");
780const struct inode_operations cifs_dir_inode_ops = { 805const struct inode_operations cifs_dir_inode_ops = {
781 .create = cifs_create, 806 .create = cifs_create,
782 .atomic_open = cifs_atomic_open, 807 .atomic_open = cifs_atomic_open,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 7163419cecd9..0e32c3446ce9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -41,6 +41,10 @@ extern struct file_system_type cifs_fs_type;
41extern const struct address_space_operations cifs_addr_ops; 41extern const struct address_space_operations cifs_addr_ops;
42extern const struct address_space_operations cifs_addr_ops_smallbuf; 42extern const struct address_space_operations cifs_addr_ops_smallbuf;
43 43
44/* Functions related to super block operations */
45extern void cifs_sb_active(struct super_block *sb);
46extern void cifs_sb_deactive(struct super_block *sb);
47
44/* Functions related to inodes */ 48/* Functions related to inodes */
45extern const struct inode_operations cifs_dir_inode_ops; 49extern const struct inode_operations cifs_dir_inode_ops;
46extern struct inode *cifs_root_iget(struct super_block *); 50extern struct inode *cifs_root_iget(struct super_block *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 7353bc5d73d7..8e2e799e7a24 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1909,12 +1909,12 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
1909 } while (rc == -EAGAIN); 1909 } while (rc == -EAGAIN);
1910 1910
1911 for (i = 0; i < wdata->nr_pages; i++) { 1911 for (i = 0; i < wdata->nr_pages; i++) {
1912 unlock_page(wdata->pages[i]);
1912 if (rc != 0) { 1913 if (rc != 0) {
1913 SetPageError(wdata->pages[i]); 1914 SetPageError(wdata->pages[i]);
1914 end_page_writeback(wdata->pages[i]); 1915 end_page_writeback(wdata->pages[i]);
1915 page_cache_release(wdata->pages[i]); 1916 page_cache_release(wdata->pages[i]);
1916 } 1917 }
1917 unlock_page(wdata->pages[i]);
1918 } 1918 }
1919 1919
1920 mapping_set_error(inode->i_mapping, rc); 1920 mapping_set_error(inode->i_mapping, rc);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 54125e04fd0c..991c63c6bdd0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -97,7 +97,7 @@ enum {
97 Opt_user, Opt_pass, Opt_ip, 97 Opt_user, Opt_pass, Opt_ip,
98 Opt_unc, Opt_domain, 98 Opt_unc, Opt_domain,
99 Opt_srcaddr, Opt_prefixpath, 99 Opt_srcaddr, Opt_prefixpath,
100 Opt_iocharset, Opt_sockopt, 100 Opt_iocharset,
101 Opt_netbiosname, Opt_servern, 101 Opt_netbiosname, Opt_servern,
102 Opt_ver, Opt_vers, Opt_sec, Opt_cache, 102 Opt_ver, Opt_vers, Opt_sec, Opt_cache,
103 103
@@ -202,7 +202,6 @@ static const match_table_t cifs_mount_option_tokens = {
202 { Opt_srcaddr, "srcaddr=%s" }, 202 { Opt_srcaddr, "srcaddr=%s" },
203 { Opt_prefixpath, "prefixpath=%s" }, 203 { Opt_prefixpath, "prefixpath=%s" },
204 { Opt_iocharset, "iocharset=%s" }, 204 { Opt_iocharset, "iocharset=%s" },
205 { Opt_sockopt, "sockopt=%s" },
206 { Opt_netbiosname, "netbiosname=%s" }, 205 { Opt_netbiosname, "netbiosname=%s" },
207 { Opt_servern, "servern=%s" }, 206 { Opt_servern, "servern=%s" },
208 { Opt_ver, "ver=%s" }, 207 { Opt_ver, "ver=%s" },
@@ -1752,19 +1751,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1752 */ 1751 */
1753 cFYI(1, "iocharset set to %s", string); 1752 cFYI(1, "iocharset set to %s", string);
1754 break; 1753 break;
1755 case Opt_sockopt:
1756 string = match_strdup(args);
1757 if (string == NULL)
1758 goto out_nomem;
1759
1760 if (strnicmp(string, "TCP_NODELAY", 11) == 0) {
1761 printk(KERN_WARNING "CIFS: the "
1762 "sockopt=TCP_NODELAY option has been "
1763 "deprecated and will be removed "
1764 "in 3.9\n");
1765 vol->sockopt_tcp_nodelay = 1;
1766 }
1767 break;
1768 case Opt_netbiosname: 1754 case Opt_netbiosname:
1769 string = match_strdup(args); 1755 string = match_strdup(args);
1770 if (string == NULL) 1756 if (string == NULL)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8c0d85577314..7a0dd99e4507 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,6 +300,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
300 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 300 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
301 mutex_init(&cfile->fh_mutex); 301 mutex_init(&cfile->fh_mutex);
302 302
303 cifs_sb_active(inode->i_sb);
304
303 /* 305 /*
304 * If the server returned a read oplock and we have mandatory brlocks, 306 * If the server returned a read oplock and we have mandatory brlocks,
305 * set oplock level to None. 307 * set oplock level to None.
@@ -349,7 +351,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
349 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 351 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
350 struct TCP_Server_Info *server = tcon->ses->server; 352 struct TCP_Server_Info *server = tcon->ses->server;
351 struct cifsInodeInfo *cifsi = CIFS_I(inode); 353 struct cifsInodeInfo *cifsi = CIFS_I(inode);
352 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 354 struct super_block *sb = inode->i_sb;
355 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
353 struct cifsLockInfo *li, *tmp; 356 struct cifsLockInfo *li, *tmp;
354 struct cifs_fid fid; 357 struct cifs_fid fid;
355 struct cifs_pending_open open; 358 struct cifs_pending_open open;
@@ -414,6 +417,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
414 417
415 cifs_put_tlink(cifs_file->tlink); 418 cifs_put_tlink(cifs_file->tlink);
416 dput(cifs_file->dentry); 419 dput(cifs_file->dentry);
420 cifs_sb_deactive(sb);
417 kfree(cifs_file); 421 kfree(cifs_file);
418} 422}
419 423
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 83f2606c76d0..20887bf63121 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -995,6 +995,15 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
995 return PTR_ERR(tlink); 995 return PTR_ERR(tlink);
996 tcon = tlink_tcon(tlink); 996 tcon = tlink_tcon(tlink);
997 997
998 /*
999 * We cannot rename the file if the server doesn't support
1000 * CAP_INFOLEVEL_PASSTHRU
1001 */
1002 if (!(tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)) {
1003 rc = -EBUSY;
1004 goto out;
1005 }
1006
998 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, 1007 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
999 DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, 1008 DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
1000 &netfid, &oplock, NULL, cifs_sb->local_nls, 1009 &netfid, &oplock, NULL, cifs_sb->local_nls,
@@ -1023,7 +1032,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
1023 current->tgid); 1032 current->tgid);
1024 /* although we would like to mark the file hidden 1033 /* although we would like to mark the file hidden
1025 if that fails we will still try to rename it */ 1034 if that fails we will still try to rename it */
1026 if (rc != 0) 1035 if (!rc)
1027 cifsInode->cifsAttrs = dosattr; 1036 cifsInode->cifsAttrs = dosattr;
1028 else 1037 else
1029 dosattr = origattr; /* since not able to change them */ 1038 dosattr = origattr; /* since not able to change them */
@@ -1034,7 +1043,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
1034 cifs_sb->mnt_cifs_flags & 1043 cifs_sb->mnt_cifs_flags &
1035 CIFS_MOUNT_MAP_SPECIAL_CHR); 1044 CIFS_MOUNT_MAP_SPECIAL_CHR);
1036 if (rc != 0) { 1045 if (rc != 0) {
1037 rc = -ETXTBSY; 1046 rc = -EBUSY;
1038 goto undo_setattr; 1047 goto undo_setattr;
1039 } 1048 }
1040 1049
@@ -1053,7 +1062,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
1053 if (rc == -ENOENT) 1062 if (rc == -ENOENT)
1054 rc = 0; 1063 rc = 0;
1055 else if (rc != 0) { 1064 else if (rc != 0) {
1056 rc = -ETXTBSY; 1065 rc = -EBUSY;
1057 goto undo_rename; 1066 goto undo_rename;
1058 } 1067 }
1059 cifsInode->delete_pending = true; 1068 cifsInode->delete_pending = true;
@@ -1160,15 +1169,13 @@ psx_del_no_retry:
1160 cifs_drop_nlink(inode); 1169 cifs_drop_nlink(inode);
1161 } else if (rc == -ENOENT) { 1170 } else if (rc == -ENOENT) {
1162 d_drop(dentry); 1171 d_drop(dentry);
1163 } else if (rc == -ETXTBSY) { 1172 } else if (rc == -EBUSY) {
1164 if (server->ops->rename_pending_delete) { 1173 if (server->ops->rename_pending_delete) {
1165 rc = server->ops->rename_pending_delete(full_path, 1174 rc = server->ops->rename_pending_delete(full_path,
1166 dentry, xid); 1175 dentry, xid);
1167 if (rc == 0) 1176 if (rc == 0)
1168 cifs_drop_nlink(inode); 1177 cifs_drop_nlink(inode);
1169 } 1178 }
1170 if (rc == -ETXTBSY)
1171 rc = -EBUSY;
1172 } else if ((rc == -EACCES) && (dosattr == 0) && inode) { 1179 } else if ((rc == -EACCES) && (dosattr == 0) && inode) {
1173 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); 1180 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
1174 if (attrs == NULL) { 1181 if (attrs == NULL) {
@@ -1509,7 +1516,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
1509 * source. Note that cross directory moves do not work with 1516 * source. Note that cross directory moves do not work with
1510 * rename by filehandle to various Windows servers. 1517 * rename by filehandle to various Windows servers.
1511 */ 1518 */
1512 if (rc == 0 || rc != -ETXTBSY) 1519 if (rc == 0 || rc != -EBUSY)
1513 goto do_rename_exit; 1520 goto do_rename_exit;
1514 1521
1515 /* open-file renames don't work across directories */ 1522 /* open-file renames don't work across directories */
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index a82bc51fdc82..c0b25b28be6c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -62,7 +62,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
62 {ERRdiffdevice, -EXDEV}, 62 {ERRdiffdevice, -EXDEV},
63 {ERRnofiles, -ENOENT}, 63 {ERRnofiles, -ENOENT},
64 {ERRwriteprot, -EROFS}, 64 {ERRwriteprot, -EROFS},
65 {ERRbadshare, -ETXTBSY}, 65 {ERRbadshare, -EBUSY},
66 {ERRlock, -EACCES}, 66 {ERRlock, -EACCES},
67 {ERRunsup, -EINVAL}, 67 {ERRunsup, -EINVAL},
68 {ERRnosuchshare, -ENXIO}, 68 {ERRnosuchshare, -ENXIO},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c9c7aa7ed966..bceffe7b8f8d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -744,4 +744,5 @@ struct smb_version_values smb30_values = {
744 .cap_unix = 0, 744 .cap_unix = 0,
745 .cap_nt_find = SMB2_NT_FIND, 745 .cap_nt_find = SMB2_NT_FIND,
746 .cap_large_files = SMB2_LARGE_FILES, 746 .cap_large_files = SMB2_LARGE_FILES,
747 .oplock_read = SMB2_OPLOCK_LEVEL_II,
747}; 748};
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index dada9d0abede..4dcc0d81a7aa 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -329,4 +329,5 @@ struct file_system_type coda_fs_type = {
329 .kill_sb = kill_anon_super, 329 .kill_sb = kill_anon_super,
330 .fs_flags = FS_BINARY_MOUNTDATA, 330 .fs_flags = FS_BINARY_MOUNTDATA,
331}; 331};
332MODULE_ALIAS_FS("coda");
332 333
diff --git a/fs/compat.c b/fs/compat.c
index fe40fde29111..d487985dd0ea 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -558,6 +558,10 @@ ssize_t compat_rw_copy_check_uvector(int type,
558 } 558 }
559 *ret_pointer = iov; 559 *ret_pointer = iov;
560 560
561 ret = -EFAULT;
562 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
563 goto out;
564
561 /* 565 /*
562 * Single unix specification: 566 * Single unix specification:
563 * We should -EINVAL if an element length is not >= 0 and fitting an 567 * We should -EINVAL if an element length is not >= 0 and fitting an
@@ -1080,17 +1084,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1080 if (!file->f_op) 1084 if (!file->f_op)
1081 goto out; 1085 goto out;
1082 1086
1083 ret = -EFAULT; 1087 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
1084 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
1085 goto out;
1086
1087 tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs,
1088 UIO_FASTIOV, iovstack, &iov); 1088 UIO_FASTIOV, iovstack, &iov);
1089 if (tot_len == 0) { 1089 if (ret <= 0)
1090 ret = 0;
1091 goto out; 1090 goto out;
1092 }
1093 1091
1092 tot_len = ret;
1094 ret = rw_verify_area(type, file, pos, tot_len); 1093 ret = rw_verify_area(type, file, pos, tot_len);
1095 if (ret < 0) 1094 if (ret < 0)
1096 goto out; 1095 goto out;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index aee0a7ebbd8e..7f26c3cf75ae 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -114,6 +114,7 @@ static struct file_system_type configfs_fs_type = {
114 .mount = configfs_do_mount, 114 .mount = configfs_do_mount,
115 .kill_sb = kill_litter_super, 115 .kill_sb = kill_litter_super,
116}; 116};
117MODULE_ALIAS_FS("configfs");
117 118
118struct dentry *configfs_pin_fs(void) 119struct dentry *configfs_pin_fs(void)
119{ 120{
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 3ceb9ec976e1..35b1c7bd18b7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -573,6 +573,7 @@ static struct file_system_type cramfs_fs_type = {
573 .kill_sb = kill_block_super, 573 .kill_sb = kill_block_super,
574 .fs_flags = FS_REQUIRES_DEV, 574 .fs_flags = FS_REQUIRES_DEV,
575}; 575};
576MODULE_ALIAS_FS("cramfs");
576 577
577static int __init init_cramfs_fs(void) 578static int __init init_cramfs_fs(void)
578{ 579{
diff --git a/fs/dcache.c b/fs/dcache.c
index fbfae008ba44..e8bc3420d63e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path,
2542 bool slash = false; 2542 bool slash = false;
2543 int error = 0; 2543 int error = 0;
2544 2544
2545 br_read_lock(&vfsmount_lock);
2546 while (dentry != root->dentry || vfsmnt != root->mnt) { 2545 while (dentry != root->dentry || vfsmnt != root->mnt) {
2547 struct dentry * parent; 2546 struct dentry * parent;
2548 2547
@@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path,
2572 if (!error && !slash) 2571 if (!error && !slash)
2573 error = prepend(buffer, buflen, "/", 1); 2572 error = prepend(buffer, buflen, "/", 1);
2574 2573
2575out:
2576 br_read_unlock(&vfsmount_lock);
2577 return error; 2574 return error;
2578 2575
2579global_root: 2576global_root:
@@ -2590,7 +2587,7 @@ global_root:
2590 error = prepend(buffer, buflen, "/", 1); 2587 error = prepend(buffer, buflen, "/", 1);
2591 if (!error) 2588 if (!error)
2592 error = is_mounted(vfsmnt) ? 1 : 2; 2589 error = is_mounted(vfsmnt) ? 1 : 2;
2593 goto out; 2590 return error;
2594} 2591}
2595 2592
2596/** 2593/**
@@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path,
2617 int error; 2614 int error;
2618 2615
2619 prepend(&res, &buflen, "\0", 1); 2616 prepend(&res, &buflen, "\0", 1);
2617 br_read_lock(&vfsmount_lock);
2620 write_seqlock(&rename_lock); 2618 write_seqlock(&rename_lock);
2621 error = prepend_path(path, root, &res, &buflen); 2619 error = prepend_path(path, root, &res, &buflen);
2622 write_sequnlock(&rename_lock); 2620 write_sequnlock(&rename_lock);
2621 br_read_unlock(&vfsmount_lock);
2623 2622
2624 if (error < 0) 2623 if (error < 0)
2625 return ERR_PTR(error); 2624 return ERR_PTR(error);
@@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path,
2636 int error; 2635 int error;
2637 2636
2638 prepend(&res, &buflen, "\0", 1); 2637 prepend(&res, &buflen, "\0", 1);
2638 br_read_lock(&vfsmount_lock);
2639 write_seqlock(&rename_lock); 2639 write_seqlock(&rename_lock);
2640 error = prepend_path(path, &root, &res, &buflen); 2640 error = prepend_path(path, &root, &res, &buflen);
2641 write_sequnlock(&rename_lock); 2641 write_sequnlock(&rename_lock);
2642 br_read_unlock(&vfsmount_lock);
2642 2643
2643 if (error > 1) 2644 if (error > 1)
2644 error = -EINVAL; 2645 error = -EINVAL;
@@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
2702 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2703 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2703 2704
2704 get_fs_root(current->fs, &root); 2705 get_fs_root(current->fs, &root);
2706 br_read_lock(&vfsmount_lock);
2705 write_seqlock(&rename_lock); 2707 write_seqlock(&rename_lock);
2706 error = path_with_deleted(path, &root, &res, &buflen); 2708 error = path_with_deleted(path, &root, &res, &buflen);
2709 write_sequnlock(&rename_lock);
2710 br_read_unlock(&vfsmount_lock);
2707 if (error < 0) 2711 if (error < 0)
2708 res = ERR_PTR(error); 2712 res = ERR_PTR(error);
2709 write_sequnlock(&rename_lock);
2710 path_put(&root); 2713 path_put(&root);
2711 return res; 2714 return res;
2712} 2715}
@@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2830 get_fs_root_and_pwd(current->fs, &root, &pwd); 2833 get_fs_root_and_pwd(current->fs, &root, &pwd);
2831 2834
2832 error = -ENOENT; 2835 error = -ENOENT;
2836 br_read_lock(&vfsmount_lock);
2833 write_seqlock(&rename_lock); 2837 write_seqlock(&rename_lock);
2834 if (!d_unlinked(pwd.dentry)) { 2838 if (!d_unlinked(pwd.dentry)) {
2835 unsigned long len; 2839 unsigned long len;
@@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2839 prepend(&cwd, &buflen, "\0", 1); 2843 prepend(&cwd, &buflen, "\0", 1);
2840 error = prepend_path(&pwd, &root, &cwd, &buflen); 2844 error = prepend_path(&pwd, &root, &cwd, &buflen);
2841 write_sequnlock(&rename_lock); 2845 write_sequnlock(&rename_lock);
2846 br_read_unlock(&vfsmount_lock);
2842 2847
2843 if (error < 0) 2848 if (error < 0)
2844 goto out; 2849 goto out;
@@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2859 } 2864 }
2860 } else { 2865 } else {
2861 write_sequnlock(&rename_lock); 2866 write_sequnlock(&rename_lock);
2867 br_read_unlock(&vfsmount_lock);
2862 } 2868 }
2863 2869
2864out: 2870out:
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 0c4f80b447fb..4888cb3fdef7 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -299,6 +299,7 @@ static struct file_system_type debug_fs_type = {
299 .mount = debug_mount, 299 .mount = debug_mount,
300 .kill_sb = kill_litter_super, 300 .kill_sb = kill_litter_super,
301}; 301};
302MODULE_ALIAS_FS("debugfs");
302 303
303static struct dentry *__create_file(const char *name, umode_t mode, 304static struct dentry *__create_file(const char *name, umode_t mode,
304 struct dentry *parent, void *data, 305 struct dentry *parent, void *data,
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index e15ef38c24fa..434aa313f077 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -12,3 +12,11 @@ config ECRYPT_FS
12 12
13 To compile this file system support as a module, choose M here: the 13 To compile this file system support as a module, choose M here: the
14 module will be called ecryptfs. 14 module will be called ecryptfs.
15
16config ECRYPT_FS_MESSAGING
17 bool "Enable notifications for userspace key wrap/unwrap"
18 depends on ECRYPT_FS
19 help
20 Enables the /dev/ecryptfs entry for use by ecryptfsd. This allows
21 for userspace to wrap/unwrap file encryption keys by other
22 backends, like OpenSSL.
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 2cc9ee4ad2eb..49678a69947d 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -1,7 +1,10 @@
1# 1#
2# Makefile for the Linux 2.6 eCryptfs 2# Makefile for the Linux eCryptfs
3# 3#
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o 7ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \
8 crypto.o keystore.o kthread.o debug.o
9
10ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a7b0c2dfb3db..d5c25db4398f 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -301,17 +301,14 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
301 while (size > 0 && i < sg_size) { 301 while (size > 0 && i < sg_size) {
302 pg = virt_to_page(addr); 302 pg = virt_to_page(addr);
303 offset = offset_in_page(addr); 303 offset = offset_in_page(addr);
304 if (sg) 304 sg_set_page(&sg[i], pg, 0, offset);
305 sg_set_page(&sg[i], pg, 0, offset);
306 remainder_of_page = PAGE_CACHE_SIZE - offset; 305 remainder_of_page = PAGE_CACHE_SIZE - offset;
307 if (size >= remainder_of_page) { 306 if (size >= remainder_of_page) {
308 if (sg) 307 sg[i].length = remainder_of_page;
309 sg[i].length = remainder_of_page;
310 addr += remainder_of_page; 308 addr += remainder_of_page;
311 size -= remainder_of_page; 309 size -= remainder_of_page;
312 } else { 310 } else {
313 if (sg) 311 sg[i].length = size;
314 sg[i].length = size;
315 addr += size; 312 addr += size;
316 size = 0; 313 size = 0;
317 } 314 }
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 1b5d9af937df..bf12ba5dd223 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -45,14 +45,12 @@
45static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) 45static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt;
49 int rc = 1; 48 int rc = 1;
50 49
51 if (flags & LOOKUP_RCU) 50 if (flags & LOOKUP_RCU)
52 return -ECHILD; 51 return -ECHILD;
53 52
54 lower_dentry = ecryptfs_dentry_to_lower(dentry); 53 lower_dentry = ecryptfs_dentry_to_lower(dentry);
55 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
56 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 54 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
57 goto out; 55 goto out;
58 rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); 56 rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 7e2c6f5d7985..dd299b389d4e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -172,6 +172,19 @@ ecryptfs_get_key_payload_data(struct key *key)
172#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24 172#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24
173#define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32) 173#define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32)
174 174
175#ifdef CONFIG_ECRYPT_FS_MESSAGING
176# define ECRYPTFS_VERSIONING_MASK_MESSAGING (ECRYPTFS_VERSIONING_DEVMISC \
177 | ECRYPTFS_VERSIONING_PUBKEY)
178#else
179# define ECRYPTFS_VERSIONING_MASK_MESSAGING 0
180#endif
181
182#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
183 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
184 | ECRYPTFS_VERSIONING_XATTR \
185 | ECRYPTFS_VERSIONING_MULTKEY \
186 | ECRYPTFS_VERSIONING_MASK_MESSAGING \
187 | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
175struct ecryptfs_key_sig { 188struct ecryptfs_key_sig {
176 struct list_head crypt_stat_list; 189 struct list_head crypt_stat_list;
177 char keysig[ECRYPTFS_SIG_SIZE_HEX + 1]; 190 char keysig[ECRYPTFS_SIG_SIZE_HEX + 1];
@@ -399,7 +412,9 @@ struct ecryptfs_daemon {
399 struct hlist_node euid_chain; 412 struct hlist_node euid_chain;
400}; 413};
401 414
415#ifdef CONFIG_ECRYPT_FS_MESSAGING
402extern struct mutex ecryptfs_daemon_hash_mux; 416extern struct mutex ecryptfs_daemon_hash_mux;
417#endif
403 418
404static inline size_t 419static inline size_t
405ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) 420ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
@@ -610,6 +625,7 @@ int
610ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 625ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
611 size_t size, int flags); 626 size_t size, int flags);
612int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 627int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
628#ifdef CONFIG_ECRYPT_FS_MESSAGING
613int ecryptfs_process_response(struct ecryptfs_daemon *daemon, 629int ecryptfs_process_response(struct ecryptfs_daemon *daemon,
614 struct ecryptfs_message *msg, u32 seq); 630 struct ecryptfs_message *msg, u32 seq);
615int ecryptfs_send_message(char *data, int data_len, 631int ecryptfs_send_message(char *data, int data_len,
@@ -618,6 +634,24 @@ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
618 struct ecryptfs_message **emsg); 634 struct ecryptfs_message **emsg);
619int ecryptfs_init_messaging(void); 635int ecryptfs_init_messaging(void);
620void ecryptfs_release_messaging(void); 636void ecryptfs_release_messaging(void);
637#else
638static inline int ecryptfs_init_messaging(void)
639{
640 return 0;
641}
642static inline void ecryptfs_release_messaging(void)
643{ }
644static inline int ecryptfs_send_message(char *data, int data_len,
645 struct ecryptfs_msg_ctx **msg_ctx)
646{
647 return -ENOTCONN;
648}
649static inline int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
650 struct ecryptfs_message **emsg)
651{
652 return -ENOMSG;
653}
654#endif
621 655
622void 656void
623ecryptfs_write_header_metadata(char *virt, 657ecryptfs_write_header_metadata(char *virt,
@@ -655,12 +689,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
655 size_t offset_in_page, size_t size, 689 size_t offset_in_page, size_t size,
656 struct inode *ecryptfs_inode); 690 struct inode *ecryptfs_inode);
657struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index); 691struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index);
658int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
659int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon);
660int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, 692int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
661 size_t *length_size); 693 size_t *length_size);
662int ecryptfs_write_packet_length(char *dest, size_t size, 694int ecryptfs_write_packet_length(char *dest, size_t size,
663 size_t *packet_size_length); 695 size_t *packet_size_length);
696#ifdef CONFIG_ECRYPT_FS_MESSAGING
664int ecryptfs_init_ecryptfs_miscdev(void); 697int ecryptfs_init_ecryptfs_miscdev(void);
665void ecryptfs_destroy_ecryptfs_miscdev(void); 698void ecryptfs_destroy_ecryptfs_miscdev(void);
666int ecryptfs_send_miscdev(char *data, size_t data_size, 699int ecryptfs_send_miscdev(char *data, size_t data_size,
@@ -669,6 +702,9 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
669void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); 702void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
670int 703int
671ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file); 704ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file);
705int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
706int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon);
707#endif
672int ecryptfs_init_kthread(void); 708int ecryptfs_init_kthread(void);
673void ecryptfs_destroy_kthread(void); 709void ecryptfs_destroy_kthread(void);
674int ecryptfs_privileged_open(struct file **lower_file, 710int ecryptfs_privileged_open(struct file **lower_file,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 53acc9d0c138..63b1f54b6a1f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,7 +199,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
199 struct dentry *ecryptfs_dentry = file->f_path.dentry; 199 struct dentry *ecryptfs_dentry = file->f_path.dentry;
200 /* Private value of ecryptfs_dentry allocated in 200 /* Private value of ecryptfs_dentry allocated in
201 * ecryptfs_lookup() */ 201 * ecryptfs_lookup() */
202 struct dentry *lower_dentry;
203 struct ecryptfs_file_info *file_info; 202 struct ecryptfs_file_info *file_info;
204 203
205 mount_crypt_stat = &ecryptfs_superblock_to_private( 204 mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -222,7 +221,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
222 rc = -ENOMEM; 221 rc = -ENOMEM;
223 goto out; 222 goto out;
224 } 223 }
225 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
226 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; 224 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
227 mutex_lock(&crypt_stat->cs_mutex); 225 mutex_lock(&crypt_stat->cs_mutex);
228 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) { 226 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e0f07fb6d56b..5eab400e2590 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -999,8 +999,8 @@ out:
999 return rc; 999 return rc;
1000} 1000}
1001 1001
1002int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, 1002static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
1003 struct kstat *stat) 1003 struct kstat *stat)
1004{ 1004{
1005 struct ecryptfs_mount_crypt_stat *mount_crypt_stat; 1005 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
1006 int rc = 0; 1006 int rc = 0;
@@ -1021,8 +1021,8 @@ int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
1021 return rc; 1021 return rc;
1022} 1022}
1023 1023
1024int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1024static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1025 struct kstat *stat) 1025 struct kstat *stat)
1026{ 1026{
1027 struct kstat lower_stat; 1027 struct kstat lower_stat;
1028 int rc; 1028 int rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 2333203a120b..7d52806c2119 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1150,7 +1150,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1150 struct ecryptfs_message *msg = NULL; 1150 struct ecryptfs_message *msg = NULL;
1151 char *auth_tok_sig; 1151 char *auth_tok_sig;
1152 char *payload; 1152 char *payload;
1153 size_t payload_len; 1153 size_t payload_len = 0;
1154 int rc; 1154 int rc;
1155 1155
1156 rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); 1156 rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
@@ -1168,7 +1168,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1168 rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); 1168 rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
1169 if (rc) { 1169 if (rc) {
1170 ecryptfs_printk(KERN_ERR, "Error sending message to " 1170 ecryptfs_printk(KERN_ERR, "Error sending message to "
1171 "ecryptfsd\n"); 1171 "ecryptfsd: %d\n", rc);
1172 goto out; 1172 goto out;
1173 } 1173 }
1174 rc = ecryptfs_wait_for_response(msg_ctx, &msg); 1174 rc = ecryptfs_wait_for_response(msg_ctx, &msg);
@@ -1202,8 +1202,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1202 crypt_stat->key_size); 1202 crypt_stat->key_size);
1203 } 1203 }
1204out: 1204out:
1205 if (msg) 1205 kfree(msg);
1206 kfree(msg);
1207 return rc; 1206 return rc;
1208} 1207}
1209 1208
@@ -1989,7 +1988,7 @@ pki_encrypt_session_key(struct key *auth_tok_key,
1989 rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); 1988 rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
1990 if (rc) { 1989 if (rc) {
1991 ecryptfs_printk(KERN_ERR, "Error sending message to " 1990 ecryptfs_printk(KERN_ERR, "Error sending message to "
1992 "ecryptfsd\n"); 1991 "ecryptfsd: %d\n", rc);
1993 goto out; 1992 goto out;
1994 } 1993 }
1995 rc = ecryptfs_wait_for_response(msg_ctx, &msg); 1994 rc = ecryptfs_wait_for_response(msg_ctx, &msg);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 4e0886c9e5c4..e924cf45aad9 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -629,6 +629,7 @@ static struct file_system_type ecryptfs_fs_type = {
629 .kill_sb = ecryptfs_kill_block_super, 629 .kill_sb = ecryptfs_kill_block_super,
630 .fs_flags = 0 630 .fs_flags = 0
631}; 631};
632MODULE_ALIAS_FS("ecryptfs");
632 633
633/** 634/**
634 * inode_info_init_once 635 * inode_info_init_once
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 8d7a577ae497..49ff8ea08f1c 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -97,8 +97,7 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
97void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) 97void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
98{ 98{
99 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); 99 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
100 if (msg_ctx->msg) 100 kfree(msg_ctx->msg);
101 kfree(msg_ctx->msg);
102 msg_ctx->msg = NULL; 101 msg_ctx->msg = NULL;
103 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; 102 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
104} 103}
@@ -283,7 +282,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
283 int rc; 282 int rc;
284 283
285 rc = ecryptfs_find_daemon_by_euid(&daemon); 284 rc = ecryptfs_find_daemon_by_euid(&daemon);
286 if (rc || !daemon) { 285 if (rc) {
287 rc = -ENOTCONN; 286 rc = -ENOTCONN;
288 goto out; 287 goto out;
289 } 288 }
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 2002431ef9a0..c6f57a74a559 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -33,6 +33,7 @@ static struct file_system_type efs_fs_type = {
33 .kill_sb = kill_block_super, 33 .kill_sb = kill_block_super,
34 .fs_flags = FS_REQUIRES_DEV, 34 .fs_flags = FS_REQUIRES_DEV,
35}; 35};
36MODULE_ALIAS_FS("efs");
36 37
37static struct pt_types sgi_pt_types[] = { 38static struct pt_types sgi_pt_types[] = {
38 {0x00, "SGI vh"}, 39 {0x00, "SGI vh"},
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 5e59280d42d7..9d9763328734 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -1010,6 +1010,7 @@ static struct file_system_type exofs_type = {
1010 .mount = exofs_mount, 1010 .mount = exofs_mount,
1011 .kill_sb = generic_shutdown_super, 1011 .kill_sb = generic_shutdown_super,
1012}; 1012};
1013MODULE_ALIAS_FS("exofs");
1013 1014
1014static int __init init_exofs(void) 1015static int __init init_exofs(void)
1015{ 1016{
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 8f370e012e61..7cadd823bb31 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -118,7 +118,6 @@ void ext2_free_inode (struct inode * inode)
118 * as writing the quota to disk may need the lock as well. 118 * as writing the quota to disk may need the lock as well.
119 */ 119 */
120 /* Quota is already initialized in iput() */ 120 /* Quota is already initialized in iput() */
121 ext2_xattr_delete_inode(inode);
122 dquot_free_inode(inode); 121 dquot_free_inode(inode);
123 dquot_drop(inode); 122 dquot_drop(inode);
124 123
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c3881e56662e..fe60cc1117d8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -34,6 +34,7 @@
34#include "ext2.h" 34#include "ext2.h"
35#include "acl.h" 35#include "acl.h"
36#include "xip.h" 36#include "xip.h"
37#include "xattr.h"
37 38
38static int __ext2_write_inode(struct inode *inode, int do_sync); 39static int __ext2_write_inode(struct inode *inode, int do_sync);
39 40
@@ -88,6 +89,7 @@ void ext2_evict_inode(struct inode * inode)
88 inode->i_size = 0; 89 inode->i_size = 0;
89 if (inode->i_blocks) 90 if (inode->i_blocks)
90 ext2_truncate_blocks(inode, 0); 91 ext2_truncate_blocks(inode, 0);
92 ext2_xattr_delete_inode(inode);
91 } 93 }
92 94
93 invalidate_inode_buffers(inode); 95 invalidate_inode_buffers(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7f68c8114026..288534920fe5 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1536,6 +1536,7 @@ static struct file_system_type ext2_fs_type = {
1536 .kill_sb = kill_block_super, 1536 .kill_sb = kill_block_super,
1537 .fs_flags = FS_REQUIRES_DEV, 1537 .fs_flags = FS_REQUIRES_DEV,
1538}; 1538};
1539MODULE_ALIAS_FS("ext2");
1539 1540
1540static int __init init_ext2_fs(void) 1541static int __init init_ext2_fs(void)
1541{ 1542{
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 5546ca225ffe..fb5120a5505c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -353,7 +353,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
353 return bdev; 353 return bdev;
354 354
355fail: 355fail:
356 ext3_msg(sb, "error: failed to open journal device %s: %ld", 356 ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
357 __bdevname(dev, b), PTR_ERR(bdev)); 357 __bdevname(dev, b), PTR_ERR(bdev));
358 358
359 return NULL; 359 return NULL;
@@ -887,7 +887,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
887 /*todo: use simple_strtoll with >32bit ext3 */ 887 /*todo: use simple_strtoll with >32bit ext3 */
888 sb_block = simple_strtoul(options, &options, 0); 888 sb_block = simple_strtoul(options, &options, 0);
889 if (*options && *options != ',') { 889 if (*options && *options != ',') {
890 ext3_msg(sb, "error: invalid sb specification: %s", 890 ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
891 (char *) *data); 891 (char *) *data);
892 return 1; 892 return 1;
893 } 893 }
@@ -3068,6 +3068,7 @@ static struct file_system_type ext3_fs_type = {
3068 .kill_sb = kill_block_super, 3068 .kill_sb = kill_block_super,
3069 .fs_flags = FS_REQUIRES_DEV, 3069 .fs_flags = FS_REQUIRES_DEV,
3070}; 3070};
3071MODULE_ALIAS_FS("ext3");
3071 3072
3072static int __init init_ext3_fs(void) 3073static int __init init_ext3_fs(void)
3073{ 3074{
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4a01ba315262..3b83cd604796 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -335,9 +335,9 @@ struct ext4_group_desc
335 */ 335 */
336 336
337struct flex_groups { 337struct flex_groups {
338 atomic_t free_inodes; 338 atomic64_t free_clusters;
339 atomic_t free_clusters; 339 atomic_t free_inodes;
340 atomic_t used_dirs; 340 atomic_t used_dirs;
341}; 341};
342 342
343#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ 343#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2617extern int __init ext4_init_pageio(void); 2617extern int __init ext4_init_pageio(void);
2618extern void ext4_add_complete_io(ext4_io_end_t *io_end); 2618extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2619extern void ext4_exit_pageio(void); 2619extern void ext4_exit_pageio(void);
2620extern void ext4_ioend_wait(struct inode *); 2620extern void ext4_ioend_shutdown(struct inode *);
2621extern void ext4_free_io_end(ext4_io_end_t *io); 2621extern void ext4_free_io_end(ext4_io_end_t *io);
2622extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2622extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2623extern void ext4_end_io_work(struct work_struct *work); 2623extern void ext4_end_io_work(struct work_struct *work);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 28dd8eeea6a9..56efcaadf848 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1584 unsigned short ext1_ee_len, ext2_ee_len, max_len; 1584 unsigned short ext1_ee_len, ext2_ee_len, max_len;
1585 1585
1586 /* 1586 /*
1587 * Make sure that either both extents are uninitialized, or 1587 * Make sure that both extents are initialized. We don't merge
1588 * both are _not_. 1588 * uninitialized extents so that we can be sure that end_io code has
1589 * the extent that was written properly split out and conversion to
1590 * initialized is trivial.
1589 */ 1591 */
1590 if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) 1592 if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
1591 return 0; 1593 return 0;
1592 1594
1593 if (ext4_ext_is_uninitialized(ex1)) 1595 if (ext4_ext_is_uninitialized(ex1))
@@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
2923{ 2925{
2924 ext4_fsblk_t newblock; 2926 ext4_fsblk_t newblock;
2925 ext4_lblk_t ee_block; 2927 ext4_lblk_t ee_block;
2926 struct ext4_extent *ex, newex, orig_ex; 2928 struct ext4_extent *ex, newex, orig_ex, zero_ex;
2927 struct ext4_extent *ex2 = NULL; 2929 struct ext4_extent *ex2 = NULL;
2928 unsigned int ee_len, depth; 2930 unsigned int ee_len, depth;
2929 int err = 0; 2931 int err = 0;
@@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle,
2943 newblock = split - ee_block + ext4_ext_pblock(ex); 2945 newblock = split - ee_block + ext4_ext_pblock(ex);
2944 2946
2945 BUG_ON(split < ee_block || split >= (ee_block + ee_len)); 2947 BUG_ON(split < ee_block || split >= (ee_block + ee_len));
2948 BUG_ON(!ext4_ext_is_uninitialized(ex) &&
2949 split_flag & (EXT4_EXT_MAY_ZEROOUT |
2950 EXT4_EXT_MARK_UNINIT1 |
2951 EXT4_EXT_MARK_UNINIT2));
2946 2952
2947 err = ext4_ext_get_access(handle, inode, path + depth); 2953 err = ext4_ext_get_access(handle, inode, path + depth);
2948 if (err) 2954 if (err)
@@ -2990,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
2990 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2996 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2991 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2997 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2992 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { 2998 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
2993 if (split_flag & EXT4_EXT_DATA_VALID1) 2999 if (split_flag & EXT4_EXT_DATA_VALID1) {
2994 err = ext4_ext_zeroout(inode, ex2); 3000 err = ext4_ext_zeroout(inode, ex2);
2995 else 3001 zero_ex.ee_block = ex2->ee_block;
3002 zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
3003 ext4_ext_store_pblock(&zero_ex,
3004 ext4_ext_pblock(ex2));
3005 } else {
2996 err = ext4_ext_zeroout(inode, ex); 3006 err = ext4_ext_zeroout(inode, ex);
2997 } else 3007 zero_ex.ee_block = ex->ee_block;
3008 zero_ex.ee_len = ext4_ext_get_actual_len(ex);
3009 ext4_ext_store_pblock(&zero_ex,
3010 ext4_ext_pblock(ex));
3011 }
3012 } else {
2998 err = ext4_ext_zeroout(inode, &orig_ex); 3013 err = ext4_ext_zeroout(inode, &orig_ex);
3014 zero_ex.ee_block = orig_ex.ee_block;
3015 zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
3016 ext4_ext_store_pblock(&zero_ex,
3017 ext4_ext_pblock(&orig_ex));
3018 }
2999 3019
3000 if (err) 3020 if (err)
3001 goto fix_extent_len; 3021 goto fix_extent_len;
@@ -3003,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
3003 ex->ee_len = cpu_to_le16(ee_len); 3023 ex->ee_len = cpu_to_le16(ee_len);
3004 ext4_ext_try_to_merge(handle, inode, path, ex); 3024 ext4_ext_try_to_merge(handle, inode, path, ex);
3005 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 3025 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3026 if (err)
3027 goto fix_extent_len;
3028
3029 /* update extent status tree */
3030 err = ext4_es_zeroout(inode, &zero_ex);
3031
3006 goto out; 3032 goto out;
3007 } else if (err) 3033 } else if (err)
3008 goto fix_extent_len; 3034 goto fix_extent_len;
@@ -3041,6 +3067,7 @@ static int ext4_split_extent(handle_t *handle,
3041 int err = 0; 3067 int err = 0;
3042 int uninitialized; 3068 int uninitialized;
3043 int split_flag1, flags1; 3069 int split_flag1, flags1;
3070 int allocated = map->m_len;
3044 3071
3045 depth = ext_depth(inode); 3072 depth = ext_depth(inode);
3046 ex = path[depth].p_ext; 3073 ex = path[depth].p_ext;
@@ -3060,20 +3087,29 @@ static int ext4_split_extent(handle_t *handle,
3060 map->m_lblk + map->m_len, split_flag1, flags1); 3087 map->m_lblk + map->m_len, split_flag1, flags1);
3061 if (err) 3088 if (err)
3062 goto out; 3089 goto out;
3090 } else {
3091 allocated = ee_len - (map->m_lblk - ee_block);
3063 } 3092 }
3064 3093 /*
3094 * Update path is required because previous ext4_split_extent_at() may
3095 * result in split of original leaf or extent zeroout.
3096 */
3065 ext4_ext_drop_refs(path); 3097 ext4_ext_drop_refs(path);
3066 path = ext4_ext_find_extent(inode, map->m_lblk, path); 3098 path = ext4_ext_find_extent(inode, map->m_lblk, path);
3067 if (IS_ERR(path)) 3099 if (IS_ERR(path))
3068 return PTR_ERR(path); 3100 return PTR_ERR(path);
3101 depth = ext_depth(inode);
3102 ex = path[depth].p_ext;
3103 uninitialized = ext4_ext_is_uninitialized(ex);
3104 split_flag1 = 0;
3069 3105
3070 if (map->m_lblk >= ee_block) { 3106 if (map->m_lblk >= ee_block) {
3071 split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | 3107 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3072 EXT4_EXT_DATA_VALID2); 3108 if (uninitialized) {
3073 if (uninitialized)
3074 split_flag1 |= EXT4_EXT_MARK_UNINIT1; 3109 split_flag1 |= EXT4_EXT_MARK_UNINIT1;
3075 if (split_flag & EXT4_EXT_MARK_UNINIT2) 3110 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3076 split_flag1 |= EXT4_EXT_MARK_UNINIT2; 3111 EXT4_EXT_MARK_UNINIT2);
3112 }
3077 err = ext4_split_extent_at(handle, inode, path, 3113 err = ext4_split_extent_at(handle, inode, path,
3078 map->m_lblk, split_flag1, flags); 3114 map->m_lblk, split_flag1, flags);
3079 if (err) 3115 if (err)
@@ -3082,7 +3118,7 @@ static int ext4_split_extent(handle_t *handle,
3082 3118
3083 ext4_ext_show_leaf(inode, path); 3119 ext4_ext_show_leaf(inode, path);
3084out: 3120out:
3085 return err ? err : map->m_len; 3121 return err ? err : allocated;
3086} 3122}
3087 3123
3088/* 3124/*
@@ -3137,6 +3173,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3137 ee_block = le32_to_cpu(ex->ee_block); 3173 ee_block = le32_to_cpu(ex->ee_block);
3138 ee_len = ext4_ext_get_actual_len(ex); 3174 ee_len = ext4_ext_get_actual_len(ex);
3139 allocated = ee_len - (map->m_lblk - ee_block); 3175 allocated = ee_len - (map->m_lblk - ee_block);
3176 zero_ex.ee_len = 0;
3140 3177
3141 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); 3178 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3142 3179
@@ -3227,13 +3264,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3227 3264
3228 if (EXT4_EXT_MAY_ZEROOUT & split_flag) 3265 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3229 max_zeroout = sbi->s_extent_max_zeroout_kb >> 3266 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3230 inode->i_sb->s_blocksize_bits; 3267 (inode->i_sb->s_blocksize_bits - 10);
3231 3268
3232 /* If extent is less than s_max_zeroout_kb, zeroout directly */ 3269 /* If extent is less than s_max_zeroout_kb, zeroout directly */
3233 if (max_zeroout && (ee_len <= max_zeroout)) { 3270 if (max_zeroout && (ee_len <= max_zeroout)) {
3234 err = ext4_ext_zeroout(inode, ex); 3271 err = ext4_ext_zeroout(inode, ex);
3235 if (err) 3272 if (err)
3236 goto out; 3273 goto out;
3274 zero_ex.ee_block = ex->ee_block;
3275 zero_ex.ee_len = ext4_ext_get_actual_len(ex);
3276 ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
3237 3277
3238 err = ext4_ext_get_access(handle, inode, path + depth); 3278 err = ext4_ext_get_access(handle, inode, path + depth);
3239 if (err) 3279 if (err)
@@ -3292,6 +3332,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3292 err = allocated; 3332 err = allocated;
3293 3333
3294out: 3334out:
3335 /* If we have gotten a failure, don't zero out status tree */
3336 if (!err)
3337 err = ext4_es_zeroout(inode, &zero_ex);
3295 return err ? err : allocated; 3338 return err ? err : allocated;
3296} 3339}
3297 3340
@@ -3374,8 +3417,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3374 "block %llu, max_blocks %u\n", inode->i_ino, 3417 "block %llu, max_blocks %u\n", inode->i_ino,
3375 (unsigned long long)ee_block, ee_len); 3418 (unsigned long long)ee_block, ee_len);
3376 3419
3377 /* If extent is larger than requested then split is required */ 3420 /* If extent is larger than requested it is a clear sign that we still
3421 * have some extent state machine issues left. So extent_split is still
3422 * required.
3423 * TODO: Once all related issues will be fixed this situation should be
3424 * illegal.
3425 */
3378 if (ee_block != map->m_lblk || ee_len > map->m_len) { 3426 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3427#ifdef EXT4_DEBUG
3428 ext4_warning("Inode (%ld) finished: extent logical block %llu,"
3429 " len %u; IO logical block %llu, len %u\n",
3430 inode->i_ino, (unsigned long long)ee_block, ee_len,
3431 (unsigned long long)map->m_lblk, map->m_len);
3432#endif
3379 err = ext4_split_unwritten_extents(handle, inode, map, path, 3433 err = ext4_split_unwritten_extents(handle, inode, map, path,
3380 EXT4_GET_BLOCKS_CONVERT); 3434 EXT4_GET_BLOCKS_CONVERT);
3381 if (err < 0) 3435 if (err < 0)
@@ -3626,6 +3680,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3626 path, map->m_len); 3680 path, map->m_len);
3627 } else 3681 } else
3628 err = ret; 3682 err = ret;
3683 map->m_flags |= EXT4_MAP_MAPPED;
3684 if (allocated > map->m_len)
3685 allocated = map->m_len;
3686 map->m_len = allocated;
3629 goto out2; 3687 goto out2;
3630 } 3688 }
3631 /* buffered IO case */ 3689 /* buffered IO case */
@@ -3675,6 +3733,7 @@ out:
3675 allocated - map->m_len); 3733 allocated - map->m_len);
3676 allocated = map->m_len; 3734 allocated = map->m_len;
3677 } 3735 }
3736 map->m_len = allocated;
3678 3737
3679 /* 3738 /*
3680 * If we have done fallocate with the offset that is already 3739 * If we have done fallocate with the offset that is already
@@ -4106,9 +4165,6 @@ got_allocated_blocks:
4106 } 4165 }
4107 } else { 4166 } else {
4108 BUG_ON(allocated_clusters < reserved_clusters); 4167 BUG_ON(allocated_clusters < reserved_clusters);
4109 /* We will claim quota for all newly allocated blocks.*/
4110 ext4_da_update_reserve_space(inode, allocated_clusters,
4111 1);
4112 if (reserved_clusters < allocated_clusters) { 4168 if (reserved_clusters < allocated_clusters) {
4113 struct ext4_inode_info *ei = EXT4_I(inode); 4169 struct ext4_inode_info *ei = EXT4_I(inode);
4114 int reservation = allocated_clusters - 4170 int reservation = allocated_clusters -
@@ -4159,6 +4215,15 @@ got_allocated_blocks:
4159 ei->i_reserved_data_blocks += reservation; 4215 ei->i_reserved_data_blocks += reservation;
4160 spin_unlock(&ei->i_block_reservation_lock); 4216 spin_unlock(&ei->i_block_reservation_lock);
4161 } 4217 }
4218 /*
4219 * We will claim quota for all newly allocated blocks.
4220 * We're updating the reserved space *after* the
4221 * correction above so we do not accidentally free
4222 * all the metadata reservation because we might
4223 * actually need it later on.
4224 */
4225 ext4_da_update_reserve_space(inode, allocated_clusters,
4226 1);
4162 } 4227 }
4163 } 4228 }
4164 4229
@@ -4368,8 +4433,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4368 if (len <= EXT_UNINIT_MAX_LEN << blkbits) 4433 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4369 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4434 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4370 4435
4371 /* Prevent race condition between unwritten */
4372 ext4_flush_unwritten_io(inode);
4373retry: 4436retry:
4374 while (ret >= 0 && ret < max_blocks) { 4437 while (ret >= 0 && ret < max_blocks) {
4375 map.m_lblk = map.m_lblk + ret; 4438 map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 95796a1b7522..fe3337a85ede 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
333static int ext4_es_can_be_merged(struct extent_status *es1, 333static int ext4_es_can_be_merged(struct extent_status *es1,
334 struct extent_status *es2) 334 struct extent_status *es2)
335{ 335{
336 if (es1->es_lblk + es1->es_len != es2->es_lblk) 336 if (ext4_es_status(es1) != ext4_es_status(es2))
337 return 0; 337 return 0;
338 338
339 if (ext4_es_status(es1) != ext4_es_status(es2)) 339 if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
340 return 0; 340 return 0;
341 341
342 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && 342 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
343 (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
344 return 0; 343 return 0;
345 344
346 return 1; 345 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
346 (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
347 return 1;
348
349 if (ext4_es_is_hole(es1))
350 return 1;
351
352 /* we need to check delayed extent is without unwritten status */
353 if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
354 return 1;
355
356 return 0;
347} 357}
348 358
349static struct extent_status * 359static struct extent_status *
@@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
389 return es; 399 return es;
390} 400}
391 401
402#ifdef ES_AGGRESSIVE_TEST
403static void ext4_es_insert_extent_ext_check(struct inode *inode,
404 struct extent_status *es)
405{
406 struct ext4_ext_path *path = NULL;
407 struct ext4_extent *ex;
408 ext4_lblk_t ee_block;
409 ext4_fsblk_t ee_start;
410 unsigned short ee_len;
411 int depth, ee_status, es_status;
412
413 path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
414 if (IS_ERR(path))
415 return;
416
417 depth = ext_depth(inode);
418 ex = path[depth].p_ext;
419
420 if (ex) {
421
422 ee_block = le32_to_cpu(ex->ee_block);
423 ee_start = ext4_ext_pblock(ex);
424 ee_len = ext4_ext_get_actual_len(ex);
425
426 ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
427 es_status = ext4_es_is_unwritten(es) ? 1 : 0;
428
429 /*
430 * Make sure ex and es are not overlap when we try to insert
431 * a delayed/hole extent.
432 */
433 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
434 if (in_range(es->es_lblk, ee_block, ee_len)) {
435 pr_warn("ES insert assertation failed for "
436 "inode: %lu we can find an extent "
437 "at block [%d/%d/%llu/%c], but we "
438 "want to add an delayed/hole extent "
439 "[%d/%d/%llu/%llx]\n",
440 inode->i_ino, ee_block, ee_len,
441 ee_start, ee_status ? 'u' : 'w',
442 es->es_lblk, es->es_len,
443 ext4_es_pblock(es), ext4_es_status(es));
444 }
445 goto out;
446 }
447
448 /*
449 * We don't check ee_block == es->es_lblk, etc. because es
450 * might be a part of whole extent, vice versa.
451 */
452 if (es->es_lblk < ee_block ||
453 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
454 pr_warn("ES insert assertation failed for inode: %lu "
455 "ex_status [%d/%d/%llu/%c] != "
456 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
457 ee_block, ee_len, ee_start,
458 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
459 ext4_es_pblock(es), es_status ? 'u' : 'w');
460 goto out;
461 }
462
463 if (ee_status ^ es_status) {
464 pr_warn("ES insert assertation failed for inode: %lu "
465 "ex_status [%d/%d/%llu/%c] != "
466 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
467 ee_block, ee_len, ee_start,
468 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
469 ext4_es_pblock(es), es_status ? 'u' : 'w');
470 }
471 } else {
472 /*
473 * We can't find an extent on disk. So we need to make sure
474 * that we don't want to add an written/unwritten extent.
475 */
476 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
477 pr_warn("ES insert assertation failed for inode: %lu "
478 "can't find an extent at block %d but we want "
479 "to add an written/unwritten extent "
480 "[%d/%d/%llu/%llx]\n", inode->i_ino,
481 es->es_lblk, es->es_lblk, es->es_len,
482 ext4_es_pblock(es), ext4_es_status(es));
483 }
484 }
485out:
486 if (path) {
487 ext4_ext_drop_refs(path);
488 kfree(path);
489 }
490}
491
492static void ext4_es_insert_extent_ind_check(struct inode *inode,
493 struct extent_status *es)
494{
495 struct ext4_map_blocks map;
496 int retval;
497
498 /*
499 * Here we call ext4_ind_map_blocks to lookup a block mapping because
500 * 'Indirect' structure is defined in indirect.c. So we couldn't
501 * access direct/indirect tree from outside. It is too dirty to define
502 * this function in indirect.c file.
503 */
504
505 map.m_lblk = es->es_lblk;
506 map.m_len = es->es_len;
507
508 retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
509 if (retval > 0) {
510 if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
511 /*
512 * We want to add a delayed/hole extent but this
513 * block has been allocated.
514 */
515 pr_warn("ES insert assertation failed for inode: %lu "
516 "We can find blocks but we want to add a "
517 "delayed/hole extent [%d/%d/%llu/%llx]\n",
518 inode->i_ino, es->es_lblk, es->es_len,
519 ext4_es_pblock(es), ext4_es_status(es));
520 return;
521 } else if (ext4_es_is_written(es)) {
522 if (retval != es->es_len) {
523 pr_warn("ES insert assertation failed for "
524 "inode: %lu retval %d != es_len %d\n",
525 inode->i_ino, retval, es->es_len);
526 return;
527 }
528 if (map.m_pblk != ext4_es_pblock(es)) {
529 pr_warn("ES insert assertation failed for "
530 "inode: %lu m_pblk %llu != "
531 "es_pblk %llu\n",
532 inode->i_ino, map.m_pblk,
533 ext4_es_pblock(es));
534 return;
535 }
536 } else {
537 /*
538 * We don't need to check unwritten extent because
539 * indirect-based file doesn't have it.
540 */
541 BUG_ON(1);
542 }
543 } else if (retval == 0) {
544 if (ext4_es_is_written(es)) {
545 pr_warn("ES insert assertation failed for inode: %lu "
546 "We can't find the block but we want to add "
547 "an written extent [%d/%d/%llu/%llx]\n",
548 inode->i_ino, es->es_lblk, es->es_len,
549 ext4_es_pblock(es), ext4_es_status(es));
550 return;
551 }
552 }
553}
554
555static inline void ext4_es_insert_extent_check(struct inode *inode,
556 struct extent_status *es)
557{
558 /*
559 * We don't need to worry about the race condition because
560 * caller takes i_data_sem locking.
561 */
562 BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
563 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
564 ext4_es_insert_extent_ext_check(inode, es);
565 else
566 ext4_es_insert_extent_ind_check(inode, es);
567}
568#else
569static inline void ext4_es_insert_extent_check(struct inode *inode,
570 struct extent_status *es)
571{
572}
573#endif
574
392static int __es_insert_extent(struct inode *inode, struct extent_status *newes) 575static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
393{ 576{
394 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 577 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
@@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
471 ext4_es_store_status(&newes, status); 654 ext4_es_store_status(&newes, status);
472 trace_ext4_es_insert_extent(inode, &newes); 655 trace_ext4_es_insert_extent(inode, &newes);
473 656
657 ext4_es_insert_extent_check(inode, &newes);
658
474 write_lock(&EXT4_I(inode)->i_es_lock); 659 write_lock(&EXT4_I(inode)->i_es_lock);
475 err = __es_remove_extent(inode, lblk, end); 660 err = __es_remove_extent(inode, lblk, end);
476 if (err != 0) 661 if (err != 0)
@@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
669 return err; 854 return err;
670} 855}
671 856
857int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
858{
859 ext4_lblk_t ee_block;
860 ext4_fsblk_t ee_pblock;
861 unsigned int ee_len;
862
863 ee_block = le32_to_cpu(ex->ee_block);
864 ee_len = ext4_ext_get_actual_len(ex);
865 ee_pblock = ext4_ext_pblock(ex);
866
867 if (ee_len == 0)
868 return 0;
869
870 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
871 EXTENT_STATUS_WRITTEN);
872}
873
672static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 874static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
673{ 875{
674 struct ext4_sb_info *sbi = container_of(shrink, 876 struct ext4_sb_info *sbi = container_of(shrink,
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f190dfe969da..d8e2d4dc311e 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -21,6 +21,12 @@
21#endif 21#endif
22 22
23/* 23/*
24 * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
25 * checked with old map_block's result.
26 */
27#define ES_AGGRESSIVE_TEST__
28
29/*
24 * These flags live in the high bits of extent_status.es_pblk 30 * These flags live in the high bits of extent_status.es_pblk
25 */ 31 */
26#define EXTENT_STATUS_WRITTEN (1ULL << 63) 32#define EXTENT_STATUS_WRITTEN (1ULL << 63)
@@ -33,6 +39,8 @@
33 EXTENT_STATUS_DELAYED | \ 39 EXTENT_STATUS_DELAYED | \
34 EXTENT_STATUS_HOLE) 40 EXTENT_STATUS_HOLE)
35 41
42struct ext4_extent;
43
36struct extent_status { 44struct extent_status {
37 struct rb_node rb_node; 45 struct rb_node rb_node;
38 ext4_lblk_t es_lblk; /* first logical block extent covers */ 46 ext4_lblk_t es_lblk; /* first logical block extent covers */
@@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
58 struct extent_status *es); 66 struct extent_status *es);
59extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 67extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
60 struct extent_status *es); 68 struct extent_status *es);
69extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
61 70
62static inline int ext4_es_is_written(struct extent_status *es) 71static inline int ext4_es_is_written(struct extent_status *es)
63{ 72{
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 32fd2b9075dd..6c5bb8d993fe 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -324,8 +324,8 @@ error_return:
324} 324}
325 325
326struct orlov_stats { 326struct orlov_stats {
327 __u64 free_clusters;
327 __u32 free_inodes; 328 __u32 free_inodes;
328 __u32 free_clusters;
329 __u32 used_dirs; 329 __u32 used_dirs;
330}; 330};
331 331
@@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
342 342
343 if (flex_size > 1) { 343 if (flex_size > 1) {
344 stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 344 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
345 stats->free_clusters = atomic_read(&flex_group[g].free_clusters); 345 stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
346 stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 346 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
347 return; 347 return;
348 } 348 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde3fa9e..b3a5213bc73e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
185 185
186 trace_ext4_evict_inode(inode); 186 trace_ext4_evict_inode(inode);
187 187
188 ext4_ioend_wait(inode);
189
190 if (inode->i_nlink) { 188 if (inode->i_nlink) {
191 /* 189 /*
192 * When journalling data dirty buffers are tracked only in the 190 * When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
207 * don't use page cache. 205 * don't use page cache.
208 */ 206 */
209 if (ext4_should_journal_data(inode) && 207 if (ext4_should_journal_data(inode) &&
210 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 208 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
209 inode->i_ino != EXT4_JOURNAL_INO) {
211 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 210 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
212 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 211 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
213 212
@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
216 filemap_write_and_wait(&inode->i_data); 215 filemap_write_and_wait(&inode->i_data);
217 } 216 }
218 truncate_inode_pages(&inode->i_data, 0); 217 truncate_inode_pages(&inode->i_data, 0);
218 ext4_ioend_shutdown(inode);
219 goto no_delete; 219 goto no_delete;
220 } 220 }
221 221
@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
225 if (ext4_should_order_data(inode)) 225 if (ext4_should_order_data(inode))
226 ext4_begin_ordered_truncate(inode, 0); 226 ext4_begin_ordered_truncate(inode, 0);
227 truncate_inode_pages(&inode->i_data, 0); 227 truncate_inode_pages(&inode->i_data, 0);
228 ext4_ioend_shutdown(inode);
228 229
229 if (is_bad_inode(inode)) 230 if (is_bad_inode(inode))
230 goto no_delete; 231 goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
482 return num; 483 return num;
483} 484}
484 485
486#ifdef ES_AGGRESSIVE_TEST
487static void ext4_map_blocks_es_recheck(handle_t *handle,
488 struct inode *inode,
489 struct ext4_map_blocks *es_map,
490 struct ext4_map_blocks *map,
491 int flags)
492{
493 int retval;
494
495 map->m_flags = 0;
496 /*
497 * There is a race window that the result is not the same.
498 * e.g. xfstests #223 when dioread_nolock enables. The reason
499 * is that we lookup a block mapping in extent status tree with
500 * out taking i_data_sem. So at the time the unwritten extent
501 * could be converted.
502 */
503 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
504 down_read((&EXT4_I(inode)->i_data_sem));
505 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
506 retval = ext4_ext_map_blocks(handle, inode, map, flags &
507 EXT4_GET_BLOCKS_KEEP_SIZE);
508 } else {
509 retval = ext4_ind_map_blocks(handle, inode, map, flags &
510 EXT4_GET_BLOCKS_KEEP_SIZE);
511 }
512 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
513 up_read((&EXT4_I(inode)->i_data_sem));
514 /*
515 * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
516 * because it shouldn't be marked in es_map->m_flags.
517 */
518 map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
519
520 /*
521 * We don't check m_len because extent will be collpased in status
522 * tree. So the m_len might not equal.
523 */
524 if (es_map->m_lblk != map->m_lblk ||
525 es_map->m_flags != map->m_flags ||
526 es_map->m_pblk != map->m_pblk) {
527 printk("ES cache assertation failed for inode: %lu "
528 "es_cached ex [%d/%d/%llu/%x] != "
529 "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
530 inode->i_ino, es_map->m_lblk, es_map->m_len,
531 es_map->m_pblk, es_map->m_flags, map->m_lblk,
532 map->m_len, map->m_pblk, map->m_flags,
533 retval, flags);
534 }
535}
536#endif /* ES_AGGRESSIVE_TEST */
537
485/* 538/*
486 * The ext4_map_blocks() function tries to look up the requested blocks, 539 * The ext4_map_blocks() function tries to look up the requested blocks,
487 * and returns if the blocks are already mapped. 540 * and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
509{ 562{
510 struct extent_status es; 563 struct extent_status es;
511 int retval; 564 int retval;
565#ifdef ES_AGGRESSIVE_TEST
566 struct ext4_map_blocks orig_map;
567
568 memcpy(&orig_map, map, sizeof(*map));
569#endif
512 570
513 map->m_flags = 0; 571 map->m_flags = 0;
514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 572 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
531 } else { 589 } else {
532 BUG_ON(1); 590 BUG_ON(1);
533 } 591 }
592#ifdef ES_AGGRESSIVE_TEST
593 ext4_map_blocks_es_recheck(handle, inode, map,
594 &orig_map, flags);
595#endif
534 goto found; 596 goto found;
535 } 597 }
536 598
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
551 int ret; 613 int ret;
552 unsigned long long status; 614 unsigned long long status;
553 615
616#ifdef ES_AGGRESSIVE_TEST
617 if (retval != map->m_len) {
618 printk("ES len assertation failed for inode: %lu "
619 "retval %d != map->m_len %d "
620 "in %s (lookup)\n", inode->i_ino, retval,
621 map->m_len, __func__);
622 }
623#endif
624
554 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 625 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 626 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 627 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ found:
643 int ret; 714 int ret;
644 unsigned long long status; 715 unsigned long long status;
645 716
717#ifdef ES_AGGRESSIVE_TEST
718 if (retval != map->m_len) {
719 printk("ES len assertation failed for inode: %lu "
720 "retval %d != map->m_len %d "
721 "in %s (allocation)\n", inode->i_ino, retval,
722 map->m_len, __func__);
723 }
724#endif
725
726 /*
727 * If the extent has been zeroed out, we don't need to update
728 * extent status tree.
729 */
730 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
731 ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
732 if (ext4_es_is_written(&es))
733 goto has_zeroout;
734 }
646 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 735 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 736 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
648 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 737 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ found:
655 retval = ret; 744 retval = ret;
656 } 745 }
657 746
747has_zeroout:
658 up_write((&EXT4_I(inode)->i_data_sem)); 748 up_write((&EXT4_I(inode)->i_data_sem));
659 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 749 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
660 int ret = check_block_validity(inode, map); 750 int ret = check_block_validity(inode, map);
@@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file,
1216} 1306}
1217 1307
1218/* 1308/*
1309 * Reserve a metadata for a single block located at lblock
1310 */
1311static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1312{
1313 int retries = 0;
1314 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1315 struct ext4_inode_info *ei = EXT4_I(inode);
1316 unsigned int md_needed;
1317 ext4_lblk_t save_last_lblock;
1318 int save_len;
1319
1320 /*
1321 * recalculate the amount of metadata blocks to reserve
1322 * in order to allocate nrblocks
1323 * worse case is one extent per block
1324 */
1325repeat:
1326 spin_lock(&ei->i_block_reservation_lock);
1327 /*
1328 * ext4_calc_metadata_amount() has side effects, which we have
1329 * to be prepared undo if we fail to claim space.
1330 */
1331 save_len = ei->i_da_metadata_calc_len;
1332 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1333 md_needed = EXT4_NUM_B2C(sbi,
1334 ext4_calc_metadata_amount(inode, lblock));
1335 trace_ext4_da_reserve_space(inode, md_needed);
1336
1337 /*
1338 * We do still charge estimated metadata to the sb though;
1339 * we cannot afford to run out of free blocks.
1340 */
1341 if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1342 ei->i_da_metadata_calc_len = save_len;
1343 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1344 spin_unlock(&ei->i_block_reservation_lock);
1345 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1346 cond_resched();
1347 goto repeat;
1348 }
1349 return -ENOSPC;
1350 }
1351 ei->i_reserved_meta_blocks += md_needed;
1352 spin_unlock(&ei->i_block_reservation_lock);
1353
1354 return 0; /* success */
1355}
1356
1357/*
1219 * Reserve a single cluster located at lblock 1358 * Reserve a single cluster located at lblock
1220 */ 1359 */
1221static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1360static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1263,7 +1402,7 @@ repeat:
1263 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1402 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1264 spin_unlock(&ei->i_block_reservation_lock); 1403 spin_unlock(&ei->i_block_reservation_lock);
1265 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1404 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1266 yield(); 1405 cond_resched();
1267 goto repeat; 1406 goto repeat;
1268 } 1407 }
1269 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1408 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1768 struct extent_status es; 1907 struct extent_status es;
1769 int retval; 1908 int retval;
1770 sector_t invalid_block = ~((sector_t) 0xffff); 1909 sector_t invalid_block = ~((sector_t) 0xffff);
1910#ifdef ES_AGGRESSIVE_TEST
1911 struct ext4_map_blocks orig_map;
1912
1913 memcpy(&orig_map, map, sizeof(*map));
1914#endif
1771 1915
1772 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) 1916 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1773 invalid_block = ~0; 1917 invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1809 else 1953 else
1810 BUG_ON(1); 1954 BUG_ON(1);
1811 1955
1956#ifdef ES_AGGRESSIVE_TEST
1957 ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
1958#endif
1812 return retval; 1959 return retval;
1813 } 1960 }
1814 1961
@@ -1843,8 +1990,11 @@ add_delayed:
1843 * XXX: __block_prepare_write() unmaps passed block, 1990 * XXX: __block_prepare_write() unmaps passed block,
1844 * is it OK? 1991 * is it OK?
1845 */ 1992 */
1846 /* If the block was allocated from previously allocated cluster, 1993 /*
1847 * then we dont need to reserve it again. */ 1994 * If the block was allocated from previously allocated cluster,
1995 * then we don't need to reserve it again. However we still need
1996 * to reserve metadata for every block we're going to write.
1997 */
1848 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { 1998 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1849 ret = ext4_da_reserve_space(inode, iblock); 1999 ret = ext4_da_reserve_space(inode, iblock);
1850 if (ret) { 2000 if (ret) {
@@ -1852,6 +2002,13 @@ add_delayed:
1852 retval = ret; 2002 retval = ret;
1853 goto out_unlock; 2003 goto out_unlock;
1854 } 2004 }
2005 } else {
2006 ret = ext4_da_reserve_metadata(inode, iblock);
2007 if (ret) {
2008 /* not enough space to reserve */
2009 retval = ret;
2010 goto out_unlock;
2011 }
1855 } 2012 }
1856 2013
1857 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 2014 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ add_delayed:
1873 int ret; 2030 int ret;
1874 unsigned long long status; 2031 unsigned long long status;
1875 2032
2033#ifdef ES_AGGRESSIVE_TEST
2034 if (retval != map->m_len) {
2035 printk("ES len assertation failed for inode: %lu "
2036 "retval %d != map->m_len %d "
2037 "in %s (lookup)\n", inode->i_ino, retval,
2038 map->m_len, __func__);
2039 }
2040#endif
2041
1876 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 2042 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 2043 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 2044 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
2908 3074
2909 trace_ext4_releasepage(page); 3075 trace_ext4_releasepage(page);
2910 3076
2911 WARN_ON(PageChecked(page)); 3077 /* Page has dirty journalled data -> cannot release */
2912 if (!page_has_buffers(page)) 3078 if (PageChecked(page))
2913 return 0; 3079 return 0;
2914 if (journal) 3080 if (journal)
2915 return jbd2_journal_try_to_free_buffers(journal, page, wait); 3081 return jbd2_journal_try_to_free_buffers(journal, page, wait);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7bb713a46fe4..ee6614bdb639 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2804 if (sbi->s_log_groups_per_flex) { 2804 if (sbi->s_log_groups_per_flex) {
2805 ext4_group_t flex_group = ext4_flex_group(sbi, 2805 ext4_group_t flex_group = ext4_flex_group(sbi,
2806 ac->ac_b_ex.fe_group); 2806 ac->ac_b_ex.fe_group);
2807 atomic_sub(ac->ac_b_ex.fe_len, 2807 atomic64_sub(ac->ac_b_ex.fe_len,
2808 &sbi->s_flex_groups[flex_group].free_clusters); 2808 &sbi->s_flex_groups[flex_group].free_clusters);
2809 } 2809 }
2810 2810
2811 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2811 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3692,11 +3692,7 @@ repeat:
3692 if (free < needed && busy) { 3692 if (free < needed && busy) {
3693 busy = 0; 3693 busy = 0;
3694 ext4_unlock_group(sb, group); 3694 ext4_unlock_group(sb, group);
3695 /* 3695 cond_resched();
3696 * Yield the CPU here so that we don't get soft lockup
3697 * in non preempt case.
3698 */
3699 yield();
3700 goto repeat; 3696 goto repeat;
3701 } 3697 }
3702 3698
@@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4246 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { 4242 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4247 4243
4248 /* let others to free the space */ 4244 /* let others to free the space */
4249 yield(); 4245 cond_resched();
4250 ar->len = ar->len >> 1; 4246 ar->len = ar->len >> 1;
4251 } 4247 }
4252 if (!ar->len) { 4248 if (!ar->len) {
@@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4464 struct buffer_head *bitmap_bh = NULL; 4460 struct buffer_head *bitmap_bh = NULL;
4465 struct super_block *sb = inode->i_sb; 4461 struct super_block *sb = inode->i_sb;
4466 struct ext4_group_desc *gdp; 4462 struct ext4_group_desc *gdp;
4467 unsigned long freed = 0;
4468 unsigned int overflow; 4463 unsigned int overflow;
4469 ext4_grpblk_t bit; 4464 ext4_grpblk_t bit;
4470 struct buffer_head *gd_bh; 4465 struct buffer_head *gd_bh;
@@ -4666,14 +4661,12 @@ do_more:
4666 4661
4667 if (sbi->s_log_groups_per_flex) { 4662 if (sbi->s_log_groups_per_flex) {
4668 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4663 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4669 atomic_add(count_clusters, 4664 atomic64_add(count_clusters,
4670 &sbi->s_flex_groups[flex_group].free_clusters); 4665 &sbi->s_flex_groups[flex_group].free_clusters);
4671 } 4666 }
4672 4667
4673 ext4_mb_unload_buddy(&e4b); 4668 ext4_mb_unload_buddy(&e4b);
4674 4669
4675 freed += count;
4676
4677 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) 4670 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4678 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4671 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4679 4672
@@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4811 4804
4812 if (sbi->s_log_groups_per_flex) { 4805 if (sbi->s_log_groups_per_flex) {
4813 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4806 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4814 atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), 4807 atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
4815 &sbi->s_flex_groups[flex_group].free_clusters); 4808 &sbi->s_flex_groups[flex_group].free_clusters);
4816 } 4809 }
4817 4810
4818 ext4_mb_unload_buddy(&e4b); 4811 ext4_mb_unload_buddy(&e4b);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 4e81d47aa8cb..33e1c086858b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -32,16 +32,18 @@
32 */ 32 */
33static inline int 33static inline int
34get_ext_path(struct inode *inode, ext4_lblk_t lblock, 34get_ext_path(struct inode *inode, ext4_lblk_t lblock,
35 struct ext4_ext_path **path) 35 struct ext4_ext_path **orig_path)
36{ 36{
37 int ret = 0; 37 int ret = 0;
38 struct ext4_ext_path *path;
38 39
39 *path = ext4_ext_find_extent(inode, lblock, *path); 40 path = ext4_ext_find_extent(inode, lblock, *orig_path);
40 if (IS_ERR(*path)) { 41 if (IS_ERR(path))
41 ret = PTR_ERR(*path); 42 ret = PTR_ERR(path);
42 *path = NULL; 43 else if (path[ext_depth(inode)].p_ext == NULL)
43 } else if ((*path)[ext_depth(inode)].p_ext == NULL)
44 ret = -ENODATA; 44 ret = -ENODATA;
45 else
46 *orig_path = path;
45 47
46 return ret; 48 return ret;
47} 49}
@@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
611{ 613{
612 struct ext4_ext_path *path = NULL; 614 struct ext4_ext_path *path = NULL;
613 struct ext4_extent *ext; 615 struct ext4_extent *ext;
616 int ret = 0;
614 ext4_lblk_t last = from + count; 617 ext4_lblk_t last = from + count;
615 while (from < last) { 618 while (from < last) {
616 *err = get_ext_path(inode, from, &path); 619 *err = get_ext_path(inode, from, &path);
617 if (*err) 620 if (*err)
618 return 0; 621 goto out;
619 ext = path[ext_depth(inode)].p_ext; 622 ext = path[ext_depth(inode)].p_ext;
620 if (!ext) { 623 if (uninit != ext4_ext_is_uninitialized(ext))
621 ext4_ext_drop_refs(path); 624 goto out;
622 return 0;
623 }
624 if (uninit != ext4_ext_is_uninitialized(ext)) {
625 ext4_ext_drop_refs(path);
626 return 0;
627 }
628 from += ext4_ext_get_actual_len(ext); 625 from += ext4_ext_get_actual_len(ext);
629 ext4_ext_drop_refs(path); 626 ext4_ext_drop_refs(path);
630 } 627 }
631 return 1; 628 ret = 1;
629out:
630 if (path) {
631 ext4_ext_drop_refs(path);
632 kfree(path);
633 }
634 return ret;
632} 635}
633 636
634/** 637/**
@@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
666 int replaced_count = 0; 669 int replaced_count = 0;
667 int dext_alen; 670 int dext_alen;
668 671
672 *err = ext4_es_remove_extent(orig_inode, from, count);
673 if (*err)
674 goto out;
675
676 *err = ext4_es_remove_extent(donor_inode, from, count);
677 if (*err)
678 goto out;
679
669 /* Get the original extent for the block "orig_off" */ 680 /* Get the original extent for the block "orig_off" */
670 *err = get_ext_path(orig_inode, orig_off, &orig_path); 681 *err = get_ext_path(orig_inode, orig_off, &orig_path);
671 if (*err) 682 if (*err)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b31003ecc..047a6de04a0a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void)
50 kmem_cache_destroy(io_page_cachep); 50 kmem_cache_destroy(io_page_cachep);
51} 51}
52 52
53void ext4_ioend_wait(struct inode *inode) 53/*
54 * This function is called by ext4_evict_inode() to make sure there is
55 * no more pending I/O completion work left to do.
56 */
57void ext4_ioend_shutdown(struct inode *inode)
54{ 58{
55 wait_queue_head_t *wq = ext4_ioend_wq(inode); 59 wait_queue_head_t *wq = ext4_ioend_wq(inode);
56 60
57 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 61 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
62 /*
63 * We need to make sure the work structure is finished being
64 * used before we let the inode get destroyed.
65 */
66 if (work_pending(&EXT4_I(inode)->i_unwritten_work))
67 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
58} 68}
59 69
60static void put_io_page(struct ext4_io_page *io_page) 70static void put_io_page(struct ext4_io_page *io_page)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b2c8ee56eb98..c169477a62c9 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb,
1360 sbi->s_log_groups_per_flex) { 1360 sbi->s_log_groups_per_flex) {
1361 ext4_group_t flex_group; 1361 ext4_group_t flex_group;
1362 flex_group = ext4_flex_group(sbi, group_data[0].group); 1362 flex_group = ext4_flex_group(sbi, group_data[0].group);
1363 atomic_add(EXT4_NUM_B2C(sbi, free_blocks), 1363 atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
1364 &sbi->s_flex_groups[flex_group].free_clusters); 1364 &sbi->s_flex_groups[flex_group].free_clusters);
1365 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, 1365 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1366 &sbi->s_flex_groups[flex_group].free_inodes); 1366 &sbi->s_flex_groups[flex_group].free_inodes);
1367 } 1367 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5e6c87836193..5d6d53578124 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -90,6 +90,8 @@ static struct file_system_type ext2_fs_type = {
90 .kill_sb = kill_block_super, 90 .kill_sb = kill_block_super,
91 .fs_flags = FS_REQUIRES_DEV, 91 .fs_flags = FS_REQUIRES_DEV,
92}; 92};
93MODULE_ALIAS_FS("ext2");
94MODULE_ALIAS("ext2");
93#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) 95#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
94#else 96#else
95#define IS_EXT2_SB(sb) (0) 97#define IS_EXT2_SB(sb) (0)
@@ -104,6 +106,8 @@ static struct file_system_type ext3_fs_type = {
104 .kill_sb = kill_block_super, 106 .kill_sb = kill_block_super,
105 .fs_flags = FS_REQUIRES_DEV, 107 .fs_flags = FS_REQUIRES_DEV,
106}; 108};
109MODULE_ALIAS_FS("ext3");
110MODULE_ALIAS("ext3");
107#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) 111#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
108#else 112#else
109#define IS_EXT3_SB(sb) (0) 113#define IS_EXT3_SB(sb) (0)
@@ -1923,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
1923 flex_group = ext4_flex_group(sbi, i); 1927 flex_group = ext4_flex_group(sbi, i);
1924 atomic_add(ext4_free_inodes_count(sb, gdp), 1928 atomic_add(ext4_free_inodes_count(sb, gdp),
1925 &sbi->s_flex_groups[flex_group].free_inodes); 1929 &sbi->s_flex_groups[flex_group].free_inodes);
1926 atomic_add(ext4_free_group_clusters(sb, gdp), 1930 atomic64_add(ext4_free_group_clusters(sb, gdp),
1927 &sbi->s_flex_groups[flex_group].free_clusters); 1931 &sbi->s_flex_groups[flex_group].free_clusters);
1928 atomic_add(ext4_used_dirs_count(sb, gdp), 1932 atomic_add(ext4_used_dirs_count(sb, gdp),
1929 &sbi->s_flex_groups[flex_group].used_dirs); 1933 &sbi->s_flex_groups[flex_group].used_dirs);
1930 } 1934 }
@@ -5152,7 +5156,6 @@ static inline int ext2_feature_set_ok(struct super_block *sb)
5152 return 0; 5156 return 0;
5153 return 1; 5157 return 1;
5154} 5158}
5155MODULE_ALIAS("ext2");
5156#else 5159#else
5157static inline void register_as_ext2(void) { } 5160static inline void register_as_ext2(void) { }
5158static inline void unregister_as_ext2(void) { } 5161static inline void unregister_as_ext2(void) { }
@@ -5185,7 +5188,6 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
5185 return 0; 5188 return 0;
5186 return 1; 5189 return 1;
5187} 5190}
5188MODULE_ALIAS("ext3");
5189#else 5191#else
5190static inline void register_as_ext3(void) { } 5192static inline void register_as_ext3(void) { }
5191static inline void unregister_as_ext3(void) { } 5193static inline void unregister_as_ext3(void) { }
@@ -5199,6 +5201,7 @@ static struct file_system_type ext4_fs_type = {
5199 .kill_sb = kill_block_super, 5201 .kill_sb = kill_block_super,
5200 .fs_flags = FS_REQUIRES_DEV, 5202 .fs_flags = FS_REQUIRES_DEV,
5201}; 5203};
5204MODULE_ALIAS_FS("ext4");
5202 5205
5203static int __init ext4_init_feat_adverts(void) 5206static int __init ext4_init_feat_adverts(void)
5204{ 5207{
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8c117649a035..fea6e582a2ed 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -687,6 +687,7 @@ static struct file_system_type f2fs_fs_type = {
687 .kill_sb = kill_block_super, 687 .kill_sb = kill_block_super,
688 .fs_flags = FS_REQUIRES_DEV, 688 .fs_flags = FS_REQUIRES_DEV,
689}; 689};
690MODULE_ALIAS_FS("f2fs");
690 691
691static int __init init_inodecache(void) 692static int __init init_inodecache(void)
692{ 693{
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index e2cfda94a28d..081b759cff83 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -668,6 +668,7 @@ static struct file_system_type msdos_fs_type = {
668 .kill_sb = kill_block_super, 668 .kill_sb = kill_block_super,
669 .fs_flags = FS_REQUIRES_DEV, 669 .fs_flags = FS_REQUIRES_DEV,
670}; 670};
671MODULE_ALIAS_FS("msdos");
671 672
672static int __init init_msdos_fs(void) 673static int __init init_msdos_fs(void)
673{ 674{
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index ac959d655e7d..2da952036a3d 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1073,6 +1073,7 @@ static struct file_system_type vfat_fs_type = {
1073 .kill_sb = kill_block_super, 1073 .kill_sb = kill_block_super,
1074 .fs_flags = FS_REQUIRES_DEV, 1074 .fs_flags = FS_REQUIRES_DEV,
1075}; 1075};
1076MODULE_ALIAS_FS("vfat");
1076 1077
1077static int __init init_vfat_fs(void) 1078static int __init init_vfat_fs(void)
1078{ 1079{
diff --git a/fs/filesystems.c b/fs/filesystems.c
index da165f6adcbf..92567d95ba6a 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -273,7 +273,7 @@ struct file_system_type *get_fs_type(const char *name)
273 int len = dot ? dot - name : strlen(name); 273 int len = dot ? dot - name : strlen(name);
274 274
275 fs = __get_fs_type(name, len); 275 fs = __get_fs_type(name, len);
276 if (!fs && (request_module("%.*s", len, name) == 0)) 276 if (!fs && (request_module("fs-%.*s", len, name) == 0))
277 fs = __get_fs_type(name, len); 277 fs = __get_fs_type(name, len);
278 278
279 if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { 279 if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index fed2c8afb3a9..e37eb274e492 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -52,7 +52,6 @@ MODULE_AUTHOR("Christoph Hellwig");
52MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver"); 52MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver");
53MODULE_LICENSE("Dual BSD/GPL"); 53MODULE_LICENSE("Dual BSD/GPL");
54 54
55MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */
56 55
57 56
58static void vxfs_put_super(struct super_block *); 57static void vxfs_put_super(struct super_block *);
@@ -258,6 +257,8 @@ static struct file_system_type vxfs_fs_type = {
258 .kill_sb = kill_block_super, 257 .kill_sb = kill_block_super,
259 .fs_flags = FS_REQUIRES_DEV, 258 .fs_flags = FS_REQUIRES_DEV,
260}; 259};
260MODULE_ALIAS_FS("vxfs"); /* makes mount -t vxfs autoload the module */
261MODULE_ALIAS("vxfs");
261 262
262static int __init 263static int __init
263vxfs_init(void) 264vxfs_init(void)
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index b7978b9f75ef..a0b0855d00a9 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -341,6 +341,7 @@ static struct file_system_type fuse_ctl_fs_type = {
341 .mount = fuse_ctl_mount, 341 .mount = fuse_ctl_mount,
342 .kill_sb = fuse_ctl_kill_sb, 342 .kill_sb = fuse_ctl_kill_sb,
343}; 343};
344MODULE_ALIAS_FS("fusectl");
344 345
345int __init fuse_ctl_init(void) 346int __init fuse_ctl_init(void)
346{ 347{
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index df00993ed108..137185c3884f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1117,6 +1117,7 @@ static struct file_system_type fuse_fs_type = {
1117 .mount = fuse_mount, 1117 .mount = fuse_mount,
1118 .kill_sb = fuse_kill_sb_anon, 1118 .kill_sb = fuse_kill_sb_anon,
1119}; 1119};
1120MODULE_ALIAS_FS("fuse");
1120 1121
1121#ifdef CONFIG_BLOCK 1122#ifdef CONFIG_BLOCK
1122static struct dentry *fuse_mount_blk(struct file_system_type *fs_type, 1123static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
@@ -1146,6 +1147,7 @@ static struct file_system_type fuseblk_fs_type = {
1146 .kill_sb = fuse_kill_sb_blk, 1147 .kill_sb = fuse_kill_sb_blk,
1147 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1148 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1148}; 1149};
1150MODULE_ALIAS_FS("fuseblk");
1149 1151
1150static inline int register_fuseblk(void) 1152static inline int register_fuseblk(void)
1151{ 1153{
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1b612be4b873..60ede2a0f43f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -20,6 +20,7 @@
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/lockdep.h> 22#include <linux/lockdep.h>
23#include <linux/module.h>
23 24
24#include "gfs2.h" 25#include "gfs2.h"
25#include "incore.h" 26#include "incore.h"
@@ -1425,6 +1426,7 @@ struct file_system_type gfs2_fs_type = {
1425 .kill_sb = gfs2_kill_sb, 1426 .kill_sb = gfs2_kill_sb,
1426 .owner = THIS_MODULE, 1427 .owner = THIS_MODULE,
1427}; 1428};
1429MODULE_ALIAS_FS("gfs2");
1428 1430
1429struct file_system_type gfs2meta_fs_type = { 1431struct file_system_type gfs2meta_fs_type = {
1430 .name = "gfs2meta", 1432 .name = "gfs2meta",
@@ -1432,4 +1434,4 @@ struct file_system_type gfs2meta_fs_type = {
1432 .mount = gfs2_mount_meta, 1434 .mount = gfs2_mount_meta,
1433 .owner = THIS_MODULE, 1435 .owner = THIS_MODULE,
1434}; 1436};
1435 1437MODULE_ALIAS_FS("gfs2meta");
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index e93ddaadfd1e..bbaaa8a4ee64 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -466,6 +466,7 @@ static struct file_system_type hfs_fs_type = {
466 .kill_sb = kill_block_super, 466 .kill_sb = kill_block_super,
467 .fs_flags = FS_REQUIRES_DEV, 467 .fs_flags = FS_REQUIRES_DEV,
468}; 468};
469MODULE_ALIAS_FS("hfs");
469 470
470static void hfs_init_once(void *p) 471static void hfs_init_once(void *p)
471{ 472{
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 974c26f96fae..7b87284e46dc 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -654,6 +654,7 @@ static struct file_system_type hfsplus_fs_type = {
654 .kill_sb = kill_block_super, 654 .kill_sb = kill_block_super,
655 .fs_flags = FS_REQUIRES_DEV, 655 .fs_flags = FS_REQUIRES_DEV,
656}; 656};
657MODULE_ALIAS_FS("hfsplus");
657 658
658static void hfsplus_init_once(void *p) 659static void hfsplus_init_once(void *p)
659{ 660{
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fbabb906066f..0f6e52d22b84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -845,15 +845,8 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
845 return err; 845 return err;
846 846
847 if ((attr->ia_valid & ATTR_SIZE) && 847 if ((attr->ia_valid & ATTR_SIZE) &&
848 attr->ia_size != i_size_read(inode)) { 848 attr->ia_size != i_size_read(inode))
849 int error;
850
851 error = inode_newsize_ok(inode, attr->ia_size);
852 if (error)
853 return error;
854
855 truncate_setsize(inode, attr->ia_size); 849 truncate_setsize(inode, attr->ia_size);
856 }
857 850
858 setattr_copy(inode, attr); 851 setattr_copy(inode, attr);
859 mark_inode_dirty(inode); 852 mark_inode_dirty(inode);
@@ -993,6 +986,7 @@ static struct file_system_type hostfs_type = {
993 .kill_sb = hostfs_kill_sb, 986 .kill_sb = hostfs_kill_sb,
994 .fs_flags = 0, 987 .fs_flags = 0,
995}; 988};
989MODULE_ALIAS_FS("hostfs");
996 990
997static int __init init_hostfs(void) 991static int __init init_hostfs(void)
998{ 992{
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a3076228523d..a0617e706957 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -688,6 +688,7 @@ static struct file_system_type hpfs_fs_type = {
688 .kill_sb = kill_block_super, 688 .kill_sb = kill_block_super,
689 .fs_flags = FS_REQUIRES_DEV, 689 .fs_flags = FS_REQUIRES_DEV,
690}; 690};
691MODULE_ALIAS_FS("hpfs");
691 692
692static int __init init_hpfs_fs(void) 693static int __init init_hpfs_fs(void)
693{ 694{
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 74f55703be49..126d3c2e2dee 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,6 +748,7 @@ static struct file_system_type hppfs_type = {
748 .kill_sb = kill_anon_super, 748 .kill_sb = kill_anon_super,
749 .fs_flags = 0, 749 .fs_flags = 0,
750}; 750};
751MODULE_ALIAS_FS("hppfs");
751 752
752static int __init init_hppfs(void) 753static int __init init_hppfs(void)
753{ 754{
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7f94e0cbc69c..84e3d856e91d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -896,6 +896,7 @@ static struct file_system_type hugetlbfs_fs_type = {
896 .mount = hugetlbfs_mount, 896 .mount = hugetlbfs_mount,
897 .kill_sb = kill_litter_super, 897 .kill_sb = kill_litter_super,
898}; 898};
899MODULE_ALIAS_FS("hugetlbfs");
899 900
900static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; 901static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
901 902
diff --git a/fs/internal.h b/fs/internal.h
index 507141fceb99..4be78237d896 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool);
125 * dcache.c 125 * dcache.c
126 */ 126 */
127extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); 127extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
128
129/*
130 * read_write.c
131 */
132extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 67ce52507d7d..d9b8aebdeb22 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1556,6 +1556,8 @@ static struct file_system_type iso9660_fs_type = {
1556 .kill_sb = kill_block_super, 1556 .kill_sb = kill_block_super,
1557 .fs_flags = FS_REQUIRES_DEV, 1557 .fs_flags = FS_REQUIRES_DEV,
1558}; 1558};
1559MODULE_ALIAS_FS("iso9660");
1560MODULE_ALIAS("iso9660");
1559 1561
1560static int __init init_iso9660_fs(void) 1562static int __init init_iso9660_fs(void)
1561{ 1563{
@@ -1593,5 +1595,3 @@ static void __exit exit_iso9660_fs(void)
1593module_init(init_iso9660_fs) 1595module_init(init_iso9660_fs)
1594module_exit(exit_iso9660_fs) 1596module_exit(exit_iso9660_fs)
1595MODULE_LICENSE("GPL"); 1597MODULE_LICENSE("GPL");
1596/* Actual filesystem name is iso9660, as requested in filesystems.c */
1597MODULE_ALIAS("iso9660");
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d6ee5aed56b1..325bc019ed88 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1065,9 +1065,12 @@ out:
1065void jbd2_journal_set_triggers(struct buffer_head *bh, 1065void jbd2_journal_set_triggers(struct buffer_head *bh,
1066 struct jbd2_buffer_trigger_type *type) 1066 struct jbd2_buffer_trigger_type *type)
1067{ 1067{
1068 struct journal_head *jh = bh2jh(bh); 1068 struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
1069 1069
1070 if (WARN_ON(!jh))
1071 return;
1070 jh->b_triggers = type; 1072 jh->b_triggers = type;
1073 jbd2_journal_put_journal_head(jh);
1071} 1074}
1072 1075
1073void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, 1076void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
@@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1119{ 1122{
1120 transaction_t *transaction = handle->h_transaction; 1123 transaction_t *transaction = handle->h_transaction;
1121 journal_t *journal = transaction->t_journal; 1124 journal_t *journal = transaction->t_journal;
1122 struct journal_head *jh = bh2jh(bh); 1125 struct journal_head *jh;
1123 int ret = 0; 1126 int ret = 0;
1124 1127
1125 jbd_debug(5, "journal_head %p\n", jh);
1126 JBUFFER_TRACE(jh, "entry");
1127 if (is_handle_aborted(handle)) 1128 if (is_handle_aborted(handle))
1128 goto out; 1129 goto out;
1129 if (!buffer_jbd(bh)) { 1130 jh = jbd2_journal_grab_journal_head(bh);
1131 if (!jh) {
1130 ret = -EUCLEAN; 1132 ret = -EUCLEAN;
1131 goto out; 1133 goto out;
1132 } 1134 }
1135 jbd_debug(5, "journal_head %p\n", jh);
1136 JBUFFER_TRACE(jh, "entry");
1133 1137
1134 jbd_lock_bh_state(bh); 1138 jbd_lock_bh_state(bh);
1135 1139
@@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1220 spin_unlock(&journal->j_list_lock); 1224 spin_unlock(&journal->j_list_lock);
1221out_unlock_bh: 1225out_unlock_bh:
1222 jbd_unlock_bh_state(bh); 1226 jbd_unlock_bh_state(bh);
1227 jbd2_journal_put_journal_head(jh);
1223out: 1228out:
1224 JBUFFER_TRACE(jh, "exit"); 1229 JBUFFER_TRACE(jh, "exit");
1225 WARN_ON(ret); /* All errors are bugs, so dump the stack */ 1230 WARN_ON(ret); /* All errors are bugs, so dump the stack */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d3d8799e2187..0defb1cc2a35 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -356,6 +356,7 @@ static struct file_system_type jffs2_fs_type = {
356 .mount = jffs2_mount, 356 .mount = jffs2_mount,
357 .kill_sb = jffs2_kill_sb, 357 .kill_sb = jffs2_kill_sb,
358}; 358};
359MODULE_ALIAS_FS("jffs2");
359 360
360static int __init init_jffs2_fs(void) 361static int __init init_jffs2_fs(void)
361{ 362{
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 060ba638becb..2003e830ed1c 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -833,6 +833,7 @@ static struct file_system_type jfs_fs_type = {
833 .kill_sb = kill_block_super, 833 .kill_sb = kill_block_super,
834 .fs_flags = FS_REQUIRES_DEV, 834 .fs_flags = FS_REQUIRES_DEV,
835}; 835};
836MODULE_ALIAS_FS("jfs");
836 837
837static void init_once(void *foo) 838static void init_once(void *foo)
838{ 839{
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 345c24b8a6f8..54360293bcb5 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -608,6 +608,7 @@ static struct file_system_type logfs_fs_type = {
608 .fs_flags = FS_REQUIRES_DEV, 608 .fs_flags = FS_REQUIRES_DEV,
609 609
610}; 610};
611MODULE_ALIAS_FS("logfs");
611 612
612static int __init logfs_init(void) 613static int __init logfs_init(void)
613{ 614{
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 99541cceb584..df122496f328 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -660,6 +660,7 @@ static struct file_system_type minix_fs_type = {
660 .kill_sb = kill_block_super, 660 .kill_sb = kill_block_super,
661 .fs_flags = FS_REQUIRES_DEV, 661 .fs_flags = FS_REQUIRES_DEV,
662}; 662};
663MODULE_ALIAS_FS("minix");
663 664
664static int __init init_minix_fs(void) 665static int __init init_minix_fs(void)
665{ 666{
diff --git a/fs/namei.c b/fs/namei.c
index 961bc1268366..57ae9c8c66bf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -689,8 +689,6 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
689 nd->path = *path; 689 nd->path = *path;
690 nd->inode = nd->path.dentry->d_inode; 690 nd->inode = nd->path.dentry->d_inode;
691 nd->flags |= LOOKUP_JUMPED; 691 nd->flags |= LOOKUP_JUMPED;
692
693 BUG_ON(nd->inode->i_op->follow_link);
694} 692}
695 693
696static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) 694static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ca17d3cb45..d581e45c0a9f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
798 } 798 }
799 799
800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
801 /* Don't allow unprivileged users to change mount flags */
802 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
803 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
804
801 atomic_inc(&sb->s_active); 805 atomic_inc(&sb->s_active);
802 mnt->mnt.mnt_sb = sb; 806 mnt->mnt.mnt_sb = sb;
803 mnt->mnt.mnt_root = dget(root); 807 mnt->mnt.mnt_root = dget(root);
@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1713 if (readonly_request == __mnt_is_readonly(mnt)) 1717 if (readonly_request == __mnt_is_readonly(mnt))
1714 return 0; 1718 return 0;
1715 1719
1720 if (mnt->mnt_flags & MNT_LOCK_READONLY)
1721 return -EPERM;
1722
1716 if (readonly_request) 1723 if (readonly_request)
1717 error = mnt_make_readonly(real_mount(mnt)); 1724 error = mnt_make_readonly(real_mount(mnt));
1718 else 1725 else
@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2339 /* First pass: copy the tree topology */ 2346 /* First pass: copy the tree topology */
2340 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2347 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2341 if (user_ns != mnt_ns->user_ns) 2348 if (user_ns != mnt_ns->user_ns)
2342 copy_flags |= CL_SHARED_TO_SLAVE; 2349 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2343 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2350 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2344 if (IS_ERR(new)) { 2351 if (IS_ERR(new)) {
2345 up_write(&namespace_sem); 2352 up_write(&namespace_sem);
@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
2732 return check_mnt(real_mount(mnt)); 2739 return check_mnt(real_mount(mnt));
2733} 2740}
2734 2741
2742bool current_chrooted(void)
2743{
2744 /* Does the current process have a non-standard root */
2745 struct path ns_root;
2746 struct path fs_root;
2747 bool chrooted;
2748
2749 /* Find the namespace root */
2750 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
2751 ns_root.dentry = ns_root.mnt->mnt_root;
2752 path_get(&ns_root);
2753 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
2754 ;
2755
2756 get_fs_root(current->fs, &fs_root);
2757
2758 chrooted = !path_equal(&fs_root, &ns_root);
2759
2760 path_put(&fs_root);
2761 path_put(&ns_root);
2762
2763 return chrooted;
2764}
2765
2766void update_mnt_policy(struct user_namespace *userns)
2767{
2768 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
2769 struct mount *mnt;
2770
2771 down_read(&namespace_sem);
2772 list_for_each_entry(mnt, &ns->list, mnt_list) {
2773 switch (mnt->mnt.mnt_sb->s_magic) {
2774 case SYSFS_MAGIC:
2775 userns->may_mount_sysfs = true;
2776 break;
2777 case PROC_SUPER_MAGIC:
2778 userns->may_mount_proc = true;
2779 break;
2780 }
2781 if (userns->may_mount_sysfs && userns->may_mount_proc)
2782 break;
2783 }
2784 up_read(&namespace_sem);
2785}
2786
2735static void *mntns_get(struct task_struct *task) 2787static void *mntns_get(struct task_struct *task)
2736{ 2788{
2737 struct mnt_namespace *ns = NULL; 2789 struct mnt_namespace *ns = NULL;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 7dafd6899a62..26910c8154da 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1051,6 +1051,7 @@ static struct file_system_type ncp_fs_type = {
1051 .kill_sb = kill_anon_super, 1051 .kill_sb = kill_anon_super,
1052 .fs_flags = FS_BINARY_MOUNTDATA, 1052 .fs_flags = FS_BINARY_MOUNTDATA,
1053}; 1053};
1054MODULE_ALIAS_FS("ncpfs");
1054 1055
1055static int __init init_ncp_fs(void) 1056static int __init init_ncp_fs(void)
1056{ 1057{
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17b..6fc7b5cae92b 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
55 55
56 bl_pipe_msg.bl_wq = &nn->bl_wq; 56 bl_pipe_msg.bl_wq = &nn->bl_wq;
57 memset(msg, 0, sizeof(*msg)); 57 memset(msg, 0, sizeof(*msg));
58 msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); 58 msg->len = sizeof(bl_msg) + bl_msg.totallen;
59 msg->data = kzalloc(msg->len, GFP_NOFS);
59 if (!msg->data) 60 if (!msg->data)
60 goto out; 61 goto out;
61 62
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
66 memcpy(msg->data, &bl_msg, sizeof(bl_msg)); 67 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
67 dataptr = (uint8_t *) msg->data; 68 dataptr = (uint8_t *) msg->data;
68 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); 69 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
69 msg->len = sizeof(bl_msg) + bl_msg.totallen;
70 70
71 add_wait_queue(&nn->bl_wq, &wq); 71 add_wait_queue(&nn->bl_wq, &wq);
72 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { 72 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dc0f98dfa717..c516da5873fd 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -726,9 +726,9 @@ out1:
726 return ret; 726 return ret;
727} 727}
728 728
729static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) 729static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
730{ 730{
731 return key_instantiate_and_link(key, data, strlen(data) + 1, 731 return key_instantiate_and_link(key, data, datalen,
732 id_resolver_cache->thread_keyring, 732 id_resolver_cache->thread_keyring,
733 authkey); 733 authkey);
734} 734}
@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
738 struct key *key, struct key *authkey) 738 struct key *key, struct key *authkey)
739{ 739{
740 char id_str[NFS_UINT_MAXLEN]; 740 char id_str[NFS_UINT_MAXLEN];
741 size_t len;
741 int ret = -ENOKEY; 742 int ret = -ENOKEY;
742 743
743 /* ret = -ENOKEY */ 744 /* ret = -ENOKEY */
@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
747 case IDMAP_CONV_NAMETOID: 748 case IDMAP_CONV_NAMETOID:
748 if (strcmp(upcall->im_name, im->im_name) != 0) 749 if (strcmp(upcall->im_name, im->im_name) != 0)
749 break; 750 break;
750 sprintf(id_str, "%d", im->im_id); 751 /* Note: here we store the NUL terminator too */
751 ret = nfs_idmap_instantiate(key, authkey, id_str); 752 len = sprintf(id_str, "%d", im->im_id) + 1;
753 ret = nfs_idmap_instantiate(key, authkey, id_str, len);
752 break; 754 break;
753 case IDMAP_CONV_IDTONAME: 755 case IDMAP_CONV_IDTONAME:
754 if (upcall->im_id != im->im_id) 756 if (upcall->im_id != im->im_id)
755 break; 757 break;
756 ret = nfs_idmap_instantiate(key, authkey, im->im_name); 758 len = strlen(im->im_name);
759 ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
757 break; 760 break;
758 default: 761 default:
759 ret = -EINVAL; 762 ret = -EINVAL;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb044c109..4fb234d3aefb 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
129{ 129{
130 if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 130 if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
131 return; 131 return;
132 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
133 pnfs_return_layout(inode); 132 pnfs_return_layout(inode);
134} 133}
135 134
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb0f901..26431cf62ddb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2632 int status; 2632 int status;
2633 2633
2634 if (pnfs_ld_layoutret_on_setattr(inode)) 2634 if (pnfs_ld_layoutret_on_setattr(inode))
2635 pnfs_return_layout(inode); 2635 pnfs_commit_and_return_layout(inode);
2636 2636
2637 nfs_fattr_init(fattr); 2637 nfs_fattr_init(fattr);
2638 2638
@@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
6416static void nfs4_layoutcommit_release(void *calldata) 6416static void nfs4_layoutcommit_release(void *calldata)
6417{ 6417{
6418 struct nfs4_layoutcommit_data *data = calldata; 6418 struct nfs4_layoutcommit_data *data = calldata;
6419 struct pnfs_layout_segment *lseg, *tmp;
6420 unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
6421 6419
6422 pnfs_cleanup_layoutcommit(data); 6420 pnfs_cleanup_layoutcommit(data);
6423 /* Matched by references in pnfs_set_layoutcommit */
6424 list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
6425 list_del_init(&lseg->pls_lc_list);
6426 if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
6427 &lseg->pls_flags))
6428 pnfs_put_lseg(lseg);
6429 }
6430
6431 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
6432 smp_mb__after_clear_bit();
6433 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
6434
6435 put_rpccred(data->cred); 6421 put_rpccred(data->cred);
6436 kfree(data); 6422 kfree(data);
6437} 6423}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aad6258..4bdffe0ba025 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
417 lo_seg_intersecting(lseg_range, recall_range); 417 lo_seg_intersecting(lseg_range, recall_range);
418} 418}
419 419
420static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
421 struct list_head *tmp_list)
422{
423 if (!atomic_dec_and_test(&lseg->pls_refcount))
424 return false;
425 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
426 list_add(&lseg->pls_list, tmp_list);
427 return true;
428}
429
420/* Returns 1 if lseg is removed from list, 0 otherwise */ 430/* Returns 1 if lseg is removed from list, 0 otherwise */
421static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 431static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
422 struct list_head *tmp_list) 432 struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
430 */ 440 */
431 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 441 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
432 atomic_read(&lseg->pls_refcount)); 442 atomic_read(&lseg->pls_refcount));
433 if (atomic_dec_and_test(&lseg->pls_refcount)) { 443 if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
434 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
435 list_add(&lseg->pls_list, tmp_list);
436 rv = 1; 444 rv = 1;
437 }
438 } 445 }
439 return rv; 446 return rv;
440} 447}
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
777 return lseg; 784 return lseg;
778} 785}
779 786
787static void pnfs_clear_layoutcommit(struct inode *inode,
788 struct list_head *head)
789{
790 struct nfs_inode *nfsi = NFS_I(inode);
791 struct pnfs_layout_segment *lseg, *tmp;
792
793 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
794 return;
795 list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
796 if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
797 continue;
798 pnfs_lseg_dec_and_remove_zero(lseg, head);
799 }
800}
801
780/* 802/*
781 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 803 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
782 * when the layout segment list is empty. 804 * when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
808 /* Reference matched in nfs4_layoutreturn_release */ 830 /* Reference matched in nfs4_layoutreturn_release */
809 pnfs_get_layout_hdr(lo); 831 pnfs_get_layout_hdr(lo);
810 empty = list_empty(&lo->plh_segs); 832 empty = list_empty(&lo->plh_segs);
833 pnfs_clear_layoutcommit(ino, &tmp_list);
811 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); 834 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
812 /* Don't send a LAYOUTRETURN if list was initially empty */ 835 /* Don't send a LAYOUTRETURN if list was initially empty */
813 if (empty) { 836 if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
820 spin_unlock(&ino->i_lock); 843 spin_unlock(&ino->i_lock);
821 pnfs_free_lseg_list(&tmp_list); 844 pnfs_free_lseg_list(&tmp_list);
822 845
823 WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
824
825 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 846 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
826 if (unlikely(lrp == NULL)) { 847 if (unlikely(lrp == NULL)) {
827 status = -ENOMEM; 848 status = -ENOMEM;
@@ -845,6 +866,33 @@ out:
845} 866}
846EXPORT_SYMBOL_GPL(_pnfs_return_layout); 867EXPORT_SYMBOL_GPL(_pnfs_return_layout);
847 868
869int
870pnfs_commit_and_return_layout(struct inode *inode)
871{
872 struct pnfs_layout_hdr *lo;
873 int ret;
874
875 spin_lock(&inode->i_lock);
876 lo = NFS_I(inode)->layout;
877 if (lo == NULL) {
878 spin_unlock(&inode->i_lock);
879 return 0;
880 }
881 pnfs_get_layout_hdr(lo);
882 /* Block new layoutgets and read/write to ds */
883 lo->plh_block_lgets++;
884 spin_unlock(&inode->i_lock);
885 filemap_fdatawait(inode->i_mapping);
886 ret = pnfs_layoutcommit_inode(inode, true);
887 if (ret == 0)
888 ret = _pnfs_return_layout(inode);
889 spin_lock(&inode->i_lock);
890 lo->plh_block_lgets--;
891 spin_unlock(&inode->i_lock);
892 pnfs_put_layout_hdr(lo);
893 return ret;
894}
895
848bool pnfs_roc(struct inode *ino) 896bool pnfs_roc(struct inode *ino)
849{ 897{
850 struct pnfs_layout_hdr *lo; 898 struct pnfs_layout_hdr *lo;
@@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1458 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 1506 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1459 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1507 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1460 PNFS_LAYOUTRET_ON_ERROR) { 1508 PNFS_LAYOUTRET_ON_ERROR) {
1461 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1462 pnfs_return_layout(hdr->inode); 1509 pnfs_return_layout(hdr->inode);
1463 } 1510 }
1464 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1511 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1613 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 1660 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1614 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1661 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1615 PNFS_LAYOUTRET_ON_ERROR) { 1662 PNFS_LAYOUTRET_ON_ERROR) {
1616 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1617 pnfs_return_layout(hdr->inode); 1663 pnfs_return_layout(hdr->inode);
1618 } 1664 }
1619 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1665 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1746 1792
1747 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 1793 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
1748 if (lseg->pls_range.iomode == IOMODE_RW && 1794 if (lseg->pls_range.iomode == IOMODE_RW &&
1749 test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 1795 test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
1750 list_add(&lseg->pls_lc_list, listp); 1796 list_add(&lseg->pls_lc_list, listp);
1751 } 1797 }
1752} 1798}
1753 1799
1800static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
1801{
1802 struct pnfs_layout_segment *lseg, *tmp;
1803 unsigned long *bitlock = &NFS_I(inode)->flags;
1804
1805 /* Matched by references in pnfs_set_layoutcommit */
1806 list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
1807 list_del_init(&lseg->pls_lc_list);
1808 pnfs_put_lseg(lseg);
1809 }
1810
1811 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
1812 smp_mb__after_clear_bit();
1813 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
1814}
1815
1754void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 1816void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1755{ 1817{
1756 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); 1818 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
1795 1857
1796 if (nfss->pnfs_curr_ld->cleanup_layoutcommit) 1858 if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
1797 nfss->pnfs_curr_ld->cleanup_layoutcommit(data); 1859 nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
1860 pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
1798} 1861}
1799 1862
1800/* 1863/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba80417748..f5f8a470a647 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
219void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 219void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
220int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 220int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
221int _pnfs_return_layout(struct inode *); 221int _pnfs_return_layout(struct inode *);
222int pnfs_commit_and_return_layout(struct inode *);
222void pnfs_ld_write_done(struct nfs_write_data *); 223void pnfs_ld_write_done(struct nfs_write_data *);
223void pnfs_ld_read_done(struct nfs_read_data *); 224void pnfs_ld_read_done(struct nfs_read_data *);
224struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 225struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
407 return 0; 408 return 0;
408} 409}
409 410
411static inline int pnfs_commit_and_return_layout(struct inode *inode)
412{
413 return 0;
414}
415
410static inline bool 416static inline bool
411pnfs_ld_layoutret_on_setattr(struct inode *inode) 417pnfs_ld_layoutret_on_setattr(struct inode *inode)
412{ 418{
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 17b32b722457..2f8a29db0f1b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -294,6 +294,7 @@ struct file_system_type nfs_fs_type = {
294 .kill_sb = nfs_kill_super, 294 .kill_sb = nfs_kill_super,
295 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, 295 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
296}; 296};
297MODULE_ALIAS_FS("nfs");
297EXPORT_SYMBOL_GPL(nfs_fs_type); 298EXPORT_SYMBOL_GPL(nfs_fs_type);
298 299
299struct file_system_type nfs_xdev_fs_type = { 300struct file_system_type nfs_xdev_fs_type = {
@@ -333,6 +334,8 @@ struct file_system_type nfs4_fs_type = {
333 .kill_sb = nfs_kill_super, 334 .kill_sb = nfs_kill_super,
334 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, 335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
335}; 336};
337MODULE_ALIAS_FS("nfs4");
338MODULE_ALIAS("nfs4");
336EXPORT_SYMBOL_GPL(nfs4_fs_type); 339EXPORT_SYMBOL_GPL(nfs4_fs_type);
337 340
338static int __init register_nfs4_fs(void) 341static int __init register_nfs4_fs(void)
@@ -2717,6 +2720,5 @@ module_param(send_implementation_id, ushort, 0644);
2717MODULE_PARM_DESC(send_implementation_id, 2720MODULE_PARM_DESC(send_implementation_id,
2718 "Send implementation ID with NFSv4.1 exchange_id"); 2721 "Send implementation ID with NFSv4.1 exchange_id");
2719MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string"); 2722MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string");
2720MODULE_ALIAS("nfs4");
2721 2723
2722#endif /* CONFIG_NFS_V4 */ 2724#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 16d39c6c4fbb..2e27430b9070 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,37 +230,6 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
230 __nfs4_file_put_access(fp, oflag); 230 __nfs4_file_put_access(fp, oflag);
231} 231}
232 232
233static inline int get_new_stid(struct nfs4_stid *stid)
234{
235 static int min_stateid = 0;
236 struct idr *stateids = &stid->sc_client->cl_stateids;
237 int new_stid;
238 int error;
239
240 error = idr_get_new_above(stateids, stid, min_stateid, &new_stid);
241 /*
242 * Note: the necessary preallocation was done in
243 * nfs4_alloc_stateid(). The idr code caps the number of
244 * preallocations that can exist at a time, but the state lock
245 * prevents anyone from using ours before we get here:
246 */
247 WARN_ON_ONCE(error);
248 /*
249 * It shouldn't be a problem to reuse an opaque stateid value.
250 * I don't think it is for 4.1. But with 4.0 I worry that, for
251 * example, a stray write retransmission could be accepted by
252 * the server when it should have been rejected. Therefore,
253 * adopt a trick from the sctp code to attempt to maximize the
254 * amount of time until an id is reused, by ensuring they always
255 * "increase" (mod INT_MAX):
256 */
257
258 min_stateid = new_stid+1;
259 if (min_stateid == INT_MAX)
260 min_stateid = 0;
261 return new_stid;
262}
263
264static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct 233static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
265kmem_cache *slab) 234kmem_cache *slab)
266{ 235{
@@ -273,9 +242,8 @@ kmem_cache *slab)
273 if (!stid) 242 if (!stid)
274 return NULL; 243 return NULL;
275 244
276 if (!idr_pre_get(stateids, GFP_KERNEL)) 245 new_id = idr_alloc(stateids, stid, min_stateid, 0, GFP_KERNEL);
277 goto out_free; 246 if (new_id < 0)
278 if (idr_get_new_above(stateids, stid, min_stateid, &new_id))
279 goto out_free; 247 goto out_free;
280 stid->sc_client = cl; 248 stid->sc_client = cl;
281 stid->sc_type = 0; 249 stid->sc_type = 0;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 62c1ee128aeb..ca05f6dc3544 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
102{ 102{
103 if (rp->c_type == RC_REPLBUFF) 103 if (rp->c_type == RC_REPLBUFF)
104 kfree(rp->c_replvec.iov_base); 104 kfree(rp->c_replvec.iov_base);
105 hlist_del(&rp->c_hash); 105 if (!hlist_unhashed(&rp->c_hash))
106 hlist_del(&rp->c_hash);
106 list_del(&rp->c_lru); 107 list_del(&rp->c_lru);
107 --num_drc_entries; 108 --num_drc_entries;
108 kmem_cache_free(drc_slab, rp); 109 kmem_cache_free(drc_slab, rp);
@@ -118,6 +119,10 @@ nfsd_reply_cache_free(struct svc_cacherep *rp)
118 119
119int nfsd_reply_cache_init(void) 120int nfsd_reply_cache_init(void)
120{ 121{
122 INIT_LIST_HEAD(&lru_head);
123 max_drc_entries = nfsd_cache_size_limit();
124 num_drc_entries = 0;
125
121 register_shrinker(&nfsd_reply_cache_shrinker); 126 register_shrinker(&nfsd_reply_cache_shrinker);
122 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 127 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
123 0, 0, NULL); 128 0, 0, NULL);
@@ -128,10 +133,6 @@ int nfsd_reply_cache_init(void)
128 if (!cache_hash) 133 if (!cache_hash)
129 goto out_nomem; 134 goto out_nomem;
130 135
131 INIT_LIST_HEAD(&lru_head);
132 max_drc_entries = nfsd_cache_size_limit();
133 num_drc_entries = 0;
134
135 return 0; 136 return 0;
136out_nomem: 137out_nomem:
137 printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); 138 printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 13a21c8fca49..f33455b4d957 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1090,6 +1090,7 @@ static struct file_system_type nfsd_fs_type = {
1090 .mount = nfsd_mount, 1090 .mount = nfsd_mount,
1091 .kill_sb = nfsd_umount, 1091 .kill_sb = nfsd_umount,
1092}; 1092};
1093MODULE_ALIAS_FS("nfsd");
1093 1094
1094#ifdef CONFIG_PROC_FS 1095#ifdef CONFIG_PROC_FS
1095static int create_proc_exports_entry(void) 1096static int create_proc_exports_entry(void)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2a7eb536de0b..2b2e2396a869 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1013 int host_err; 1013 int host_err;
1014 int stable = *stablep; 1014 int stable = *stablep;
1015 int use_wgather; 1015 int use_wgather;
1016 loff_t pos = offset;
1016 1017
1017 dentry = file->f_path.dentry; 1018 dentry = file->f_path.dentry;
1018 inode = dentry->d_inode; 1019 inode = dentry->d_inode;
@@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1025 1026
1026 /* Write the data. */ 1027 /* Write the data. */
1027 oldfs = get_fs(); set_fs(KERNEL_DS); 1028 oldfs = get_fs(); set_fs(KERNEL_DS);
1028 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1029 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos);
1029 set_fs(oldfs); 1030 set_fs(oldfs);
1030 if (host_err < 0) 1031 if (host_err < 0)
1031 goto out_nfserr; 1032 goto out_nfserr;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 3c991dc84f2f..c7d1f9f18b09 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1361,6 +1361,7 @@ struct file_system_type nilfs_fs_type = {
1361 .kill_sb = kill_block_super, 1361 .kill_sb = kill_block_super,
1362 .fs_flags = FS_REQUIRES_DEV, 1362 .fs_flags = FS_REQUIRES_DEV,
1363}; 1363};
1364MODULE_ALIAS_FS("nilfs2");
1364 1365
1365static void nilfs_inode_init_once(void *obj) 1366static void nilfs_inode_init_once(void *obj)
1366{ 1367{
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 4a8289f8b16c..82650d52d916 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3079,6 +3079,7 @@ static struct file_system_type ntfs_fs_type = {
3079 .kill_sb = kill_block_super, 3079 .kill_sb = kill_block_super,
3080 .fs_flags = FS_REQUIRES_DEV, 3080 .fs_flags = FS_REQUIRES_DEV,
3081}; 3081};
3082MODULE_ALIAS_FS("ntfs");
3082 3083
3083/* Stable names for the slab caches. */ 3084/* Stable names for the slab caches. */
3084static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache"; 3085static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache";
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 4c5fc8d77dc2..12bafb7265ce 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -640,6 +640,7 @@ static struct file_system_type dlmfs_fs_type = {
640 .mount = dlmfs_mount, 640 .mount = dlmfs_mount,
641 .kill_sb = kill_litter_super, 641 .kill_sb = kill_litter_super,
642}; 642};
643MODULE_ALIAS_FS("ocfs2_dlmfs");
643 644
644static int __init init_dlmfs_fs(void) 645static int __init init_dlmfs_fs(void)
645{ 646{
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 9b6910dec4ba..01b85165552b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1266,6 +1266,7 @@ static struct file_system_type ocfs2_fs_type = {
1266 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1266 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1267 .next = NULL 1267 .next = NULL
1268}; 1268};
1269MODULE_ALIAS_FS("ocfs2");
1269 1270
1270static int ocfs2_check_set_options(struct super_block *sb, 1271static int ocfs2_check_set_options(struct super_block *sb,
1271 struct mount_options *options) 1272 struct mount_options *options)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 25d715c7c87a..d8b0afde2179 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -572,6 +572,7 @@ static struct file_system_type omfs_fs_type = {
572 .kill_sb = kill_block_super, 572 .kill_sb = kill_block_super,
573 .fs_flags = FS_REQUIRES_DEV, 573 .fs_flags = FS_REQUIRES_DEV,
574}; 574};
575MODULE_ALIAS_FS("omfs");
575 576
576static int __init init_omfs_fs(void) 577static int __init init_omfs_fs(void)
577{ 578{
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ae47fa7efb9d..75885ffde44e 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -432,6 +432,7 @@ static struct file_system_type openprom_fs_type = {
432 .mount = openprom_mount, 432 .mount = openprom_mount,
433 .kill_sb = kill_anon_super, 433 .kill_sb = kill_anon_super,
434}; 434};
435MODULE_ALIAS_FS("openpromfs");
435 436
436static void op_inode_init_once(void *data) 437static void op_inode_init_once(void *data)
437{ 438{
diff --git a/fs/pipe.c b/fs/pipe.c
index 64a494cef0a0..2234f3f61f8d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -863,6 +863,9 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
863{ 863{
864 int ret = -ENOENT; 864 int ret = -ENOENT;
865 865
866 if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
867 return -EINVAL;
868
866 mutex_lock(&inode->i_mutex); 869 mutex_lock(&inode->i_mutex);
867 870
868 if (inode->i_pipe) { 871 if (inode->i_pipe) {
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..8b29d2164da6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -9,6 +9,7 @@
9#include <linux/mnt_namespace.h> 9#include <linux/mnt_namespace.h>
10#include <linux/mount.h> 10#include <linux/mount.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/nsproxy.h>
12#include "internal.h" 13#include "internal.h"
13#include "pnode.h" 14#include "pnode.h"
14 15
@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
220int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, 221int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
221 struct mount *source_mnt, struct list_head *tree_list) 222 struct mount *source_mnt, struct list_head *tree_list)
222{ 223{
224 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
223 struct mount *m, *child; 225 struct mount *m, *child;
224 int ret = 0; 226 int ret = 0;
225 struct mount *prev_dest_mnt = dest_mnt; 227 struct mount *prev_dest_mnt = dest_mnt;
@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
237 239
238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 240 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
239 241
242 /* Notice when we are propagating across user namespaces */
243 if (m->mnt_ns->user_ns != user_ns)
244 type |= CL_UNPRIVILEGED;
245
240 child = copy_tree(source, source->mnt.mnt_root, type); 246 child = copy_tree(source, source->mnt.mnt_root, type);
241 if (IS_ERR(child)) { 247 if (IS_ERR(child)) {
242 ret = PTR_ERR(child); 248 ret = PTR_ERR(child);
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..a0493d5ebfbf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -23,6 +23,7 @@
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25#define CL_SHARED_TO_SLAVE 0x20 25#define CL_SHARED_TO_SLAVE 0x20
26#define CL_UNPRIVILEGED 0x40
26 27
27static inline void set_mnt_shared(struct mount *mnt) 28static inline void set_mnt_shared(struct mount *mnt)
28{ 29{
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a86aebc9ba7c..869116c2afbe 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -446,9 +446,10 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
446 446
447struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) 447struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
448{ 448{
449 struct inode *inode = iget_locked(sb, de->low_ino); 449 struct inode *inode = new_inode_pseudo(sb);
450 450
451 if (inode && (inode->i_state & I_NEW)) { 451 if (inode) {
452 inode->i_ino = de->low_ino;
452 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 453 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
453 PROC_I(inode)->pde = de; 454 PROC_I(inode)->pde = de;
454 455
@@ -476,7 +477,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
476 inode->i_fop = de->proc_fops; 477 inode->i_fop = de->proc_fops;
477 } 478 }
478 } 479 }
479 unlock_new_inode(inode);
480 } else 480 } else
481 pde_put(de); 481 pde_put(de);
482 return inode; 482 return inode;
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b7a47196c8c3..66b51c0383da 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -118,7 +118,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
118 struct super_block *sb = inode->i_sb; 118 struct super_block *sb = inode->i_sb;
119 struct proc_inode *ei = PROC_I(inode); 119 struct proc_inode *ei = PROC_I(inode);
120 struct task_struct *task; 120 struct task_struct *task;
121 struct dentry *ns_dentry; 121 struct path ns_path;
122 void *error = ERR_PTR(-EACCES); 122 void *error = ERR_PTR(-EACCES);
123 123
124 task = get_proc_task(inode); 124 task = get_proc_task(inode);
@@ -128,14 +128,14 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
128 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 128 if (!ptrace_may_access(task, PTRACE_MODE_READ))
129 goto out_put_task; 129 goto out_put_task;
130 130
131 ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); 131 ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
132 if (IS_ERR(ns_dentry)) { 132 if (IS_ERR(ns_path.dentry)) {
133 error = ERR_CAST(ns_dentry); 133 error = ERR_CAST(ns_path.dentry);
134 goto out_put_task; 134 goto out_put_task;
135 } 135 }
136 136
137 dput(nd->path.dentry); 137 ns_path.mnt = mntget(nd->path.mnt);
138 nd->path.dentry = ns_dentry; 138 nd_jump_link(nd, &ns_path);
139 error = NULL; 139 error = NULL;
140 140
141out_put_task: 141out_put_task:
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26bac..9c7fab1d23f0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/user_namespace.h>
19#include <linux/mount.h> 20#include <linux/mount.h>
20#include <linux/pid_namespace.h> 21#include <linux/pid_namespace.h>
21#include <linux/parser.h> 22#include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
108 } else { 109 } else {
109 ns = task_active_pid_ns(current); 110 ns = task_active_pid_ns(current);
110 options = data; 111 options = data;
112
113 if (!current_user_ns()->may_mount_proc)
114 return ERR_PTR(-EPERM);
111 } 115 }
112 116
113 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); 117 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 43098bb5723a..2e8caa62da78 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -412,6 +412,7 @@ static struct file_system_type qnx4_fs_type = {
412 .kill_sb = kill_block_super, 412 .kill_sb = kill_block_super,
413 .fs_flags = FS_REQUIRES_DEV, 413 .fs_flags = FS_REQUIRES_DEV,
414}; 414};
415MODULE_ALIAS_FS("qnx4");
415 416
416static int __init init_qnx4_fs(void) 417static int __init init_qnx4_fs(void)
417{ 418{
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 57199a52a351..8d941edfefa1 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -672,6 +672,7 @@ static struct file_system_type qnx6_fs_type = {
672 .kill_sb = kill_block_super, 672 .kill_sb = kill_block_super,
673 .fs_flags = FS_REQUIRES_DEV, 673 .fs_flags = FS_REQUIRES_DEV,
674}; 674};
675MODULE_ALIAS_FS("qnx6");
675 676
676static int __init init_qnx6_fs(void) 677static int __init init_qnx6_fs(void)
677{ 678{
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 05ae3c97f7a5..3e64169ef527 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1439,8 +1439,11 @@ static void __dquot_initialize(struct inode *inode, int type)
1439 * did a write before quota was turned on 1439 * did a write before quota was turned on
1440 */ 1440 */
1441 rsv = inode_get_rsv_space(inode); 1441 rsv = inode_get_rsv_space(inode);
1442 if (unlikely(rsv)) 1442 if (unlikely(rsv)) {
1443 spin_lock(&dq_data_lock);
1443 dquot_resv_space(inode->i_dquot[cnt], rsv); 1444 dquot_resv_space(inode->i_dquot[cnt], rsv);
1445 spin_unlock(&dq_data_lock);
1446 }
1444 } 1447 }
1445 } 1448 }
1446out_err: 1449out_err:
diff --git a/fs/read_write.c b/fs/read_write.c
index a698eff457fb..e6ddc8dceb96 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
17#include <linux/splice.h> 17#include <linux/splice.h>
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include "read_write.h" 19#include "read_write.h"
20#include "internal.h"
20 21
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/unistd.h> 23#include <asm/unistd.h>
@@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
417 418
418EXPORT_SYMBOL(do_sync_write); 419EXPORT_SYMBOL(do_sync_write);
419 420
421ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
422{
423 mm_segment_t old_fs;
424 const char __user *p;
425 ssize_t ret;
426
427 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
428 return -EINVAL;
429
430 old_fs = get_fs();
431 set_fs(get_ds());
432 p = (__force const char __user *)buf;
433 if (count > MAX_RW_COUNT)
434 count = MAX_RW_COUNT;
435 if (file->f_op->write)
436 ret = file->f_op->write(file, p, count, pos);
437 else
438 ret = do_sync_write(file, p, count, pos);
439 set_fs(old_fs);
440 if (ret > 0) {
441 fsnotify_modify(file);
442 add_wchar(current, ret);
443 }
444 inc_syscw(current);
445 return ret;
446}
447
420ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 448ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
421{ 449{
422 ssize_t ret; 450 ssize_t ret;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 418bdc3a57da..f8a23c3078f8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1147,8 +1147,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1147 "on filesystem root."); 1147 "on filesystem root.");
1148 return 0; 1148 return 0;
1149 } 1149 }
1150 qf_names[qtype] = 1150 qf_names[qtype] = kstrdup(arg, GFP_KERNEL);
1151 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1152 if (!qf_names[qtype]) { 1151 if (!qf_names[qtype]) {
1153 reiserfs_warning(s, "reiserfs-2502", 1152 reiserfs_warning(s, "reiserfs-2502",
1154 "not enough memory " 1153 "not enough memory "
@@ -1156,7 +1155,6 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1156 "quotafile name."); 1155 "quotafile name.");
1157 return 0; 1156 return 0;
1158 } 1157 }
1159 strcpy(qf_names[qtype], arg);
1160 if (qtype == USRQUOTA) 1158 if (qtype == USRQUOTA)
1161 *mount_options |= 1 << REISERFS_USRQUOTA; 1159 *mount_options |= 1 << REISERFS_USRQUOTA;
1162 else 1160 else
@@ -2434,6 +2432,7 @@ struct file_system_type reiserfs_fs_type = {
2434 .kill_sb = reiserfs_kill_sb, 2432 .kill_sb = reiserfs_kill_sb,
2435 .fs_flags = FS_REQUIRES_DEV, 2433 .fs_flags = FS_REQUIRES_DEV,
2436}; 2434};
2435MODULE_ALIAS_FS("reiserfs");
2437 2436
2438MODULE_DESCRIPTION("ReiserFS journaled filesystem"); 2437MODULE_DESCRIPTION("ReiserFS journaled filesystem");
2439MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>"); 2438MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 7e8d3a80bdab..15cbc41ee365 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -599,6 +599,7 @@ static struct file_system_type romfs_fs_type = {
599 .kill_sb = romfs_kill_sb, 599 .kill_sb = romfs_kill_sb,
600 .fs_flags = FS_REQUIRES_DEV, 600 .fs_flags = FS_REQUIRES_DEV,
601}; 601};
602MODULE_ALIAS_FS("romfs");
602 603
603/* 604/*
604 * inode storage initialiser 605 * inode storage initialiser
diff --git a/fs/splice.c b/fs/splice.c
index 718bd0056384..29e394e49ddd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include "internal.h"
34 35
35/* 36/*
36 * Attempt to steal a page from a pipe buffer. This should perhaps go into 37 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1048{ 1049{
1049 int ret; 1050 int ret;
1050 void *data; 1051 void *data;
1052 loff_t tmp = sd->pos;
1051 1053
1052 data = buf->ops->map(pipe, buf, 0); 1054 data = buf->ops->map(pipe, buf, 0);
1053 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); 1055 ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
1054 buf->ops->unmap(pipe, buf, data); 1056 buf->ops->unmap(pipe, buf, data);
1055 1057
1056 return ret; 1058 return ret;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 260e3928d4f5..60553a9053ca 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -489,6 +489,7 @@ static struct file_system_type squashfs_fs_type = {
489 .kill_sb = kill_block_super, 489 .kill_sb = kill_block_super,
490 .fs_flags = FS_REQUIRES_DEV 490 .fs_flags = FS_REQUIRES_DEV
491}; 491};
492MODULE_ALIAS_FS("squashfs");
492 493
493static const struct super_operations squashfs_super_ops = { 494static const struct super_operations squashfs_super_ops = {
494 .alloc_inode = squashfs_alloc_inode, 495 .alloc_inode = squashfs_alloc_inode,
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2fbdff6be25c..e14512678c9b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1020 ino = parent_sd->s_ino; 1020 ino = parent_sd->s_ino;
1021 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) 1021 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
1022 filp->f_pos++; 1022 filp->f_pos++;
1023 else
1024 return 0;
1023 } 1025 }
1024 if (filp->f_pos == 1) { 1026 if (filp->f_pos == 1) {
1025 if (parent_sd->s_parent) 1027 if (parent_sd->s_parent)
@@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1028 ino = parent_sd->s_ino; 1030 ino = parent_sd->s_ino;
1029 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) 1031 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
1030 filp->f_pos++; 1032 filp->f_pos++;
1033 else
1034 return 0;
1031 } 1035 }
1032 mutex_lock(&sysfs_mutex); 1036 mutex_lock(&sysfs_mutex);
1033 for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); 1037 for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
@@ -1058,10 +1062,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1058 return 0; 1062 return 0;
1059} 1063}
1060 1064
1065static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
1066{
1067 struct inode *inode = file_inode(file);
1068 loff_t ret;
1069
1070 mutex_lock(&inode->i_mutex);
1071 ret = generic_file_llseek(file, offset, whence);
1072 mutex_unlock(&inode->i_mutex);
1073
1074 return ret;
1075}
1061 1076
1062const struct file_operations sysfs_dir_operations = { 1077const struct file_operations sysfs_dir_operations = {
1063 .read = generic_read_dir, 1078 .read = generic_read_dir,
1064 .readdir = sysfs_readdir, 1079 .readdir = sysfs_readdir,
1065 .release = sysfs_dir_release, 1080 .release = sysfs_dir_release,
1066 .llseek = generic_file_llseek, 1081 .llseek = sysfs_dir_llseek,
1067}; 1082};
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5ec733..afd83273e6ce 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/magic.h> 20#include <linux/magic.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/user_namespace.h>
22 23
23#include "sysfs.h" 24#include "sysfs.h"
24 25
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
111 struct super_block *sb; 112 struct super_block *sb;
112 int error; 113 int error;
113 114
115 if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
116 return ERR_PTR(-EPERM);
117
114 info = kzalloc(sizeof(*info), GFP_KERNEL); 118 info = kzalloc(sizeof(*info), GFP_KERNEL);
115 if (!info) 119 if (!info)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a38e87bdd78d..d0c6a007ce83 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -545,6 +545,7 @@ static struct file_system_type sysv_fs_type = {
545 .kill_sb = kill_block_super, 545 .kill_sb = kill_block_super,
546 .fs_flags = FS_REQUIRES_DEV, 546 .fs_flags = FS_REQUIRES_DEV,
547}; 547};
548MODULE_ALIAS_FS("sysv");
548 549
549static struct file_system_type v7_fs_type = { 550static struct file_system_type v7_fs_type = {
550 .owner = THIS_MODULE, 551 .owner = THIS_MODULE,
@@ -553,6 +554,8 @@ static struct file_system_type v7_fs_type = {
553 .kill_sb = kill_block_super, 554 .kill_sb = kill_block_super,
554 .fs_flags = FS_REQUIRES_DEV, 555 .fs_flags = FS_REQUIRES_DEV,
555}; 556};
557MODULE_ALIAS_FS("v7");
558MODULE_ALIAS("v7");
556 559
557static int __init init_sysv_fs(void) 560static int __init init_sysv_fs(void)
558{ 561{
@@ -586,5 +589,4 @@ static void __exit exit_sysv_fs(void)
586 589
587module_init(init_sysv_fs) 590module_init(init_sysv_fs)
588module_exit(exit_sysv_fs) 591module_exit(exit_sysv_fs)
589MODULE_ALIAS("v7");
590MODULE_LICENSE("GPL"); 592MODULE_LICENSE("GPL");
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ddc0f6ae65e9..ac838b844936 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2174,6 +2174,7 @@ static struct file_system_type ubifs_fs_type = {
2174 .mount = ubifs_mount, 2174 .mount = ubifs_mount,
2175 .kill_sb = kill_ubifs_super, 2175 .kill_sb = kill_ubifs_super,
2176}; 2176};
2177MODULE_ALIAS_FS("ubifs");
2177 2178
2178/* 2179/*
2179 * Inode slab cache constructor. 2180 * Inode slab cache constructor.
diff --git a/fs/udf/super.c b/fs/udf/super.c
index bc5b30a819e8..9ac4057a86c9 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -118,6 +118,7 @@ static struct file_system_type udf_fstype = {
118 .kill_sb = kill_block_super, 118 .kill_sb = kill_block_super,
119 .fs_flags = FS_REQUIRES_DEV, 119 .fs_flags = FS_REQUIRES_DEV,
120}; 120};
121MODULE_ALIAS_FS("udf");
121 122
122static struct kmem_cache *udf_inode_cachep; 123static struct kmem_cache *udf_inode_cachep;
123 124
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index dc8e3a861d0f..329f2f53b7ed 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1500,6 +1500,7 @@ static struct file_system_type ufs_fs_type = {
1500 .kill_sb = kill_block_super, 1500 .kill_sb = kill_block_super,
1501 .fs_flags = FS_REQUIRES_DEV, 1501 .fs_flags = FS_REQUIRES_DEV,
1502}; 1502};
1503MODULE_ALIAS_FS("ufs");
1503 1504
1504static int __init init_ufs_fs(void) 1505static int __init init_ufs_fs(void)
1505{ 1506{
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4e8f0df82d02..8459b5d8cb71 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1334,6 +1334,12 @@ _xfs_buf_ioapply(
1334 int size; 1334 int size;
1335 int i; 1335 int i;
1336 1336
1337 /*
1338 * Make sure we capture only current IO errors rather than stale errors
1339 * left over from previous use of the buffer (e.g. failed readahead).
1340 */
1341 bp->b_error = 0;
1342
1337 if (bp->b_flags & XBF_WRITE) { 1343 if (bp->b_flags & XBF_WRITE) {
1338 if (bp->b_flags & XBF_SYNCIO) 1344 if (bp->b_flags & XBF_SYNCIO)
1339 rw = WRITE_SYNC; 1345 rw = WRITE_SYNC;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 912d83d8860a..5a30dd899d2b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -325,7 +325,7 @@ xfs_iomap_eof_want_preallocate(
325 * rather than falling short due to things like stripe unit/width alignment of 325 * rather than falling short due to things like stripe unit/width alignment of
326 * real extents. 326 * real extents.
327 */ 327 */
328STATIC int 328STATIC xfs_fsblock_t
329xfs_iomap_eof_prealloc_initial_size( 329xfs_iomap_eof_prealloc_initial_size(
330 struct xfs_mount *mp, 330 struct xfs_mount *mp,
331 struct xfs_inode *ip, 331 struct xfs_inode *ip,
@@ -413,7 +413,7 @@ xfs_iomap_prealloc_size(
413 * have a large file on a small filesystem and the above 413 * have a large file on a small filesystem and the above
414 * lowspace thresholds are smaller than MAXEXTLEN. 414 * lowspace thresholds are smaller than MAXEXTLEN.
415 */ 415 */
416 while (alloc_blocks >= freesp) 416 while (alloc_blocks && alloc_blocks >= freesp)
417 alloc_blocks >>= 4; 417 alloc_blocks >>= 4;
418 } 418 }
419 419
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c407121873b4..ea341cea68cb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1561,6 +1561,7 @@ static struct file_system_type xfs_fs_type = {
1561 .kill_sb = kill_block_super, 1561 .kill_sb = kill_block_super,
1562 .fs_flags = FS_REQUIRES_DEV, 1562 .fs_flags = FS_REQUIRES_DEV,
1563}; 1563};
1564MODULE_ALIAS_FS("xfs");
1564 1565
1565STATIC int __init 1566STATIC int __init
1566xfs_init_zones(void) 1567xfs_init_zones(void)