aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2014-12-08 20:39:29 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2014-12-08 20:39:29 -0500
commitba00410b8131b23edfb0e09f8b6dd26c8eb621fb (patch)
treec08504e4d2fa51ac91cef544f336d0169806c49f /fs
parent8ce74dd6057832618957fc2cbd38fa959c3a0a6c (diff)
parentaa583096d9767892983332e7c1a984bd17e3cd39 (diff)
Merge branch 'iov_iter' into for-next
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/btrfs/ctree.c14
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c43
-rw-r--r--fs/btrfs/extent-tree.c18
-rw-r--r--fs/btrfs/file-item.c2
-rw-r--r--fs/btrfs/locking.c24
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/buffer.c38
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/ext3/super.c7
-rw-r--r--fs/ext4/extents.c9
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/namei.c28
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c17
-rw-r--r--fs/isofs/inode.c42
-rw-r--r--fs/jbd/revoke.c7
-rw-r--r--fs/jbd2/revoke.c10
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c14
-rw-r--r--fs/nfs/delegation.c25
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfs/direct.c1
-rw-r--r--fs/nfs/filelayout/filelayout.c3
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/netns.h1
-rw-r--r--fs/nfs/nfs4proc.c95
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/notify/fsnotify.c36
-rw-r--r--fs/notify/fsnotify.h4
-rw-r--r--fs/notify/inode_mark.c25
-rw-r--r--fs/notify/mark.c36
-rw-r--r--fs/notify/vfsmount_mark.c8
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/overlayfs/Kconfig2
-rw-r--r--fs/overlayfs/Makefile4
-rw-r--r--fs/overlayfs/dir.c31
-rw-r--r--fs/overlayfs/inode.c27
-rw-r--r--fs/overlayfs/readdir.c41
-rw-r--r--fs/overlayfs/super.c61
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c72
-rw-r--r--fs/xfs/xfs_itable.c250
-rw-r--r--fs/xfs/xfs_itable.h16
54 files changed, 558 insertions, 504 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 34a1b9dea6dd..da0bbb456d3f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/
104obj-$(CONFIG_AUTOFS4_FS) += autofs4/ 104obj-$(CONFIG_AUTOFS4_FS) += autofs4/
105obj-$(CONFIG_ADFS_FS) += adfs/ 105obj-$(CONFIG_ADFS_FS) += adfs/
106obj-$(CONFIG_FUSE_FS) += fuse/ 106obj-$(CONFIG_FUSE_FS) += fuse/
107obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ 107obj-$(CONFIG_OVERLAY_FS) += overlayfs/
108obj-$(CONFIG_UDF_FS) += udf/ 108obj-$(CONFIG_UDF_FS) += udf/
109obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 109obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
110obj-$(CONFIG_OMFS_FS) += omfs/ 110obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc6162fb8e..150822ee0a0b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
80{ 80{
81 int i; 81 int i;
82 82
83#ifdef CONFIG_DEBUG_LOCK_ALLOC
84 /* lockdep really cares that we take all of these spinlocks
85 * in the right order. If any of the locks in the path are not
86 * currently blocking, it is going to complain. So, make really
87 * really sure by forcing the path to blocking before we clear
88 * the path blocking.
89 */
90 if (held) { 83 if (held) {
91 btrfs_set_lock_blocking_rw(held, held_rw); 84 btrfs_set_lock_blocking_rw(held, held_rw);
92 if (held_rw == BTRFS_WRITE_LOCK) 85 if (held_rw == BTRFS_WRITE_LOCK)
@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
95 held_rw = BTRFS_READ_LOCK_BLOCKING; 88 held_rw = BTRFS_READ_LOCK_BLOCKING;
96 } 89 }
97 btrfs_set_path_blocking(p); 90 btrfs_set_path_blocking(p);
98#endif
99 91
100 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { 92 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
101 if (p->nodes[i] && p->locks[i]) { 93 if (p->nodes[i] && p->locks[i]) {
@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
107 } 99 }
108 } 100 }
109 101
110#ifdef CONFIG_DEBUG_LOCK_ALLOC
111 if (held) 102 if (held)
112 btrfs_clear_lock_blocking_rw(held, held_rw); 103 btrfs_clear_lock_blocking_rw(held, held_rw);
113#endif
114} 104}
115 105
116/* this also releases the path */ 106/* this also releases the path */
@@ -2893,7 +2883,7 @@ cow_done:
2893 } 2883 }
2894 p->locks[level] = BTRFS_WRITE_LOCK; 2884 p->locks[level] = BTRFS_WRITE_LOCK;
2895 } else { 2885 } else {
2896 err = btrfs_try_tree_read_lock(b); 2886 err = btrfs_tree_read_lock_atomic(b);
2897 if (!err) { 2887 if (!err) {
2898 btrfs_set_path_blocking(p); 2888 btrfs_set_path_blocking(p);
2899 btrfs_tree_read_lock(b); 2889 btrfs_tree_read_lock(b);
@@ -3025,7 +3015,7 @@ again:
3025 } 3015 }
3026 3016
3027 level = btrfs_header_level(b); 3017 level = btrfs_header_level(b);
3028 err = btrfs_try_tree_read_lock(b); 3018 err = btrfs_tree_read_lock_atomic(b);
3029 if (!err) { 3019 if (!err) {
3030 btrfs_set_path_blocking(p); 3020 btrfs_set_path_blocking(p);
3031 btrfs_tree_read_lock(b); 3021 btrfs_tree_read_lock(b);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d557264ee974..fe69edda11fb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3276,7 +3276,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3276 struct btrfs_root *root, unsigned long count); 3276 struct btrfs_root *root, unsigned long count);
3277int btrfs_async_run_delayed_refs(struct btrfs_root *root, 3277int btrfs_async_run_delayed_refs(struct btrfs_root *root,
3278 unsigned long count, int wait); 3278 unsigned long count, int wait);
3279int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 3279int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
3280int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 3280int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
3281 struct btrfs_root *root, u64 bytenr, 3281 struct btrfs_root *root, u64 bytenr,
3282 u64 offset, int metadata, u64 *refs, u64 *flags); 3282 u64 offset, int metadata, u64 *refs, u64 *flags);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ad0f47ac850..1bf9f897065d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3817,19 +3817,19 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3817 struct btrfs_super_block *sb = fs_info->super_copy; 3817 struct btrfs_super_block *sb = fs_info->super_copy;
3818 int ret = 0; 3818 int ret = 0;
3819 3819
3820 if (sb->root_level > BTRFS_MAX_LEVEL) { 3820 if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
3821 printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", 3821 printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
3822 sb->root_level, BTRFS_MAX_LEVEL); 3822 btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
3823 ret = -EINVAL; 3823 ret = -EINVAL;
3824 } 3824 }
3825 if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { 3825 if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
3826 printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", 3826 printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n",
3827 sb->chunk_root_level, BTRFS_MAX_LEVEL); 3827 btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
3828 ret = -EINVAL; 3828 ret = -EINVAL;
3829 } 3829 }
3830 if (sb->log_root_level > BTRFS_MAX_LEVEL) { 3830 if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
3831 printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", 3831 printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n",
3832 sb->log_root_level, BTRFS_MAX_LEVEL); 3832 btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
3833 ret = -EINVAL; 3833 ret = -EINVAL;
3834 } 3834 }
3835 3835
@@ -3837,15 +3837,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3837 * The common minimum, we don't know if we can trust the nodesize/sectorsize 3837 * The common minimum, we don't know if we can trust the nodesize/sectorsize
3838 * items yet, they'll be verified later. Issue just a warning. 3838 * items yet, they'll be verified later. Issue just a warning.
3839 */ 3839 */
3840 if (!IS_ALIGNED(sb->root, 4096)) 3840 if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
3841 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", 3841 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
3842 sb->root); 3842 sb->root);
3843 if (!IS_ALIGNED(sb->chunk_root, 4096)) 3843 if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
3844 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", 3844 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
3845 sb->chunk_root); 3845 sb->chunk_root);
3846 if (!IS_ALIGNED(sb->log_root, 4096)) 3846 if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
3847 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", 3847 printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
3848 sb->log_root); 3848 btrfs_super_log_root(sb));
3849 3849
3850 if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { 3850 if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
3851 printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", 3851 printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
@@ -3857,13 +3857,13 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3857 * Hint to catch really bogus numbers, bitflips or so, more exact checks are 3857 * Hint to catch really bogus numbers, bitflips or so, more exact checks are
3858 * done later 3858 * done later
3859 */ 3859 */
3860 if (sb->num_devices > (1UL << 31)) 3860 if (btrfs_super_num_devices(sb) > (1UL << 31))
3861 printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", 3861 printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
3862 sb->num_devices); 3862 btrfs_super_num_devices(sb));
3863 3863
3864 if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { 3864 if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
3865 printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", 3865 printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
3866 sb->bytenr, BTRFS_SUPER_INFO_OFFSET); 3866 btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
3867 ret = -EINVAL; 3867 ret = -EINVAL;
3868 } 3868 }
3869 3869
@@ -3871,14 +3871,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3871 * The generation is a global counter, we'll trust it more than the others 3871 * The generation is a global counter, we'll trust it more than the others
3872 * but it's still possible that it's the one that's wrong. 3872 * but it's still possible that it's the one that's wrong.
3873 */ 3873 */
3874 if (sb->generation < sb->chunk_root_generation) 3874 if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
3875 printk(KERN_WARNING 3875 printk(KERN_WARNING
3876 "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", 3876 "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n",
3877 sb->generation, sb->chunk_root_generation); 3877 btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb));
3878 if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) 3878 if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
3879 && btrfs_super_cache_generation(sb) != (u64)-1)
3879 printk(KERN_WARNING 3880 printk(KERN_WARNING
3880 "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", 3881 "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n",
3881 sb->generation, sb->cache_generation); 3882 btrfs_super_generation(sb), btrfs_super_cache_generation(sb));
3882 3883
3883 return ret; 3884 return ret;
3884} 3885}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d56589571012..47c1ba141082 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -710,8 +710,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
710 rcu_read_unlock(); 710 rcu_read_unlock();
711} 711}
712 712
713/* simple helper to search for an existing extent at a given offset */ 713/* simple helper to search for an existing data extent at a given offset */
714int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 714int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
715{ 715{
716 int ret; 716 int ret;
717 struct btrfs_key key; 717 struct btrfs_key key;
@@ -726,12 +726,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
726 key.type = BTRFS_EXTENT_ITEM_KEY; 726 key.type = BTRFS_EXTENT_ITEM_KEY;
727 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 727 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
728 0, 0); 728 0, 0);
729 if (ret > 0) {
730 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
731 if (key.objectid == start &&
732 key.type == BTRFS_METADATA_ITEM_KEY)
733 ret = 0;
734 }
735 btrfs_free_path(path); 729 btrfs_free_path(path);
736 return ret; 730 return ret;
737} 731}
@@ -786,7 +780,6 @@ search_again:
786 else 780 else
787 key.type = BTRFS_EXTENT_ITEM_KEY; 781 key.type = BTRFS_EXTENT_ITEM_KEY;
788 782
789again:
790 ret = btrfs_search_slot(trans, root->fs_info->extent_root, 783 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
791 &key, path, 0, 0); 784 &key, path, 0, 0);
792 if (ret < 0) 785 if (ret < 0)
@@ -802,13 +795,6 @@ again:
802 key.offset == root->nodesize) 795 key.offset == root->nodesize)
803 ret = 0; 796 ret = 0;
804 } 797 }
805 if (ret) {
806 key.objectid = bytenr;
807 key.type = BTRFS_EXTENT_ITEM_KEY;
808 key.offset = root->nodesize;
809 btrfs_release_path(path);
810 goto again;
811 }
812 } 798 }
813 799
814 if (ret == 0) { 800 if (ret == 0) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 783a94355efd..84a2d1868271 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
413 ret = 0; 413 ret = 0;
414fail: 414fail:
415 while (ret < 0 && !list_empty(&tmplist)) { 415 while (ret < 0 && !list_empty(&tmplist)) {
416 sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); 416 sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
417 list_del(&sums->list); 417 list_del(&sums->list);
418 kfree(sums); 418 kfree(sums);
419 } 419 }
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5665d2149249..f8229ef1b46d 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -128,6 +128,26 @@ again:
128} 128}
129 129
130/* 130/*
131 * take a spinning read lock.
132 * returns 1 if we get the read lock and 0 if we don't
133 * this won't wait for blocking writers
134 */
135int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
136{
137 if (atomic_read(&eb->blocking_writers))
138 return 0;
139
140 read_lock(&eb->lock);
141 if (atomic_read(&eb->blocking_writers)) {
142 read_unlock(&eb->lock);
143 return 0;
144 }
145 atomic_inc(&eb->read_locks);
146 atomic_inc(&eb->spinning_readers);
147 return 1;
148}
149
150/*
131 * returns 1 if we get the read lock and 0 if we don't 151 * returns 1 if we get the read lock and 0 if we don't
132 * this won't wait for blocking writers 152 * this won't wait for blocking writers
133 */ 153 */
@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
158 atomic_read(&eb->blocking_readers)) 178 atomic_read(&eb->blocking_readers))
159 return 0; 179 return 0;
160 180
161 if (!write_trylock(&eb->lock)) 181 write_lock(&eb->lock);
162 return 0;
163
164 if (atomic_read(&eb->blocking_writers) || 182 if (atomic_read(&eb->blocking_writers) ||
165 atomic_read(&eb->blocking_readers)) { 183 atomic_read(&eb->blocking_readers)) {
166 write_unlock(&eb->lock); 184 write_unlock(&eb->lock);
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b81e0e9a4894..c44a9d5f5362 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
35void btrfs_assert_tree_locked(struct extent_buffer *eb); 35void btrfs_assert_tree_locked(struct extent_buffer *eb);
36int btrfs_try_tree_read_lock(struct extent_buffer *eb); 36int btrfs_try_tree_read_lock(struct extent_buffer *eb);
37int btrfs_try_tree_write_lock(struct extent_buffer *eb); 37int btrfs_try_tree_write_lock(struct extent_buffer *eb);
38int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
39
38 40
39static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) 41static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
40{ 42{
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a2b97ef10317..54bd91ece35b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2151,6 +2151,7 @@ static void __exit exit_btrfs_fs(void)
2151 extent_map_exit(); 2151 extent_map_exit();
2152 extent_io_exit(); 2152 extent_io_exit();
2153 btrfs_interface_exit(); 2153 btrfs_interface_exit();
2154 btrfs_end_io_wq_exit();
2154 unregister_filesystem(&btrfs_fs_type); 2155 unregister_filesystem(&btrfs_fs_type);
2155 btrfs_exit_sysfs(); 2156 btrfs_exit_sysfs();
2156 btrfs_cleanup_fs_uuids(); 2157 btrfs_cleanup_fs_uuids();
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1475979e5718..286213cec861 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -672,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
672 * is this extent already allocated in the extent 672 * is this extent already allocated in the extent
673 * allocation tree? If so, just add a reference 673 * allocation tree? If so, just add a reference
674 */ 674 */
675 ret = btrfs_lookup_extent(root, ins.objectid, 675 ret = btrfs_lookup_data_extent(root, ins.objectid,
676 ins.offset); 676 ins.offset);
677 if (ret == 0) { 677 if (ret == 0) {
678 ret = btrfs_inc_extent_ref(trans, root, 678 ret = btrfs_inc_extent_ref(trans, root,
diff --git a/fs/buffer.c b/fs/buffer.c
index 6c48f20eddd4..20805db2c987 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -128,21 +128,15 @@ __clear_page_buffers(struct page *page)
128 page_cache_release(page); 128 page_cache_release(page);
129} 129}
130 130
131 131static void buffer_io_error(struct buffer_head *bh, char *msg)
132static int quiet_error(struct buffer_head *bh)
133{
134 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
135 return 0;
136 return 1;
137}
138
139
140static void buffer_io_error(struct buffer_head *bh)
141{ 132{
142 char b[BDEVNAME_SIZE]; 133 char b[BDEVNAME_SIZE];
143 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", 134
135 if (!test_bit(BH_Quiet, &bh->b_state))
136 printk_ratelimited(KERN_ERR
137 "Buffer I/O error on dev %s, logical block %llu%s\n",
144 bdevname(bh->b_bdev, b), 138 bdevname(bh->b_bdev, b),
145 (unsigned long long)bh->b_blocknr); 139 (unsigned long long)bh->b_blocknr, msg);
146} 140}
147 141
148/* 142/*
@@ -177,17 +171,10 @@ EXPORT_SYMBOL(end_buffer_read_sync);
177 171
178void end_buffer_write_sync(struct buffer_head *bh, int uptodate) 172void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
179{ 173{
180 char b[BDEVNAME_SIZE];
181
182 if (uptodate) { 174 if (uptodate) {
183 set_buffer_uptodate(bh); 175 set_buffer_uptodate(bh);
184 } else { 176 } else {
185 if (!quiet_error(bh)) { 177 buffer_io_error(bh, ", lost sync page write");
186 buffer_io_error(bh);
187 printk(KERN_WARNING "lost page write due to "
188 "I/O error on %s\n",
189 bdevname(bh->b_bdev, b));
190 }
191 set_buffer_write_io_error(bh); 178 set_buffer_write_io_error(bh);
192 clear_buffer_uptodate(bh); 179 clear_buffer_uptodate(bh);
193 } 180 }
@@ -304,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
304 set_buffer_uptodate(bh); 291 set_buffer_uptodate(bh);
305 } else { 292 } else {
306 clear_buffer_uptodate(bh); 293 clear_buffer_uptodate(bh);
307 if (!quiet_error(bh)) 294 buffer_io_error(bh, ", async page read");
308 buffer_io_error(bh);
309 SetPageError(page); 295 SetPageError(page);
310 } 296 }
311 297
@@ -353,7 +339,6 @@ still_busy:
353 */ 339 */
354void end_buffer_async_write(struct buffer_head *bh, int uptodate) 340void end_buffer_async_write(struct buffer_head *bh, int uptodate)
355{ 341{
356 char b[BDEVNAME_SIZE];
357 unsigned long flags; 342 unsigned long flags;
358 struct buffer_head *first; 343 struct buffer_head *first;
359 struct buffer_head *tmp; 344 struct buffer_head *tmp;
@@ -365,12 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
365 if (uptodate) { 350 if (uptodate) {
366 set_buffer_uptodate(bh); 351 set_buffer_uptodate(bh);
367 } else { 352 } else {
368 if (!quiet_error(bh)) { 353 buffer_io_error(bh, ", lost async page write");
369 buffer_io_error(bh);
370 printk(KERN_WARNING "lost page write due to "
371 "I/O error on %s\n",
372 bdevname(bh->b_bdev, b));
373 }
374 set_bit(AS_EIO, &page->mapping->flags); 354 set_bit(AS_EIO, &page->mapping->flags);
375 set_buffer_write_io_error(bh); 355 set_buffer_write_io_error(bh);
376 clear_buffer_uptodate(bh); 356 clear_buffer_uptodate(bh);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 659f2ea9e6f7..cefca661464b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2638 2638
2639 for (i = 0; i < CEPH_CAP_BITS; i++) 2639 for (i = 0; i < CEPH_CAP_BITS; i++)
2640 if ((dirty & (1 << i)) && 2640 if ((dirty & (1 << i)) &&
2641 flush_tid == ci->i_cap_flush_tid[i]) 2641 (u16)flush_tid == ci->i_cap_flush_tid[i])
2642 cleaned |= 1 << i; 2642 cleaned |= 1 << i;
2643 2643
2644 dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," 2644 dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
diff --git a/fs/dcache.c b/fs/dcache.c
index a6c5d7e9d622..e368d4f412f9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -777,6 +777,7 @@ restart:
777 struct dentry *parent = lock_parent(dentry); 777 struct dentry *parent = lock_parent(dentry);
778 if (likely(!dentry->d_lockref.count)) { 778 if (likely(!dentry->d_lockref.count)) {
779 __dentry_kill(dentry); 779 __dentry_kill(dentry);
780 dput(parent);
780 goto restart; 781 goto restart;
781 } 782 }
782 if (parent) 783 if (parent)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7015db0bafd1..eb742d0e67ff 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1354,13 +1354,6 @@ set_qf_format:
1354 "not specified."); 1354 "not specified.");
1355 return 0; 1355 return 0;
1356 } 1356 }
1357 } else {
1358 if (sbi->s_jquota_fmt) {
1359 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1360 "specified with no journaling "
1361 "enabled.");
1362 return 0;
1363 }
1364 } 1357 }
1365#endif 1358#endif
1366 return 1; 1359 return 1;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 37043d0b2be8..0b16fb4c06d3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3603,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3603 } 3603 }
3604 } 3604 }
3605 3605
3606 allocated = ext4_split_extent(handle, inode, ppath, 3606 err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3607 &split_map, split_flag, flags); 3607 flags);
3608 if (allocated < 0) 3608 if (err > 0)
3609 err = allocated; 3609 err = 0;
3610
3611out: 3610out:
3612 /* If we have gotten a failure, don't zero out status tree */ 3611 /* If we have gotten a failure, don't zero out status tree */
3613 if (!err) 3612 if (!err)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index aca7b24a4432..8131be8c0af3 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); 137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
138 } 138 }
139 139
140 iocb->private = &overwrite;
140 if (o_direct) { 141 if (o_direct) {
141 blk_start_plug(&plug); 142 blk_start_plug(&plug);
142 143
143 iocb->private = &overwrite;
144 144
145 /* check whether we do a DIO overwrite or not */ 145 /* check whether we do a DIO overwrite or not */
146 if (ext4_should_dioread_nolock(inode) && !aio_mutex && 146 if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8012a5daf401..ac644c31ca67 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -887,6 +887,10 @@ got:
887 struct buffer_head *block_bitmap_bh; 887 struct buffer_head *block_bitmap_bh;
888 888
889 block_bitmap_bh = ext4_read_block_bitmap(sb, group); 889 block_bitmap_bh = ext4_read_block_bitmap(sb, group);
890 if (!block_bitmap_bh) {
891 err = -EIO;
892 goto out;
893 }
890 BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); 894 BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
891 err = ext4_journal_get_write_access(handle, block_bitmap_bh); 895 err = ext4_journal_get_write_access(handle, block_bitmap_bh);
892 if (err) { 896 if (err) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e9777f93cf05..3356ab5395f4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4959,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4959 if (val) 4959 if (val)
4960 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4960 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4961 else { 4961 else {
4962 jbd2_journal_flush(journal); 4962 err = jbd2_journal_flush(journal);
4963 if (err < 0) {
4964 jbd2_journal_unlock_updates(journal);
4965 ext4_inode_resume_unlocked_dio(inode);
4966 return err;
4967 }
4963 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4968 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4964 } 4969 }
4965 ext4_set_aops(inode); 4970 ext4_set_aops(inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 123798c5ac31..426211882f72 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1816,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1816 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; 1816 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1817 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; 1817 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1818 ext4fs_dirhash(name, namelen, &hinfo); 1818 ext4fs_dirhash(name, namelen, &hinfo);
1819 memset(frames, 0, sizeof(frames));
1819 frame = frames; 1820 frame = frames;
1820 frame->entries = entries; 1821 frame->entries = entries;
1821 frame->at = entries; 1822 frame->at = entries;
1822 frame->bh = bh; 1823 frame->bh = bh;
1823 bh = bh2; 1824 bh = bh2;
1824 1825
1825 ext4_handle_dirty_dx_node(handle, dir, frame->bh); 1826 retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1826 ext4_handle_dirty_dirent_node(handle, dir, bh); 1827 if (retval)
1828 goto out_frames;
1829 retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
1830 if (retval)
1831 goto out_frames;
1827 1832
1828 de = do_split(handle,dir, &bh, frame, &hinfo); 1833 de = do_split(handle,dir, &bh, frame, &hinfo);
1829 if (IS_ERR(de)) { 1834 if (IS_ERR(de)) {
1830 /* 1835 retval = PTR_ERR(de);
1831 * Even if the block split failed, we have to properly write 1836 goto out_frames;
1832 * out all the changes we did so far. Otherwise we can end up
1833 * with corrupted filesystem.
1834 */
1835 ext4_mark_inode_dirty(handle, dir);
1836 dx_release(frames);
1837 return PTR_ERR(de);
1838 } 1837 }
1839 dx_release(frames); 1838 dx_release(frames);
1840 1839
1841 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 1840 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1842 brelse(bh); 1841 brelse(bh);
1843 return retval; 1842 return retval;
1843out_frames:
1844 /*
1845 * Even if the block split failed, we have to properly write
1846 * out all the changes we did so far. Otherwise we can end up
1847 * with corrupted filesystem.
1848 */
1849 ext4_mark_inode_dirty(handle, dir);
1850 dx_release(frames);
1851 return retval;
1844} 1852}
1845 1853
1846/* 1854/*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f298c60f907d..ca4588388fc3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1081 break; 1081 break;
1082 1082
1083 if (meta_bg == 0) 1083 if (meta_bg == 0)
1084 backup_block = group * bpg + blk_off; 1084 backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
1085 else 1085 else
1086 backup_block = (ext4_group_first_block_no(sb, group) + 1086 backup_block = (ext4_group_first_block_no(sb, group) +
1087 ext4_bg_has_super(sb, group)); 1087 ext4_bg_has_super(sb, group));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1eda6ab0ef9d..2c9e6864abd9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3526,6 +3526,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3526#ifdef CONFIG_EXT4_FS_POSIX_ACL 3526#ifdef CONFIG_EXT4_FS_POSIX_ACL
3527 set_opt(sb, POSIX_ACL); 3527 set_opt(sb, POSIX_ACL);
3528#endif 3528#endif
3529 /* don't forget to enable journal_csum when metadata_csum is enabled. */
3530 if (ext4_has_metadata_csum(sb))
3531 set_opt(sb, JOURNAL_CHECKSUM);
3532
3529 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3533 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3530 set_opt(sb, JOURNAL_DATA); 3534 set_opt(sb, JOURNAL_DATA);
3531 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3535 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3943,7 +3947,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3943 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && 3947 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3944 !(sb->s_flags & MS_RDONLY)) 3948 !(sb->s_flags & MS_RDONLY))
3945 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) 3949 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3946 goto failed_mount3; 3950 goto failed_mount3a;
3947 3951
3948 /* 3952 /*
3949 * The first inode we look at is the journal inode. Don't try 3953 * The first inode we look at is the journal inode. Don't try
@@ -3952,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3952 if (!test_opt(sb, NOLOAD) && 3956 if (!test_opt(sb, NOLOAD) &&
3953 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3957 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3954 if (ext4_load_journal(sb, es, journal_devnum)) 3958 if (ext4_load_journal(sb, es, journal_devnum))
3955 goto failed_mount3; 3959 goto failed_mount3a;
3956 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3960 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3957 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3961 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3958 ext4_msg(sb, KERN_ERR, "required journal recovery " 3962 ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -4240,6 +4244,7 @@ failed_mount_wq:
4240 jbd2_journal_destroy(sbi->s_journal); 4244 jbd2_journal_destroy(sbi->s_journal);
4241 sbi->s_journal = NULL; 4245 sbi->s_journal = NULL;
4242 } 4246 }
4247failed_mount3a:
4243 ext4_es_unregister_shrinker(sbi); 4248 ext4_es_unregister_shrinker(sbi);
4244failed_mount3: 4249failed_mount3:
4245 del_timer_sync(&sbi->s_err_report); 4250 del_timer_sync(&sbi->s_err_report);
@@ -4841,6 +4846,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4841 goto restore_opts; 4846 goto restore_opts;
4842 } 4847 }
4843 4848
4849 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4850 test_opt(sb, JOURNAL_CHECKSUM)) {
4851 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4852 "during remount not supported");
4853 err = -EINVAL;
4854 goto restore_opts;
4855 }
4856
4844 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4857 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4845 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4858 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4846 ext4_msg(sb, KERN_ERR, "can't mount with " 4859 ext4_msg(sb, KERN_ERR, "can't mount with "
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index fe839b915116..d67a16f2a45d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -174,27 +174,6 @@ struct iso9660_options{
174 * Compute the hash for the isofs name corresponding to the dentry. 174 * Compute the hash for the isofs name corresponding to the dentry.
175 */ 175 */
176static int 176static int
177isofs_hash_common(struct qstr *qstr, int ms)
178{
179 const char *name;
180 int len;
181
182 len = qstr->len;
183 name = qstr->name;
184 if (ms) {
185 while (len && name[len-1] == '.')
186 len--;
187 }
188
189 qstr->hash = full_name_hash(name, len);
190
191 return 0;
192}
193
194/*
195 * Compute the hash for the isofs name corresponding to the dentry.
196 */
197static int
198isofs_hashi_common(struct qstr *qstr, int ms) 177isofs_hashi_common(struct qstr *qstr, int ms)
199{ 178{
200 const char *name; 179 const char *name;
@@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
263} 242}
264 243
265#ifdef CONFIG_JOLIET 244#ifdef CONFIG_JOLIET
245/*
246 * Compute the hash for the isofs name corresponding to the dentry.
247 */
248static int
249isofs_hash_common(struct qstr *qstr, int ms)
250{
251 const char *name;
252 int len;
253
254 len = qstr->len;
255 name = qstr->name;
256 if (ms) {
257 while (len && name[len-1] == '.')
258 len--;
259 }
260
261 qstr->hash = full_name_hash(name, len);
262
263 return 0;
264}
265
266static int 266static int
267isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) 267isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
268{ 268{
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 8898bbd2b61e..dcead636c33b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -93,6 +93,7 @@
93#include <linux/bio.h> 93#include <linux/bio.h>
94#endif 94#endif
95#include <linux/log2.h> 95#include <linux/log2.h>
96#include <linux/hash.h>
96 97
97static struct kmem_cache *revoke_record_cache; 98static struct kmem_cache *revoke_record_cache;
98static struct kmem_cache *revoke_table_cache; 99static struct kmem_cache *revoke_table_cache;
@@ -129,15 +130,11 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int);
129 130
130/* Utility functions to maintain the revoke table */ 131/* Utility functions to maintain the revoke table */
131 132
132/* Borrowed from buffer.c: this is a tried and tested block hash function */
133static inline int hash(journal_t *journal, unsigned int block) 133static inline int hash(journal_t *journal, unsigned int block)
134{ 134{
135 struct jbd_revoke_table_s *table = journal->j_revoke; 135 struct jbd_revoke_table_s *table = journal->j_revoke;
136 int hash_shift = table->hash_shift;
137 136
138 return ((block << (hash_shift - 6)) ^ 137 return hash_32(block, table->hash_shift);
139 (block >> 13) ^
140 (block << (hash_shift - 12))) & (table->hash_size - 1);
141} 138}
142 139
143static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, 140static int insert_revoke_hash(journal_t *journal, unsigned int blocknr,
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index d5e95a175c92..c6cbaef2bda1 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -92,6 +92,7 @@
92#include <linux/init.h> 92#include <linux/init.h>
93#include <linux/bio.h> 93#include <linux/bio.h>
94#include <linux/log2.h> 94#include <linux/log2.h>
95#include <linux/hash.h>
95#endif 96#endif
96 97
97static struct kmem_cache *jbd2_revoke_record_cache; 98static struct kmem_cache *jbd2_revoke_record_cache;
@@ -130,16 +131,9 @@ static void flush_descriptor(journal_t *, struct buffer_head *, int, int);
130 131
131/* Utility functions to maintain the revoke table */ 132/* Utility functions to maintain the revoke table */
132 133
133/* Borrowed from buffer.c: this is a tried and tested block hash function */
134static inline int hash(journal_t *journal, unsigned long long block) 134static inline int hash(journal_t *journal, unsigned long long block)
135{ 135{
136 struct jbd2_revoke_table_s *table = journal->j_revoke; 136 return hash_64(block, journal->j_revoke->hash_shift);
137 int hash_shift = table->hash_shift;
138 int hash = (int)block ^ (int)((block >> 31) >> 1);
139
140 return ((hash << (hash_shift - 6)) ^
141 (hash >> 13) ^
142 (hash << (hash_shift - 12))) & (table->hash_size - 1);
143} 137}
144 138
145static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, 139static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
diff --git a/fs/namei.c b/fs/namei.c
index 922f27068c4c..db5fe86319e6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3154,7 +3154,8 @@ static int do_tmpfile(int dfd, struct filename *pathname,
3154 if (error) 3154 if (error)
3155 goto out2; 3155 goto out2;
3156 audit_inode(pathname, nd->path.dentry, 0); 3156 audit_inode(pathname, nd->path.dentry, 0);
3157 error = may_open(&nd->path, op->acc_mode, op->open_flag); 3157 /* Don't check for other permissions, the inode was just created */
3158 error = may_open(&nd->path, MAY_OPEN, op->open_flag);
3158 if (error) 3159 if (error)
3159 goto out2; 3160 goto out2;
3160 file->f_path.mnt = nd->path.mnt; 3161 file->f_path.mnt = nd->path.mnt;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 5228f201d3d5..4f46f7a05289 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
378 loff_t offset = header->args.offset; 378 loff_t offset = header->args.offset;
379 size_t count = header->args.count; 379 size_t count = header->args.count;
380 struct page **pages = header->args.pages; 380 struct page **pages = header->args.pages;
381 int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; 381 int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
382 unsigned int pg_len; 382 unsigned int pg_len;
383 struct blk_plug plug; 383 struct blk_plug plug;
384 int i; 384 int i;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index 2a15fa437880..dbe5839cdeba 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
65 65
66 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); 66 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
67 67
68 mutex_lock(&nn->bl_mutex);
68 bl_pipe_msg.bl_wq = &nn->bl_wq; 69 bl_pipe_msg.bl_wq = &nn->bl_wq;
69 70
70 b->simple.len += 4; /* single volume */ 71 b->simple.len += 4; /* single volume */
71 if (b->simple.len > PAGE_SIZE) 72 if (b->simple.len > PAGE_SIZE)
72 return -EIO; 73 goto out_unlock;
73 74
74 memset(msg, 0, sizeof(*msg)); 75 memset(msg, 0, sizeof(*msg));
75 msg->len = sizeof(*bl_msg) + b->simple.len; 76 msg->len = sizeof(*bl_msg) + b->simple.len;
76 msg->data = kzalloc(msg->len, gfp_mask); 77 msg->data = kzalloc(msg->len, gfp_mask);
77 if (!msg->data) 78 if (!msg->data)
78 goto out; 79 goto out_free_data;
79 80
80 bl_msg = msg->data; 81 bl_msg = msg->data;
81 bl_msg->type = BL_DEVICE_MOUNT, 82 bl_msg->type = BL_DEVICE_MOUNT,
@@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
87 rc = rpc_queue_upcall(nn->bl_device_pipe, msg); 88 rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
88 if (rc < 0) { 89 if (rc < 0) {
89 remove_wait_queue(&nn->bl_wq, &wq); 90 remove_wait_queue(&nn->bl_wq, &wq);
90 goto out; 91 goto out_free_data;
91 } 92 }
92 93
93 set_current_state(TASK_UNINTERRUPTIBLE); 94 set_current_state(TASK_UNINTERRUPTIBLE);
@@ -97,12 +98,14 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
97 if (reply->status != BL_DEVICE_REQUEST_PROC) { 98 if (reply->status != BL_DEVICE_REQUEST_PROC) {
98 printk(KERN_WARNING "%s failed to decode device: %d\n", 99 printk(KERN_WARNING "%s failed to decode device: %d\n",
99 __func__, reply->status); 100 __func__, reply->status);
100 goto out; 101 goto out_free_data;
101 } 102 }
102 103
103 dev = MKDEV(reply->major, reply->minor); 104 dev = MKDEV(reply->major, reply->minor);
104out: 105out_free_data:
105 kfree(msg->data); 106 kfree(msg->data);
107out_unlock:
108 mutex_unlock(&nn->bl_mutex);
106 return dev; 109 return dev;
107} 110}
108 111
@@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net)
232 struct nfs_net *nn = net_generic(net, nfs_net_id); 235 struct nfs_net *nn = net_generic(net, nfs_net_id);
233 struct dentry *dentry; 236 struct dentry *dentry;
234 237
238 mutex_init(&nn->bl_mutex);
235 init_waitqueue_head(&nn->bl_wq); 239 init_waitqueue_head(&nn->bl_wq);
236 nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); 240 nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
237 if (IS_ERR(nn->bl_device_pipe)) 241 if (IS_ERR(nn->bl_device_pipe))
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5853f53db732..7f3f60641344 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -125,6 +125,8 @@ again:
125 continue; 125 continue;
126 if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) 126 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
127 continue; 127 continue;
128 if (!nfs4_valid_open_stateid(state))
129 continue;
128 if (!nfs4_stateid_match(&state->stateid, stateid)) 130 if (!nfs4_stateid_match(&state->stateid, stateid))
129 continue; 131 continue;
130 get_nfs_open_context(ctx); 132 get_nfs_open_context(ctx);
@@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
193{ 195{
194 int res = 0; 196 int res = 0;
195 197
196 res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); 198 if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
199 res = nfs4_proc_delegreturn(inode,
200 delegation->cred,
201 &delegation->stateid,
202 issync);
197 nfs_free_delegation(delegation); 203 nfs_free_delegation(delegation);
198 return res; 204 return res;
199} 205}
@@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
380{ 386{
381 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 387 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
382 struct nfs_inode *nfsi = NFS_I(inode); 388 struct nfs_inode *nfsi = NFS_I(inode);
383 int err; 389 int err = 0;
384 390
385 if (delegation == NULL) 391 if (delegation == NULL)
386 return 0; 392 return 0;
387 do { 393 do {
394 if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
395 break;
388 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 396 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
389 if (!issync || err != -EAGAIN) 397 if (!issync || err != -EAGAIN)
390 break; 398 break;
@@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
605 rcu_read_unlock(); 613 rcu_read_unlock();
606} 614}
607 615
616static void nfs_revoke_delegation(struct inode *inode)
617{
618 struct nfs_delegation *delegation;
619 rcu_read_lock();
620 delegation = rcu_dereference(NFS_I(inode)->delegation);
621 if (delegation != NULL) {
622 set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
623 nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
624 }
625 rcu_read_unlock();
626}
627
608void nfs_remove_bad_delegation(struct inode *inode) 628void nfs_remove_bad_delegation(struct inode *inode)
609{ 629{
610 struct nfs_delegation *delegation; 630 struct nfs_delegation *delegation;
611 631
632 nfs_revoke_delegation(inode);
612 delegation = nfs_inode_detach_delegation(inode); 633 delegation = nfs_inode_detach_delegation(inode);
613 if (delegation) { 634 if (delegation) {
614 nfs_inode_find_state_and_recover(inode, &delegation->stateid); 635 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 5c1cce39297f..e3c20a3ccc93 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -31,6 +31,7 @@ enum {
31 NFS_DELEGATION_RETURN_IF_CLOSED, 31 NFS_DELEGATION_RETURN_IF_CLOSED,
32 NFS_DELEGATION_REFERENCED, 32 NFS_DELEGATION_REFERENCED,
33 NFS_DELEGATION_RETURNING, 33 NFS_DELEGATION_RETURNING,
34 NFS_DELEGATION_REVOKED,
34}; 35};
35 36
36int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 37int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 105ccc30572d..9b0c55cb2a2e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1527 case -ENOENT: 1527 case -ENOENT:
1528 d_drop(dentry); 1528 d_drop(dentry);
1529 d_add(dentry, NULL); 1529 d_add(dentry, NULL);
1530 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1530 break; 1531 break;
1531 case -EISDIR: 1532 case -EISDIR:
1532 case -ENOTDIR: 1533 case -ENOTDIR:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 20cffc830468..10bf07280f4a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref)
266{ 266{
267 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 267 struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
268 268
269 nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
269 if (dreq->l_ctx != NULL) 270 if (dreq->l_ctx != NULL)
270 nfs_put_lock_context(dreq->l_ctx); 271 nfs_put_lock_context(dreq->l_ctx);
271 if (dreq->ctx != NULL) 272 if (dreq->ctx != NULL)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 46fab1cb455a..7afb52f6a25a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task,
145 case -NFS4ERR_DELEG_REVOKED: 145 case -NFS4ERR_DELEG_REVOKED:
146 case -NFS4ERR_ADMIN_REVOKED: 146 case -NFS4ERR_ADMIN_REVOKED:
147 case -NFS4ERR_BAD_STATEID: 147 case -NFS4ERR_BAD_STATEID:
148 if (state == NULL)
149 break;
150 nfs_remove_bad_delegation(state->inode);
151 case -NFS4ERR_OPENMODE: 148 case -NFS4ERR_OPENMODE:
152 if (state == NULL) 149 if (state == NULL)
153 break; 150 break;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6388a59f2add..00689a8a85e4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -626,7 +626,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
626{ 626{
627 struct inode *inode = dentry->d_inode; 627 struct inode *inode = dentry->d_inode;
628 int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; 628 int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
629 int err; 629 int err = 0;
630 630
631 trace_nfs_getattr_enter(inode); 631 trace_nfs_getattr_enter(inode);
632 /* Flush out writes to the server in order to update c/mtime. */ 632 /* Flush out writes to the server in order to update c/mtime. */
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index ef221fb8a183..f0e06e4acbef 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -19,6 +19,7 @@ struct nfs_net {
19 struct rpc_pipe *bl_device_pipe; 19 struct rpc_pipe *bl_device_pipe;
20 struct bl_dev_msg bl_mount_reply; 20 struct bl_dev_msg bl_mount_reply;
21 wait_queue_head_t bl_wq; 21 wait_queue_head_t bl_wq;
22 struct mutex bl_mutex;
22 struct list_head nfs_client_list; 23 struct list_head nfs_client_list;
23 struct list_head nfs_volume_list; 24 struct list_head nfs_volume_list;
24#if IS_ENABLED(CONFIG_NFS_V4) 25#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 405bd95c1f58..69dc20a743f9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -370,11 +370,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
370 case -NFS4ERR_DELEG_REVOKED: 370 case -NFS4ERR_DELEG_REVOKED:
371 case -NFS4ERR_ADMIN_REVOKED: 371 case -NFS4ERR_ADMIN_REVOKED:
372 case -NFS4ERR_BAD_STATEID: 372 case -NFS4ERR_BAD_STATEID:
373 if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) {
374 nfs_remove_bad_delegation(inode);
375 exception->retry = 1;
376 break;
377 }
378 if (state == NULL) 373 if (state == NULL)
379 break; 374 break;
380 ret = nfs4_schedule_stateid_recovery(server, state); 375 ret = nfs4_schedule_stateid_recovery(server, state);
@@ -1654,7 +1649,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
1654 nfs_inode_find_state_and_recover(state->inode, 1649 nfs_inode_find_state_and_recover(state->inode,
1655 stateid); 1650 stateid);
1656 nfs4_schedule_stateid_recovery(server, state); 1651 nfs4_schedule_stateid_recovery(server, state);
1657 return 0; 1652 return -EAGAIN;
1658 case -NFS4ERR_DELAY: 1653 case -NFS4ERR_DELAY:
1659 case -NFS4ERR_GRACE: 1654 case -NFS4ERR_GRACE:
1660 set_bit(NFS_DELEGATED_STATE, &state->flags); 1655 set_bit(NFS_DELEGATED_STATE, &state->flags);
@@ -2109,46 +2104,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
2109 return ret; 2104 return ret;
2110} 2105}
2111 2106
2107static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
2108{
2109 nfs_remove_bad_delegation(state->inode);
2110 write_seqlock(&state->seqlock);
2111 nfs4_stateid_copy(&state->stateid, &state->open_stateid);
2112 write_sequnlock(&state->seqlock);
2113 clear_bit(NFS_DELEGATED_STATE, &state->flags);
2114}
2115
2116static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
2117{
2118 if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
2119 nfs_finish_clear_delegation_stateid(state);
2120}
2121
2122static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
2123{
2124 /* NFSv4.0 doesn't allow for delegation recovery on open expire */
2125 nfs40_clear_delegation_stateid(state);
2126 return nfs4_open_expired(sp, state);
2127}
2128
2112#if defined(CONFIG_NFS_V4_1) 2129#if defined(CONFIG_NFS_V4_1)
2113static void nfs41_clear_delegation_stateid(struct nfs4_state *state) 2130static void nfs41_check_delegation_stateid(struct nfs4_state *state)
2114{ 2131{
2115 struct nfs_server *server = NFS_SERVER(state->inode); 2132 struct nfs_server *server = NFS_SERVER(state->inode);
2116 nfs4_stateid *stateid = &state->stateid; 2133 nfs4_stateid stateid;
2117 struct nfs_delegation *delegation; 2134 struct nfs_delegation *delegation;
2118 struct rpc_cred *cred = NULL; 2135 struct rpc_cred *cred;
2119 int status = -NFS4ERR_BAD_STATEID; 2136 int status;
2120
2121 /* If a state reset has been done, test_stateid is unneeded */
2122 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
2123 return;
2124 2137
2125 /* Get the delegation credential for use by test/free_stateid */ 2138 /* Get the delegation credential for use by test/free_stateid */
2126 rcu_read_lock(); 2139 rcu_read_lock();
2127 delegation = rcu_dereference(NFS_I(state->inode)->delegation); 2140 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
2128 if (delegation != NULL && 2141 if (delegation == NULL) {
2129 nfs4_stateid_match(&delegation->stateid, stateid)) {
2130 cred = get_rpccred(delegation->cred);
2131 rcu_read_unlock();
2132 status = nfs41_test_stateid(server, stateid, cred);
2133 trace_nfs4_test_delegation_stateid(state, NULL, status);
2134 } else
2135 rcu_read_unlock(); 2142 rcu_read_unlock();
2143 return;
2144 }
2145
2146 nfs4_stateid_copy(&stateid, &delegation->stateid);
2147 cred = get_rpccred(delegation->cred);
2148 rcu_read_unlock();
2149 status = nfs41_test_stateid(server, &stateid, cred);
2150 trace_nfs4_test_delegation_stateid(state, NULL, status);
2136 2151
2137 if (status != NFS_OK) { 2152 if (status != NFS_OK) {
2138 /* Free the stateid unless the server explicitly 2153 /* Free the stateid unless the server explicitly
2139 * informs us the stateid is unrecognized. */ 2154 * informs us the stateid is unrecognized. */
2140 if (status != -NFS4ERR_BAD_STATEID) 2155 if (status != -NFS4ERR_BAD_STATEID)
2141 nfs41_free_stateid(server, stateid, cred); 2156 nfs41_free_stateid(server, &stateid, cred);
2142 nfs_remove_bad_delegation(state->inode); 2157 nfs_finish_clear_delegation_stateid(state);
2143
2144 write_seqlock(&state->seqlock);
2145 nfs4_stateid_copy(&state->stateid, &state->open_stateid);
2146 write_sequnlock(&state->seqlock);
2147 clear_bit(NFS_DELEGATED_STATE, &state->flags);
2148 } 2158 }
2149 2159
2150 if (cred != NULL) 2160 put_rpccred(cred);
2151 put_rpccred(cred);
2152} 2161}
2153 2162
2154/** 2163/**
@@ -2192,7 +2201,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
2192{ 2201{
2193 int status; 2202 int status;
2194 2203
2195 nfs41_clear_delegation_stateid(state); 2204 nfs41_check_delegation_stateid(state);
2196 status = nfs41_check_open_stateid(state); 2205 status = nfs41_check_open_stateid(state);
2197 if (status != NFS_OK) 2206 if (status != NFS_OK)
2198 status = nfs4_open_expired(sp, state); 2207 status = nfs4_open_expired(sp, state);
@@ -2231,19 +2240,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2231 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 2240 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
2232 2241
2233 ret = _nfs4_proc_open(opendata); 2242 ret = _nfs4_proc_open(opendata);
2234 if (ret != 0) { 2243 if (ret != 0)
2235 if (ret == -ENOENT) {
2236 dentry = opendata->dentry;
2237 if (dentry->d_inode)
2238 d_delete(dentry);
2239 else if (d_unhashed(dentry))
2240 d_add(dentry, NULL);
2241
2242 nfs_set_verifier(dentry,
2243 nfs_save_change_attribute(opendata->dir->d_inode));
2244 }
2245 goto out; 2244 goto out;
2246 }
2247 2245
2248 state = nfs4_opendata_to_nfs4_state(opendata); 2246 state = nfs4_opendata_to_nfs4_state(opendata);
2249 ret = PTR_ERR(state); 2247 ret = PTR_ERR(state);
@@ -4841,9 +4839,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
4841 case -NFS4ERR_DELEG_REVOKED: 4839 case -NFS4ERR_DELEG_REVOKED:
4842 case -NFS4ERR_ADMIN_REVOKED: 4840 case -NFS4ERR_ADMIN_REVOKED:
4843 case -NFS4ERR_BAD_STATEID: 4841 case -NFS4ERR_BAD_STATEID:
4844 if (state == NULL)
4845 break;
4846 nfs_remove_bad_delegation(state->inode);
4847 case -NFS4ERR_OPENMODE: 4842 case -NFS4ERR_OPENMODE:
4848 if (state == NULL) 4843 if (state == NULL)
4849 break; 4844 break;
@@ -8341,7 +8336,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
8341static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { 8336static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
8342 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 8337 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
8343 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 8338 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
8344 .recover_open = nfs4_open_expired, 8339 .recover_open = nfs40_open_expired,
8345 .recover_lock = nfs4_lock_expired, 8340 .recover_lock = nfs4_lock_expired,
8346 .establish_clid = nfs4_init_clientid, 8341 .establish_clid = nfs4_init_clientid,
8347}; 8342};
@@ -8408,8 +8403,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
8408 | NFS_CAP_CHANGE_ATTR 8403 | NFS_CAP_CHANGE_ATTR
8409 | NFS_CAP_POSIX_LOCK 8404 | NFS_CAP_POSIX_LOCK
8410 | NFS_CAP_STATEID_NFSV41 8405 | NFS_CAP_STATEID_NFSV41
8411 | NFS_CAP_ATOMIC_OPEN_V1 8406 | NFS_CAP_ATOMIC_OPEN_V1,
8412 | NFS_CAP_SEEK,
8413 .init_client = nfs41_init_client, 8407 .init_client = nfs41_init_client,
8414 .shutdown_client = nfs41_shutdown_client, 8408 .shutdown_client = nfs41_shutdown_client,
8415 .match_stateid = nfs41_match_stateid, 8409 .match_stateid = nfs41_match_stateid,
@@ -8431,7 +8425,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
8431 | NFS_CAP_CHANGE_ATTR 8425 | NFS_CAP_CHANGE_ATTR
8432 | NFS_CAP_POSIX_LOCK 8426 | NFS_CAP_POSIX_LOCK
8433 | NFS_CAP_STATEID_NFSV41 8427 | NFS_CAP_STATEID_NFSV41
8434 | NFS_CAP_ATOMIC_OPEN_V1, 8428 | NFS_CAP_ATOMIC_OPEN_V1
8429 | NFS_CAP_SEEK,
8435 .init_client = nfs41_init_client, 8430 .init_client = nfs41_init_client,
8436 .shutdown_client = nfs41_shutdown_client, 8431 .shutdown_client = nfs41_shutdown_client,
8437 .match_stateid = nfs41_match_stateid, 8432 .match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 12493846a2d3..f83b02dc9166 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -715,8 +715,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
715 715
716 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) 716 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
717 nfs_release_request(req); 717 nfs_release_request(req);
718 else
719 WARN_ON_ONCE(1);
720} 718}
721 719
722static void 720static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index cdeb3cfd6f32..0beb023f25ac 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1272,7 +1272,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
1272 */ 1272 */
1273 if (argp->opcnt == resp->opcnt) 1273 if (argp->opcnt == resp->opcnt)
1274 return false; 1274 return false;
1275 1275 if (next->opnum == OP_ILLEGAL)
1276 return false;
1276 nextd = OPDESC(next); 1277 nextd = OPDESC(next);
1277 /* 1278 /*
1278 * Rest of 2.6.3.1.1: certain operations will return WRONGSEC 1279 * Rest of 2.6.3.1.1: certain operations will return WRONGSEC
@@ -1589,7 +1590,8 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
1589static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, 1590static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
1590 struct nfsd4_op *op) 1591 struct nfsd4_op *op)
1591{ 1592{
1592 return NFS4_MAX_SESSIONID_LEN + 20; 1593 return (op_encode_hdr_size
1594 + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
1593} 1595}
1594 1596
1595static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1597static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@@ -1893,6 +1895,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
1893 .op_func = (nfsd4op_func)nfsd4_sequence, 1895 .op_func = (nfsd4op_func)nfsd4_sequence,
1894 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1896 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1895 .op_name = "OP_SEQUENCE", 1897 .op_name = "OP_SEQUENCE",
1898 .op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize,
1896 }, 1899 },
1897 [OP_DESTROY_CLIENTID] = { 1900 [OP_DESTROY_CLIENTID] = {
1898 .op_func = (nfsd4op_func)nfsd4_destroy_clientid, 1901 .op_func = (nfsd4op_func)nfsd4_destroy_clientid,
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 700129940c6e..41e39102743a 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
229 &fsnotify_mark_srcu); 229 &fsnotify_mark_srcu);
230 } 230 }
231 231
232 /*
233 * We need to merge inode & vfsmount mark lists so that inode mark
234 * ignore masks are properly reflected for mount mark notifications.
235 * That's why this traversal is so complicated...
236 */
232 while (inode_node || vfsmount_node) { 237 while (inode_node || vfsmount_node) {
233 inode_group = vfsmount_group = NULL; 238 inode_group = NULL;
239 inode_mark = NULL;
240 vfsmount_group = NULL;
241 vfsmount_mark = NULL;
234 242
235 if (inode_node) { 243 if (inode_node) {
236 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), 244 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
244 vfsmount_group = vfsmount_mark->group; 252 vfsmount_group = vfsmount_mark->group;
245 } 253 }
246 254
247 if (inode_group > vfsmount_group) { 255 if (inode_group && vfsmount_group) {
248 /* handle inode */ 256 int cmp = fsnotify_compare_groups(inode_group,
249 ret = send_to_group(to_tell, inode_mark, NULL, mask, 257 vfsmount_group);
250 data, data_is, cookie, file_name); 258 if (cmp > 0) {
251 /* we didn't use the vfsmount_mark */ 259 inode_group = NULL;
252 vfsmount_group = NULL; 260 inode_mark = NULL;
253 } else if (vfsmount_group > inode_group) { 261 } else if (cmp < 0) {
254 ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, 262 vfsmount_group = NULL;
255 data, data_is, cookie, file_name); 263 vfsmount_mark = NULL;
256 inode_group = NULL; 264 }
257 } else {
258 ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
259 mask, data, data_is, cookie,
260 file_name);
261 } 265 }
266 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
267 data, data_is, cookie, file_name);
262 268
263 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) 269 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
264 goto out; 270 goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 9c0898c4cfe1..3b68b0ae0a97 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
12/* protects reads of inode and vfsmount marks list */ 12/* protects reads of inode and vfsmount marks list */
13extern struct srcu_struct fsnotify_mark_srcu; 13extern struct srcu_struct fsnotify_mark_srcu;
14 14
15/* compare two groups for sorting of marks lists */
16extern int fsnotify_compare_groups(struct fsnotify_group *a,
17 struct fsnotify_group *b);
18
15extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, 19extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
16 __u32 mask); 20 __u32 mask);
17/* add a mark to an inode */ 21/* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 9ce062218de9..dfbf5447eea4 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
194{ 194{
195 struct fsnotify_mark *lmark, *last = NULL; 195 struct fsnotify_mark *lmark, *last = NULL;
196 int ret = 0; 196 int ret = 0;
197 int cmp;
197 198
198 mark->flags |= FSNOTIFY_MARK_FLAG_INODE; 199 mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
199 200
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
219 goto out; 220 goto out;
220 } 221 }
221 222
222 if (mark->group->priority < lmark->group->priority) 223 cmp = fsnotify_compare_groups(lmark->group, mark->group);
223 continue; 224 if (cmp < 0)
224
225 if ((mark->group->priority == lmark->group->priority) &&
226 (mark->group < lmark->group))
227 continue; 225 continue;
228 226
229 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); 227 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
@@ -288,20 +286,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
288 spin_unlock(&inode->i_lock); 286 spin_unlock(&inode->i_lock);
289 287
290 /* In case the dropping of a reference would nuke next_i. */ 288 /* In case the dropping of a reference would nuke next_i. */
291 if ((&next_i->i_sb_list != list) && 289 while (&next_i->i_sb_list != list) {
292 atomic_read(&next_i->i_count)) {
293 spin_lock(&next_i->i_lock); 290 spin_lock(&next_i->i_lock);
294 if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { 291 if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
292 atomic_read(&next_i->i_count)) {
295 __iget(next_i); 293 __iget(next_i);
296 need_iput = next_i; 294 need_iput = next_i;
295 spin_unlock(&next_i->i_lock);
296 break;
297 } 297 }
298 spin_unlock(&next_i->i_lock); 298 spin_unlock(&next_i->i_lock);
299 next_i = list_entry(next_i->i_sb_list.next,
300 struct inode, i_sb_list);
299 } 301 }
300 302
301 /* 303 /*
302 * We can safely drop inode_sb_list_lock here because we hold 304 * We can safely drop inode_sb_list_lock here because either
303 * references on both inode and next_i. Also no new inodes 305 * we actually hold references on both inode and next_i or
304 * will be added since the umount has begun. 306 * end of list. Also no new inodes will be added since the
307 * umount has begun.
305 */ 308 */
306 spin_unlock(&inode_sb_list_lock); 309 spin_unlock(&inode_sb_list_lock);
307 310
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d90deaa08e78..34c38fabf514 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
210} 210}
211 211
212/* 212/*
213 * Sorting function for lists of fsnotify marks.
214 *
215 * Fanotify supports different notification classes (reflected as priority of
216 * notification group). Events shall be passed to notification groups in
217 * decreasing priority order. To achieve this marks in notification lists for
218 * inodes and vfsmounts are sorted so that priorities of corresponding groups
219 * are descending.
220 *
221 * Furthermore correct handling of the ignore mask requires processing inode
222 * and vfsmount marks of each group together. Using the group address as
223 * further sort criterion provides a unique sorting order and thus we can
224 * merge inode and vfsmount lists of marks in linear time and find groups
225 * present in both lists.
226 *
227 * A return value of 1 signifies that b has priority over a.
228 * A return value of 0 signifies that the two marks have to be handled together.
229 * A return value of -1 signifies that a has priority over b.
230 */
231int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
232{
233 if (a == b)
234 return 0;
235 if (!a)
236 return 1;
237 if (!b)
238 return -1;
239 if (a->priority < b->priority)
240 return 1;
241 if (a->priority > b->priority)
242 return -1;
243 if (a < b)
244 return 1;
245 return -1;
246}
247
248/*
213 * Attach an initialized mark to a given group and fs object. 249 * Attach an initialized mark to a given group and fs object.
214 * These marks may be used for the fsnotify backend to determine which 250 * These marks may be used for the fsnotify backend to determine which
215 * event types should be delivered to which group. 251 * event types should be delivered to which group.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ac851e8376b1..faefa72a11eb 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
153 struct mount *m = real_mount(mnt); 153 struct mount *m = real_mount(mnt);
154 struct fsnotify_mark *lmark, *last = NULL; 154 struct fsnotify_mark *lmark, *last = NULL;
155 int ret = 0; 155 int ret = 0;
156 int cmp;
156 157
157 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; 158 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
158 159
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
178 goto out; 179 goto out;
179 } 180 }
180 181
181 if (mark->group->priority < lmark->group->priority) 182 cmp = fsnotify_compare_groups(lmark->group, mark->group);
182 continue; 183 if (cmp < 0)
183
184 if ((mark->group->priority == lmark->group->priority) &&
185 (mark->group < lmark->group))
186 continue; 184 continue;
187 185
188 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); 186 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 97de0fbd9f78..a96044004064 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
925 size_t veclen, size_t total) 925 size_t veclen, size_t total)
926{ 926{
927 int ret; 927 int ret;
928 struct msghdr msg; 928 struct msghdr msg = {.msg_flags = 0,};
929 929
930 if (sock == NULL) { 930 if (sock == NULL) {
931 ret = -EINVAL; 931 ret = -EINVAL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8add6f1030d7..b931e04e3388 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -158,7 +158,7 @@ bail_add:
158 * NOTE: This dentry already has ->d_op set from 158 * NOTE: This dentry already has ->d_op set from
159 * ocfs2_get_parent() and ocfs2_get_dentry() 159 * ocfs2_get_parent() and ocfs2_get_dentry()
160 */ 160 */
161 if (ret) 161 if (!IS_ERR_OR_NULL(ret))
162 dentry = ret; 162 dentry = ret;
163 163
164 status = ocfs2_dentry_attach_lock(dentry, inode, 164 status = ocfs2_dentry_attach_lock(dentry, inode,
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index e60125976873..34355818a2e0 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,4 +1,4 @@
1config OVERLAYFS_FS 1config OVERLAY_FS
2 tristate "Overlay filesystem support" 2 tristate "Overlay filesystem support"
3 help 3 help
4 An overlay filesystem combines two filesystems - an 'upper' filesystem 4 An overlay filesystem combines two filesystems - an 'upper' filesystem
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 8f91889480d0..900daed3e91d 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -2,6 +2,6 @@
2# Makefile for the overlay filesystem. 2# Makefile for the overlay filesystem.
3# 3#
4 4
5obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o 5obj-$(CONFIG_OVERLAY_FS) += overlay.o
6 6
7overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o 7overlay-objs := super.o inode.o dir.o readdir.o copy_up.o
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 15cd91ad9940..8ffc4b980f1b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -284,8 +284,7 @@ out:
284 return ERR_PTR(err); 284 return ERR_PTR(err);
285} 285}
286 286
287static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, 287static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
288 enum ovl_path_type type)
289{ 288{
290 int err; 289 int err;
291 struct dentry *ret = NULL; 290 struct dentry *ret = NULL;
@@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry,
294 err = ovl_check_empty_dir(dentry, &list); 293 err = ovl_check_empty_dir(dentry, &list);
295 if (err) 294 if (err)
296 ret = ERR_PTR(err); 295 ret = ERR_PTR(err);
297 else if (type == OVL_PATH_MERGE) 296 else {
298 ret = ovl_clear_empty(dentry, &list); 297 /*
298 * If no upperdentry then skip clearing whiteouts.
299 *
300 * Can race with copy-up, since we don't hold the upperdir
301 * mutex. Doesn't matter, since copy-up can't create a
302 * non-empty directory from an empty one.
303 */
304 if (ovl_dentry_upper(dentry))
305 ret = ovl_clear_empty(dentry, &list);
306 }
299 307
300 ovl_cache_free(&list); 308 ovl_cache_free(&list);
301 309
@@ -487,8 +495,7 @@ out:
487 return err; 495 return err;
488} 496}
489 497
490static int ovl_remove_and_whiteout(struct dentry *dentry, 498static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
491 enum ovl_path_type type, bool is_dir)
492{ 499{
493 struct dentry *workdir = ovl_workdir(dentry); 500 struct dentry *workdir = ovl_workdir(dentry);
494 struct inode *wdir = workdir->d_inode; 501 struct inode *wdir = workdir->d_inode;
@@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
500 int err; 507 int err;
501 508
502 if (is_dir) { 509 if (is_dir) {
503 opaquedir = ovl_check_empty_and_clear(dentry, type); 510 opaquedir = ovl_check_empty_and_clear(dentry);
504 err = PTR_ERR(opaquedir); 511 err = PTR_ERR(opaquedir);
505 if (IS_ERR(opaquedir)) 512 if (IS_ERR(opaquedir))
506 goto out; 513 goto out;
@@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
515 if (IS_ERR(whiteout)) 522 if (IS_ERR(whiteout))
516 goto out_unlock; 523 goto out_unlock;
517 524
518 if (type == OVL_PATH_LOWER) { 525 upper = ovl_dentry_upper(dentry);
526 if (!upper) {
519 upper = lookup_one_len(dentry->d_name.name, upperdir, 527 upper = lookup_one_len(dentry->d_name.name, upperdir,
520 dentry->d_name.len); 528 dentry->d_name.len);
521 err = PTR_ERR(upper); 529 err = PTR_ERR(upper);
522 if (IS_ERR(upper)) 530 if (IS_ERR(upper))
523 goto kill_whiteout; 531 goto kill_whiteout;
@@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
529 } else { 537 } else {
530 int flags = 0; 538 int flags = 0;
531 539
532 upper = ovl_dentry_upper(dentry);
533 if (opaquedir) 540 if (opaquedir)
534 upper = opaquedir; 541 upper = opaquedir;
535 err = -ESTALE; 542 err = -ESTALE;
@@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
648 cap_raise(override_cred->cap_effective, CAP_CHOWN); 655 cap_raise(override_cred->cap_effective, CAP_CHOWN);
649 old_cred = override_creds(override_cred); 656 old_cred = override_creds(override_cred);
650 657
651 err = ovl_remove_and_whiteout(dentry, type, is_dir); 658 err = ovl_remove_and_whiteout(dentry, is_dir);
652 659
653 revert_creds(old_cred); 660 revert_creds(old_cred);
654 put_cred(override_cred); 661 put_cred(override_cred);
@@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
781 } 788 }
782 789
783 if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { 790 if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
784 opaquedir = ovl_check_empty_and_clear(new, new_type); 791 opaquedir = ovl_check_empty_and_clear(new);
785 err = PTR_ERR(opaquedir); 792 err = PTR_ERR(opaquedir);
786 if (IS_ERR(opaquedir)) { 793 if (IS_ERR(opaquedir)) {
787 opaquedir = NULL; 794 opaquedir = NULL;
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index af2d18c9fcee..07d74b24913b 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -235,26 +235,36 @@ out:
235 return err; 235 return err;
236} 236}
237 237
238static bool ovl_need_xattr_filter(struct dentry *dentry,
239 enum ovl_path_type type)
240{
241 return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode);
242}
243
238ssize_t ovl_getxattr(struct dentry *dentry, const char *name, 244ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
239 void *value, size_t size) 245 void *value, size_t size)
240{ 246{
241 if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && 247 struct path realpath;
242 ovl_is_private_xattr(name)) 248 enum ovl_path_type type = ovl_path_real(dentry, &realpath);
249
250 if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
243 return -ENODATA; 251 return -ENODATA;
244 252
245 return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); 253 return vfs_getxattr(realpath.dentry, name, value, size);
246} 254}
247 255
248ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 256ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
249{ 257{
258 struct path realpath;
259 enum ovl_path_type type = ovl_path_real(dentry, &realpath);
250 ssize_t res; 260 ssize_t res;
251 int off; 261 int off;
252 262
253 res = vfs_listxattr(ovl_dentry_real(dentry), list, size); 263 res = vfs_listxattr(realpath.dentry, list, size);
254 if (res <= 0 || size == 0) 264 if (res <= 0 || size == 0)
255 return res; 265 return res;
256 266
257 if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) 267 if (!ovl_need_xattr_filter(dentry, type))
258 return res; 268 return res;
259 269
260 /* filter out private xattrs */ 270 /* filter out private xattrs */
@@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name)
279{ 289{
280 int err; 290 int err;
281 struct path realpath; 291 struct path realpath;
282 enum ovl_path_type type; 292 enum ovl_path_type type = ovl_path_real(dentry, &realpath);
283 293
284 err = ovl_want_write(dentry); 294 err = ovl_want_write(dentry);
285 if (err) 295 if (err)
286 goto out; 296 goto out;
287 297
288 if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && 298 err = -ENODATA;
289 ovl_is_private_xattr(name)) 299 if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
290 goto out_drop_write; 300 goto out_drop_write;
291 301
292 type = ovl_path_real(dentry, &realpath);
293 if (type == OVL_PATH_LOWER) { 302 if (type == OVL_PATH_LOWER) {
294 err = vfs_getxattr(realpath.dentry, name, NULL, 0); 303 err = vfs_getxattr(realpath.dentry, name, NULL, 0);
295 if (err < 0) 304 if (err < 0)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 301f64aa8a45..c0205990a9f5 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
168{ 168{
169 struct ovl_dir_cache *cache = od->cache; 169 struct ovl_dir_cache *cache = od->cache;
170 170
171 list_del(&od->cursor.l_node); 171 list_del_init(&od->cursor.l_node);
172 WARN_ON(cache->refcount <= 0); 172 WARN_ON(cache->refcount <= 0);
173 cache->refcount--; 173 cache->refcount--;
174 if (!cache->refcount) { 174 if (!cache->refcount) {
@@ -276,11 +276,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir,
276 return 0; 276 return 0;
277} 277}
278 278
279static inline int ovl_dir_read_merged(struct path *upperpath, 279static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
280 struct path *lowerpath,
281 struct list_head *list)
282{ 280{
283 int err; 281 int err;
282 struct path lowerpath;
283 struct path upperpath;
284 struct ovl_readdir_data rdd = { 284 struct ovl_readdir_data rdd = {
285 .ctx.actor = ovl_fill_merge, 285 .ctx.actor = ovl_fill_merge,
286 .list = list, 286 .list = list,
@@ -288,25 +288,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath,
288 .is_merge = false, 288 .is_merge = false,
289 }; 289 };
290 290
291 if (upperpath->dentry) { 291 ovl_path_lower(dentry, &lowerpath);
292 err = ovl_dir_read(upperpath, &rdd); 292 ovl_path_upper(dentry, &upperpath);
293
294 if (upperpath.dentry) {
295 err = ovl_dir_read(&upperpath, &rdd);
293 if (err) 296 if (err)
294 goto out; 297 goto out;
295 298
296 if (lowerpath->dentry) { 299 if (lowerpath.dentry) {
297 err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); 300 err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd);
298 if (err) 301 if (err)
299 goto out; 302 goto out;
300 } 303 }
301 } 304 }
302 if (lowerpath->dentry) { 305 if (lowerpath.dentry) {
303 /* 306 /*
304 * Insert lowerpath entries before upperpath ones, this allows 307 * Insert lowerpath entries before upperpath ones, this allows
305 * offsets to be reasonably constant 308 * offsets to be reasonably constant
306 */ 309 */
307 list_add(&rdd.middle, rdd.list); 310 list_add(&rdd.middle, rdd.list);
308 rdd.is_merge = true; 311 rdd.is_merge = true;
309 err = ovl_dir_read(lowerpath, &rdd); 312 err = ovl_dir_read(&lowerpath, &rdd);
310 list_del(&rdd.middle); 313 list_del(&rdd.middle);
311 } 314 }
312out: 315out:
@@ -331,8 +334,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
331static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) 334static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
332{ 335{
333 int res; 336 int res;
334 struct path lowerpath;
335 struct path upperpath;
336 struct ovl_dir_cache *cache; 337 struct ovl_dir_cache *cache;
337 338
338 cache = ovl_dir_cache(dentry); 339 cache = ovl_dir_cache(dentry);
@@ -349,10 +350,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
349 cache->refcount = 1; 350 cache->refcount = 1;
350 INIT_LIST_HEAD(&cache->entries); 351 INIT_LIST_HEAD(&cache->entries);
351 352
352 ovl_path_lower(dentry, &lowerpath); 353 res = ovl_dir_read_merged(dentry, &cache->entries);
353 ovl_path_upper(dentry, &upperpath);
354
355 res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
356 if (res) { 354 if (res) {
357 ovl_cache_free(&cache->entries); 355 ovl_cache_free(&cache->entries);
358 kfree(cache); 356 kfree(cache);
@@ -454,10 +452,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
454 /* 452 /*
455 * Need to check if we started out being a lower dir, but got copied up 453 * Need to check if we started out being a lower dir, but got copied up
456 */ 454 */
457 if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { 455 if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) {
458 struct inode *inode = file_inode(file); 456 struct inode *inode = file_inode(file);
459 457
460 realfile =lockless_dereference(od->upperfile); 458 realfile = lockless_dereference(od->upperfile);
461 if (!realfile) { 459 if (!realfile) {
462 struct path upperpath; 460 struct path upperpath;
463 461
@@ -540,14 +538,9 @@ const struct file_operations ovl_dir_operations = {
540int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) 538int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
541{ 539{
542 int err; 540 int err;
543 struct path lowerpath;
544 struct path upperpath;
545 struct ovl_cache_entry *p; 541 struct ovl_cache_entry *p;
546 542
547 ovl_path_upper(dentry, &upperpath); 543 err = ovl_dir_read_merged(dentry, list);
548 ovl_path_lower(dentry, &lowerpath);
549
550 err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
551 if (err) 544 if (err)
552 return err; 545 return err;
553 546
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 08b704cebfc4..f16d318b71f8 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
24MODULE_DESCRIPTION("Overlay filesystem"); 24MODULE_DESCRIPTION("Overlay filesystem");
25MODULE_LICENSE("GPL"); 25MODULE_LICENSE("GPL");
26 26
27#define OVERLAYFS_SUPER_MAGIC 0x794c764f 27#define OVERLAYFS_SUPER_MAGIC 0x794c7630
28 28
29struct ovl_config { 29struct ovl_config {
30 char *lowerdir; 30 char *lowerdir;
@@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
84 84
85static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) 85static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
86{ 86{
87 struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); 87 return lockless_dereference(oe->__upperdentry);
88 /*
89 * Make sure to order reads to upperdentry wrt ovl_dentry_update()
90 */
91 smp_read_barrier_depends();
92 return upperdentry;
93} 88}
94 89
95void ovl_path_upper(struct dentry *dentry, struct path *path) 90void ovl_path_upper(struct dentry *dentry, struct path *path)
@@ -462,11 +457,34 @@ static const match_table_t ovl_tokens = {
462 {OPT_ERR, NULL} 457 {OPT_ERR, NULL}
463}; 458};
464 459
460static char *ovl_next_opt(char **s)
461{
462 char *sbegin = *s;
463 char *p;
464
465 if (sbegin == NULL)
466 return NULL;
467
468 for (p = sbegin; *p; p++) {
469 if (*p == '\\') {
470 p++;
471 if (!*p)
472 break;
473 } else if (*p == ',') {
474 *p = '\0';
475 *s = p + 1;
476 return sbegin;
477 }
478 }
479 *s = NULL;
480 return sbegin;
481}
482
465static int ovl_parse_opt(char *opt, struct ovl_config *config) 483static int ovl_parse_opt(char *opt, struct ovl_config *config)
466{ 484{
467 char *p; 485 char *p;
468 486
469 while ((p = strsep(&opt, ",")) != NULL) { 487 while ((p = ovl_next_opt(&opt)) != NULL) {
470 int token; 488 int token;
471 substring_t args[MAX_OPT_ARGS]; 489 substring_t args[MAX_OPT_ARGS];
472 490
@@ -554,15 +572,34 @@ out_dput:
554 goto out_unlock; 572 goto out_unlock;
555} 573}
556 574
575static void ovl_unescape(char *s)
576{
577 char *d = s;
578
579 for (;; s++, d++) {
580 if (*s == '\\')
581 s++;
582 *d = *s;
583 if (!*s)
584 break;
585 }
586}
587
557static int ovl_mount_dir(const char *name, struct path *path) 588static int ovl_mount_dir(const char *name, struct path *path)
558{ 589{
559 int err; 590 int err;
591 char *tmp = kstrdup(name, GFP_KERNEL);
592
593 if (!tmp)
594 return -ENOMEM;
560 595
561 err = kern_path(name, LOOKUP_FOLLOW, path); 596 ovl_unescape(tmp);
597 err = kern_path(tmp, LOOKUP_FOLLOW, path);
562 if (err) { 598 if (err) {
563 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 599 pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err);
564 err = -EINVAL; 600 err = -EINVAL;
565 } 601 }
602 kfree(tmp);
566 return err; 603 return err;
567} 604}
568 605
@@ -776,11 +813,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
776 813
777static struct file_system_type ovl_fs_type = { 814static struct file_system_type ovl_fs_type = {
778 .owner = THIS_MODULE, 815 .owner = THIS_MODULE,
779 .name = "overlayfs", 816 .name = "overlay",
780 .mount = ovl_mount, 817 .mount = ovl_mount,
781 .kill_sb = kill_anon_super, 818 .kill_sb = kill_anon_super,
782}; 819};
783MODULE_ALIAS_FS("overlayfs"); 820MODULE_ALIAS_FS("overlay");
784 821
785static int __init ovl_init(void) 822static int __init ovl_init(void)
786{ 823{
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8b663b2d9562..6b4527216a7f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -634,7 +634,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
634 dqstats_inc(DQST_LOOKUPS); 634 dqstats_inc(DQST_LOOKUPS);
635 err = sb->dq_op->write_dquot(dquot); 635 err = sb->dq_op->write_dquot(dquot);
636 if (!ret && err) 636 if (!ret && err)
637 err = ret; 637 ret = err;
638 dqput(dquot); 638 dqput(dquot);
639 spin_lock(&dq_list_lock); 639 spin_lock(&dq_list_lock);
640 } 640 }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 92e8f99a5857..281002689d64 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1338,7 +1338,10 @@ xfs_free_file_space(
1338 goto out; 1338 goto out;
1339} 1339}
1340 1340
1341 1341/*
1342 * Preallocate and zero a range of a file. This mechanism has the allocation
1343 * semantics of fallocate and in addition converts data in the range to zeroes.
1344 */
1342int 1345int
1343xfs_zero_file_space( 1346xfs_zero_file_space(
1344 struct xfs_inode *ip, 1347 struct xfs_inode *ip,
@@ -1346,65 +1349,30 @@ xfs_zero_file_space(
1346 xfs_off_t len) 1349 xfs_off_t len)
1347{ 1350{
1348 struct xfs_mount *mp = ip->i_mount; 1351 struct xfs_mount *mp = ip->i_mount;
1349 uint granularity; 1352 uint blksize;
1350 xfs_off_t start_boundary;
1351 xfs_off_t end_boundary;
1352 int error; 1353 int error;
1353 1354
1354 trace_xfs_zero_file_space(ip); 1355 trace_xfs_zero_file_space(ip);
1355 1356
1356 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1357 blksize = 1 << mp->m_sb.sb_blocklog;
1357 1358
1358 /* 1359 /*
1359 * Round the range of extents we are going to convert inwards. If the 1360 * Punch a hole and prealloc the range. We use hole punch rather than
1360 * offset is aligned, then it doesn't get changed so we zero from the 1361 * unwritten extent conversion for two reasons:
1361 * start of the block offset points to. 1362 *
1363 * 1.) Hole punch handles partial block zeroing for us.
1364 *
1365 * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
1366 * by virtue of the hole punch.
1362 */ 1367 */
1363 start_boundary = round_up(offset, granularity); 1368 error = xfs_free_file_space(ip, offset, len);
1364 end_boundary = round_down(offset + len, granularity); 1369 if (error)
1365 1370 goto out;
1366 ASSERT(start_boundary >= offset);
1367 ASSERT(end_boundary <= offset + len);
1368
1369 if (start_boundary < end_boundary - 1) {
1370 /*
1371 * Writeback the range to ensure any inode size updates due to
1372 * appending writes make it to disk (otherwise we could just
1373 * punch out the delalloc blocks).
1374 */
1375 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1376 start_boundary, end_boundary - 1);
1377 if (error)
1378 goto out;
1379 truncate_pagecache_range(VFS_I(ip), start_boundary,
1380 end_boundary - 1);
1381
1382 /* convert the blocks */
1383 error = xfs_alloc_file_space(ip, start_boundary,
1384 end_boundary - start_boundary - 1,
1385 XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
1386 if (error)
1387 goto out;
1388
1389 /* We've handled the interior of the range, now for the edges */
1390 if (start_boundary != offset) {
1391 error = xfs_iozero(ip, offset, start_boundary - offset);
1392 if (error)
1393 goto out;
1394 }
1395
1396 if (end_boundary != offset + len)
1397 error = xfs_iozero(ip, end_boundary,
1398 offset + len - end_boundary);
1399
1400 } else {
1401 /*
1402 * It's either a sub-granularity range or the range spanned lies
1403 * partially across two adjacent blocks.
1404 */
1405 error = xfs_iozero(ip, offset, len);
1406 }
1407 1371
1372 error = xfs_alloc_file_space(ip, round_down(offset, blksize),
1373 round_up(offset + len, blksize) -
1374 round_down(offset, blksize),
1375 XFS_BMAPI_PREALLOC);
1408out: 1376out:
1409 return error; 1377 return error;
1410 1378
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f1deb961a296..894924a5129b 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk(
236 XFS_WANT_CORRUPTED_RETURN(stat == 1); 236 XFS_WANT_CORRUPTED_RETURN(stat == 1);
237 237
238 /* Check if the record contains the inode in request */ 238 /* Check if the record contains the inode in request */
239 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) 239 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
240 return -EINVAL; 240 *icount = 0;
241 return 0;
242 }
241 243
242 idx = agino - irec->ir_startino + 1; 244 idx = agino - irec->ir_startino + 1;
243 if (idx < XFS_INODES_PER_CHUNK && 245 if (idx < XFS_INODES_PER_CHUNK &&
@@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk(
262 264
263#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 265#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
264 266
267struct xfs_bulkstat_agichunk {
268 char __user **ac_ubuffer;/* pointer into user's buffer */
269 int ac_ubleft; /* bytes left in user's buffer */
270 int ac_ubelem; /* spaces used in user's buffer */
271};
272
265/* 273/*
266 * Process inodes in chunk with a pointer to a formatter function 274 * Process inodes in chunk with a pointer to a formatter function
267 * that will iget the inode and fill in the appropriate structure. 275 * that will iget the inode and fill in the appropriate structure.
268 */ 276 */
269int 277static int
270xfs_bulkstat_ag_ichunk( 278xfs_bulkstat_ag_ichunk(
271 struct xfs_mount *mp, 279 struct xfs_mount *mp,
272 xfs_agnumber_t agno, 280 xfs_agnumber_t agno,
273 struct xfs_inobt_rec_incore *irbp, 281 struct xfs_inobt_rec_incore *irbp,
274 bulkstat_one_pf formatter, 282 bulkstat_one_pf formatter,
275 size_t statstruct_size, 283 size_t statstruct_size,
276 struct xfs_bulkstat_agichunk *acp) 284 struct xfs_bulkstat_agichunk *acp,
285 xfs_agino_t *last_agino)
277{ 286{
278 xfs_ino_t lastino = acp->ac_lastino;
279 char __user **ubufp = acp->ac_ubuffer; 287 char __user **ubufp = acp->ac_ubuffer;
280 int ubleft = acp->ac_ubleft; 288 int chunkidx;
281 int ubelem = acp->ac_ubelem;
282 int chunkidx, clustidx;
283 int error = 0; 289 int error = 0;
284 xfs_agino_t agino; 290 xfs_agino_t agino = irbp->ir_startino;
285 291
286 for (agino = irbp->ir_startino, chunkidx = clustidx = 0; 292 for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
287 XFS_BULKSTAT_UBLEFT(ubleft) && 293 chunkidx++, agino++) {
288 irbp->ir_freecount < XFS_INODES_PER_CHUNK; 294 int fmterror;
289 chunkidx++, clustidx++, agino++) {
290 int fmterror; /* bulkstat formatter result */
291 int ubused; 295 int ubused;
292 xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
293 296
294 ASSERT(chunkidx < XFS_INODES_PER_CHUNK); 297 /* inode won't fit in buffer, we are done */
298 if (acp->ac_ubleft < statstruct_size)
299 break;
295 300
296 /* Skip if this inode is free */ 301 /* Skip if this inode is free */
297 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { 302 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
298 lastino = ino;
299 continue; 303 continue;
300 }
301
302 /*
303 * Count used inodes as free so we can tell when the
304 * chunk is used up.
305 */
306 irbp->ir_freecount++;
307 304
308 /* Get the inode and fill in a single buffer */ 305 /* Get the inode and fill in a single buffer */
309 ubused = statstruct_size; 306 ubused = statstruct_size;
310 error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); 307 error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
311 if (fmterror == BULKSTAT_RV_NOTHING) { 308 *ubufp, acp->ac_ubleft, &ubused, &fmterror);
312 if (error && error != -ENOENT && error != -EINVAL) { 309
313 ubleft = 0; 310 if (fmterror == BULKSTAT_RV_GIVEUP ||
314 break; 311 (error && error != -ENOENT && error != -EINVAL)) {
315 } 312 acp->ac_ubleft = 0;
316 lastino = ino;
317 continue;
318 }
319 if (fmterror == BULKSTAT_RV_GIVEUP) {
320 ubleft = 0;
321 ASSERT(error); 313 ASSERT(error);
322 break; 314 break;
323 } 315 }
324 if (*ubufp) 316
325 *ubufp += ubused; 317 /* be careful not to leak error if at end of chunk */
326 ubleft -= ubused; 318 if (fmterror == BULKSTAT_RV_NOTHING || error) {
327 ubelem++; 319 error = 0;
328 lastino = ino; 320 continue;
321 }
322
323 *ubufp += ubused;
324 acp->ac_ubleft -= ubused;
325 acp->ac_ubelem++;
329 } 326 }
330 327
331 acp->ac_lastino = lastino; 328 /*
332 acp->ac_ubleft = ubleft; 329 * Post-update *last_agino. At this point, agino will always point one
333 acp->ac_ubelem = ubelem; 330 * inode past the last inode we processed successfully. Hence we
331 * substract that inode when setting the *last_agino cursor so that we
332 * return the correct cookie to userspace. On the next bulkstat call,
333 * the inode under the lastino cookie will be skipped as we have already
334 * processed it here.
335 */
336 *last_agino = agino - 1;
334 337
335 return error; 338 return error;
336} 339}
@@ -353,45 +356,33 @@ xfs_bulkstat(
353 xfs_agino_t agino; /* inode # in allocation group */ 356 xfs_agino_t agino; /* inode # in allocation group */
354 xfs_agnumber_t agno; /* allocation group number */ 357 xfs_agnumber_t agno; /* allocation group number */
355 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 358 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
356 int end_of_ag; /* set if we've seen the ag end */
357 int error; /* error code */
358 int fmterror;/* bulkstat formatter result */
359 int i; /* loop index */
360 int icount; /* count of inodes good in irbuf */
361 size_t irbsize; /* size of irec buffer in bytes */ 359 size_t irbsize; /* size of irec buffer in bytes */
362 xfs_ino_t ino; /* inode number (filesystem) */
363 xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */
364 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ 360 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
365 xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
366 xfs_ino_t lastino; /* last inode number returned */
367 int nirbuf; /* size of irbuf */ 361 int nirbuf; /* size of irbuf */
368 int rval; /* return value error code */
369 int tmp; /* result value from btree calls */
370 int ubcount; /* size of user's buffer */ 362 int ubcount; /* size of user's buffer */
371 int ubleft; /* bytes left in user's buffer */ 363 struct xfs_bulkstat_agichunk ac;
372 char __user *ubufp; /* pointer into user's buffer */ 364 int error = 0;
373 int ubelem; /* spaces used in user's buffer */
374 365
375 /* 366 /*
376 * Get the last inode value, see if there's nothing to do. 367 * Get the last inode value, see if there's nothing to do.
377 */ 368 */
378 ino = (xfs_ino_t)*lastinop; 369 agno = XFS_INO_TO_AGNO(mp, *lastinop);
379 lastino = ino; 370 agino = XFS_INO_TO_AGINO(mp, *lastinop);
380 agno = XFS_INO_TO_AGNO(mp, ino);
381 agino = XFS_INO_TO_AGINO(mp, ino);
382 if (agno >= mp->m_sb.sb_agcount || 371 if (agno >= mp->m_sb.sb_agcount ||
383 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 372 *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
384 *done = 1; 373 *done = 1;
385 *ubcountp = 0; 374 *ubcountp = 0;
386 return 0; 375 return 0;
387 } 376 }
388 377
389 ubcount = *ubcountp; /* statstruct's */ 378 ubcount = *ubcountp; /* statstruct's */
390 ubleft = ubcount * statstruct_size; /* bytes */ 379 ac.ac_ubuffer = &ubuffer;
391 *ubcountp = ubelem = 0; 380 ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
381 ac.ac_ubelem = 0;
382
383 *ubcountp = 0;
392 *done = 0; 384 *done = 0;
393 fmterror = 0; 385
394 ubufp = ubuffer;
395 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); 386 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
396 if (!irbuf) 387 if (!irbuf)
397 return -ENOMEM; 388 return -ENOMEM;
@@ -402,9 +393,13 @@ xfs_bulkstat(
402 * Loop over the allocation groups, starting from the last 393 * Loop over the allocation groups, starting from the last
403 * inode returned; 0 means start of the allocation group. 394 * inode returned; 0 means start of the allocation group.
404 */ 395 */
405 rval = 0; 396 while (agno < mp->m_sb.sb_agcount) {
406 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { 397 struct xfs_inobt_rec_incore *irbp = irbuf;
407 cond_resched(); 398 struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
399 bool end_of_ag = false;
400 int icount = 0;
401 int stat;
402
408 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 403 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
409 if (error) 404 if (error)
410 break; 405 break;
@@ -414,10 +409,6 @@ xfs_bulkstat(
414 */ 409 */
415 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, 410 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
416 XFS_BTNUM_INO); 411 XFS_BTNUM_INO);
417 irbp = irbuf;
418 irbufend = irbuf + nirbuf;
419 end_of_ag = 0;
420 icount = 0;
421 if (agino > 0) { 412 if (agino > 0) {
422 /* 413 /*
423 * In the middle of an allocation group, we need to get 414 * In the middle of an allocation group, we need to get
@@ -427,22 +418,23 @@ xfs_bulkstat(
427 418
428 error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); 419 error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
429 if (error) 420 if (error)
430 break; 421 goto del_cursor;
431 if (icount) { 422 if (icount) {
432 irbp->ir_startino = r.ir_startino; 423 irbp->ir_startino = r.ir_startino;
433 irbp->ir_freecount = r.ir_freecount; 424 irbp->ir_freecount = r.ir_freecount;
434 irbp->ir_free = r.ir_free; 425 irbp->ir_free = r.ir_free;
435 irbp++; 426 irbp++;
436 agino = r.ir_startino + XFS_INODES_PER_CHUNK;
437 } 427 }
438 /* Increment to the next record */ 428 /* Increment to the next record */
439 error = xfs_btree_increment(cur, 0, &tmp); 429 error = xfs_btree_increment(cur, 0, &stat);
440 } else { 430 } else {
441 /* Start of ag. Lookup the first inode chunk */ 431 /* Start of ag. Lookup the first inode chunk */
442 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); 432 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
433 }
434 if (error || stat == 0) {
435 end_of_ag = true;
436 goto del_cursor;
443 } 437 }
444 if (error)
445 break;
446 438
447 /* 439 /*
448 * Loop through inode btree records in this ag, 440 * Loop through inode btree records in this ag,
@@ -451,10 +443,10 @@ xfs_bulkstat(
451 while (irbp < irbufend && icount < ubcount) { 443 while (irbp < irbufend && icount < ubcount) {
452 struct xfs_inobt_rec_incore r; 444 struct xfs_inobt_rec_incore r;
453 445
454 error = xfs_inobt_get_rec(cur, &r, &i); 446 error = xfs_inobt_get_rec(cur, &r, &stat);
455 if (error || i == 0) { 447 if (error || stat == 0) {
456 end_of_ag = 1; 448 end_of_ag = true;
457 break; 449 goto del_cursor;
458 } 450 }
459 451
460 /* 452 /*
@@ -469,77 +461,79 @@ xfs_bulkstat(
469 irbp++; 461 irbp++;
470 icount += XFS_INODES_PER_CHUNK - r.ir_freecount; 462 icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
471 } 463 }
472 /* 464 error = xfs_btree_increment(cur, 0, &stat);
473 * Set agino to after this chunk and bump the cursor. 465 if (error || stat == 0) {
474 */ 466 end_of_ag = true;
475 agino = r.ir_startino + XFS_INODES_PER_CHUNK; 467 goto del_cursor;
476 error = xfs_btree_increment(cur, 0, &tmp); 468 }
477 cond_resched(); 469 cond_resched();
478 } 470 }
471
479 /* 472 /*
480 * Drop the btree buffers and the agi buffer. 473 * Drop the btree buffers and the agi buffer as we can't hold any
481 * We can't hold any of the locks these represent 474 * of the locks these represent when calling iget. If there is a
482 * when calling iget. 475 * pending error, then we are done.
483 */ 476 */
477del_cursor:
484 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 478 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
485 xfs_buf_relse(agbp); 479 xfs_buf_relse(agbp);
480 if (error)
481 break;
486 /* 482 /*
487 * Now format all the good inodes into the user's buffer. 483 * Now format all the good inodes into the user's buffer. The
484 * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
485 * for the next loop iteration.
488 */ 486 */
489 irbufend = irbp; 487 irbufend = irbp;
490 for (irbp = irbuf; 488 for (irbp = irbuf;
491 irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { 489 irbp < irbufend && ac.ac_ubleft >= statstruct_size;
492 struct xfs_bulkstat_agichunk ac; 490 irbp++) {
493
494 ac.ac_lastino = lastino;
495 ac.ac_ubuffer = &ubuffer;
496 ac.ac_ubleft = ubleft;
497 ac.ac_ubelem = ubelem;
498 error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, 491 error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
499 formatter, statstruct_size, &ac); 492 formatter, statstruct_size, &ac,
493 &agino);
500 if (error) 494 if (error)
501 rval = error; 495 break;
502
503 lastino = ac.ac_lastino;
504 ubleft = ac.ac_ubleft;
505 ubelem = ac.ac_ubelem;
506 496
507 cond_resched(); 497 cond_resched();
508 } 498 }
499
509 /* 500 /*
510 * Set up for the next loop iteration. 501 * If we've run out of space or had a formatting error, we
502 * are now done
511 */ 503 */
512 if (XFS_BULKSTAT_UBLEFT(ubleft)) { 504 if (ac.ac_ubleft < statstruct_size || error)
513 if (end_of_ag) {
514 agno++;
515 agino = 0;
516 } else
517 agino = XFS_INO_TO_AGINO(mp, lastino);
518 } else
519 break; 505 break;
506
507 if (end_of_ag) {
508 agno++;
509 agino = 0;
510 }
520 } 511 }
521 /* 512 /*
522 * Done, we're either out of filesystem or space to put the data. 513 * Done, we're either out of filesystem or space to put the data.
523 */ 514 */
524 kmem_free(irbuf); 515 kmem_free(irbuf);
525 *ubcountp = ubelem; 516 *ubcountp = ac.ac_ubelem;
517
526 /* 518 /*
527 * Found some inodes, return them now and return the error next time. 519 * We found some inodes, so clear the error status and return them.
520 * The lastino pointer will point directly at the inode that triggered
521 * any error that occurred, so on the next call the error will be
522 * triggered again and propagated to userspace as there will be no
523 * formatted inodes in the buffer.
528 */ 524 */
529 if (ubelem) 525 if (ac.ac_ubelem)
530 rval = 0; 526 error = 0;
531 if (agno >= mp->m_sb.sb_agcount) { 527
532 /* 528 /*
533 * If we ran out of filesystem, mark lastino as off 529 * If we ran out of filesystem, lastino will point off the end of
534 * the end of the filesystem, so the next call 530 * the filesystem so the next call will return immediately.
535 * will return immediately. 531 */
536 */ 532 *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
537 *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0); 533 if (agno >= mp->m_sb.sb_agcount)
538 *done = 1; 534 *done = 1;
539 } else
540 *lastinop = (xfs_ino_t)lastino;
541 535
542 return rval; 536 return error;
543} 537}
544 538
545int 539int
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index aaed08022eb9..6ea8b3912fa4 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
30 int *ubused, 30 int *ubused,
31 int *stat); 31 int *stat);
32 32
33struct xfs_bulkstat_agichunk {
34 xfs_ino_t ac_lastino; /* last inode returned */
35 char __user **ac_ubuffer;/* pointer into user's buffer */
36 int ac_ubleft; /* bytes left in user's buffer */
37 int ac_ubelem; /* spaces used in user's buffer */
38};
39
40int
41xfs_bulkstat_ag_ichunk(
42 struct xfs_mount *mp,
43 xfs_agnumber_t agno,
44 struct xfs_inobt_rec_incore *irbp,
45 bulkstat_one_pf formatter,
46 size_t statstruct_size,
47 struct xfs_bulkstat_agichunk *acp);
48
49/* 33/*
50 * Values for stat return value. 34 * Values for stat return value.
51 */ 35 */