about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/block_dev.c 7
-rw-r--r-- fs/btrfs/backref.c 8
-rw-r--r-- fs/btrfs/btrfs_inode.h 2
-rw-r--r-- fs/btrfs/disk-io.c 6
-rw-r--r-- fs/btrfs/export.c 10
-rw-r--r-- fs/btrfs/extent-tree.c 16
-rw-r--r-- fs/btrfs/extent_io.c 80
-rw-r--r-- fs/btrfs/inode.c 45
-rw-r--r-- fs/btrfs/ioctl.c 5
-rw-r--r-- fs/btrfs/send.c 8
-rw-r--r-- fs/btrfs/super.c 2
-rw-r--r-- fs/btrfs/transaction.c 33
-rw-r--r-- fs/btrfs/transaction.h 6
-rw-r--r-- fs/btrfs/volumes.h 8
-rw-r--r-- fs/cifs/cifsencrypt.c 53
-rw-r--r-- fs/cifs/cifsfs.c 5
-rw-r--r-- fs/cifs/cifsfs.h 2
-rw-r--r-- fs/cifs/file.c 6
-rw-r--r-- fs/cifs/inode.c 34
-rw-r--r-- fs/cifs/ioctl.c 6
-rw-r--r-- fs/cifs/smb2ops.c 8
-rw-r--r-- fs/cifs/smb2pdu.c 84
-rw-r--r-- fs/dax.c 60
-rw-r--r-- fs/ext4/Kconfig 2
-rw-r--r-- fs/ext4/readpage.c 4
-rw-r--r-- fs/fs-writeback.c 17
-rw-r--r-- fs/mpage.c 15
-rw-r--r-- fs/namei.c 8
-rw-r--r-- fs/nfs/delegation.c 8
-rw-r--r-- fs/nfs/delegation.h 2
-rw-r--r-- fs/nfs/direct.c 7
-rw-r--r-- fs/nfs/filelayout/filelayout.c 31
-rw-r--r-- fs/nfs/nfs42proc.c 4
-rw-r--r-- fs/nfs/nfs4proc.c 142
-rw-r--r-- fs/nfs/nfs4state.c 5
-rw-r--r-- fs/nfs/nfs4trace.h 2
-rw-r--r-- fs/nfs/pagelist.c 2
-rw-r--r-- fs/nfs/pnfs.c 35
-rw-r--r-- fs/nfs/pnfs.h 7
-rw-r--r-- fs/nfs/read.c 3
-rw-r--r-- fs/nfs/write.c 17
-rw-r--r-- fs/nfsd/blocklayout.c 8
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 9
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 8
-rw-r--r-- fs/ramfs/file-nommu.c 5
-rw-r--r-- fs/ubifs/xattr.c 3
-rw-r--r-- fs/userfaultfd.c 12
47 files changed, 571 insertions, 279 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 22ea424ee741..073bb57adab1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1242,6 +1242,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1242 goto out_clear; 1242 goto out_clear;
1243 } 1243 }
1244 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1244 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1245 /*
1246 * If the partition is not aligned on a page
1247 * boundary, we can't do dax I/O to it.
1248 */
1249 if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
1250 (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
1251 bdev->bd_inode->i_flags &= ~S_DAX;
1245 } 1252 }
1246 } else { 1253 } else {
1247 if (bdev->bd_contains == bdev) { 1254 if (bdev->bd_contains == bdev) {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ecbc63d3143e..9a2ec79e8cfb 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1828,7 +1828,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
1828 int found = 0; 1828 int found = 0;
1829 struct extent_buffer *eb; 1829 struct extent_buffer *eb;
1830 struct btrfs_inode_extref *extref; 1830 struct btrfs_inode_extref *extref;
1831 struct extent_buffer *leaf;
1832 u32 item_size; 1831 u32 item_size;
1833 u32 cur_offset; 1832 u32 cur_offset;
1834 unsigned long ptr; 1833 unsigned long ptr;
@@ -1856,9 +1855,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
1856 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1855 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1857 btrfs_release_path(path); 1856 btrfs_release_path(path);
1858 1857
1859 leaf = path->nodes[0]; 1858 item_size = btrfs_item_size_nr(eb, slot);
1860 item_size = btrfs_item_size_nr(leaf, slot); 1859 ptr = btrfs_item_ptr_offset(eb, slot);
1861 ptr = btrfs_item_ptr_offset(leaf, slot);
1862 cur_offset = 0; 1860 cur_offset = 0;
1863 1861
1864 while (cur_offset < item_size) { 1862 while (cur_offset < item_size) {
@@ -1872,7 +1870,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
1872 if (ret) 1870 if (ret)
1873 break; 1871 break;
1874 1872
1875 cur_offset += btrfs_inode_extref_name_len(leaf, extref); 1873 cur_offset += btrfs_inode_extref_name_len(eb, extref);
1876 cur_offset += sizeof(*extref); 1874 cur_offset += sizeof(*extref);
1877 } 1875 }
1878 btrfs_tree_read_unlock_blocking(eb); 1876 btrfs_tree_read_unlock_blocking(eb);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 81220b2203c6..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,8 +44,6 @@
44#define BTRFS_INODE_IN_DELALLOC_LIST 9 44#define BTRFS_INODE_IN_DELALLOC_LIST 9
45#define BTRFS_INODE_READDIO_NEED_LOCK 10 45#define BTRFS_INODE_READDIO_NEED_LOCK 10
46#define BTRFS_INODE_HAS_PROPS 11 46#define BTRFS_INODE_HAS_PROPS 11
47/* DIO is ready to submit */
48#define BTRFS_INODE_DIO_READY 12
49/* 47/*
50 * The following 3 bits are meant only for the btree inode. 48 * The following 3 bits are meant only for the btree inode.
51 * When any of them is set, it means an error happened while writing an 49 * When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0d98aee34fee..1e60d00d4ea7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2847,6 +2847,8 @@ int open_ctree(struct super_block *sb,
2847 !extent_buffer_uptodate(chunk_root->node)) { 2847 !extent_buffer_uptodate(chunk_root->node)) {
2848 printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", 2848 printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
2849 sb->s_id); 2849 sb->s_id);
2850 if (!IS_ERR(chunk_root->node))
2851 free_extent_buffer(chunk_root->node);
2850 chunk_root->node = NULL; 2852 chunk_root->node = NULL;
2851 goto fail_tree_roots; 2853 goto fail_tree_roots;
2852 } 2854 }
@@ -2885,6 +2887,8 @@ retry_root_backup:
2885 !extent_buffer_uptodate(tree_root->node)) { 2887 !extent_buffer_uptodate(tree_root->node)) {
2886 printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", 2888 printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
2887 sb->s_id); 2889 sb->s_id);
2890 if (!IS_ERR(tree_root->node))
2891 free_extent_buffer(tree_root->node);
2888 tree_root->node = NULL; 2892 tree_root->node = NULL;
2889 goto recovery_tree_root; 2893 goto recovery_tree_root;
2890 } 2894 }
@@ -3765,9 +3769,7 @@ void close_ctree(struct btrfs_root *root)
3765 * block groups queued for removal, the deletion will be 3769 * block groups queued for removal, the deletion will be
3766 * skipped when we quit the cleaner thread. 3770 * skipped when we quit the cleaner thread.
3767 */ 3771 */
3768 mutex_lock(&root->fs_info->cleaner_mutex);
3769 btrfs_delete_unused_bgs(root->fs_info); 3772 btrfs_delete_unused_bgs(root->fs_info);
3770 mutex_unlock(&root->fs_info->cleaner_mutex);
3771 3773
3772 ret = btrfs_commit_super(root); 3774 ret = btrfs_commit_super(root);
3773 if (ret) 3775 if (ret)
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 8d052209f473..2513a7f53334 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -112,11 +112,11 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
112 u32 generation; 112 u32 generation;
113 113
114 if (fh_type == FILEID_BTRFS_WITH_PARENT) { 114 if (fh_type == FILEID_BTRFS_WITH_PARENT) {
115 if (fh_len != BTRFS_FID_SIZE_CONNECTABLE) 115 if (fh_len < BTRFS_FID_SIZE_CONNECTABLE)
116 return NULL; 116 return NULL;
117 root_objectid = fid->root_objectid; 117 root_objectid = fid->root_objectid;
118 } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { 118 } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
119 if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) 119 if (fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT)
120 return NULL; 120 return NULL;
121 root_objectid = fid->parent_root_objectid; 121 root_objectid = fid->parent_root_objectid;
122 } else 122 } else
@@ -136,11 +136,11 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
136 u32 generation; 136 u32 generation;
137 137
138 if ((fh_type != FILEID_BTRFS_WITH_PARENT || 138 if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
139 fh_len != BTRFS_FID_SIZE_CONNECTABLE) && 139 fh_len < BTRFS_FID_SIZE_CONNECTABLE) &&
140 (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || 140 (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
141 fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && 141 fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
142 (fh_type != FILEID_BTRFS_WITHOUT_PARENT || 142 (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
143 fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) 143 fh_len < BTRFS_FID_SIZE_NON_CONNECTABLE))
144 return NULL; 144 return NULL;
145 145
146 objectid = fid->objectid; 146 objectid = fid->objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5411f0ab5683..601d7d45d164 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2828,6 +2828,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2828 struct btrfs_delayed_ref_head *head; 2828 struct btrfs_delayed_ref_head *head;
2829 int ret; 2829 int ret;
2830 int run_all = count == (unsigned long)-1; 2830 int run_all = count == (unsigned long)-1;
2831 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
2831 2832
2832 /* We'll clean this up in btrfs_cleanup_transaction */ 2833 /* We'll clean this up in btrfs_cleanup_transaction */
2833 if (trans->aborted) 2834 if (trans->aborted)
@@ -2844,6 +2845,7 @@ again:
2844#ifdef SCRAMBLE_DELAYED_REFS 2845#ifdef SCRAMBLE_DELAYED_REFS
2845 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2846 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2846#endif 2847#endif
2848 trans->can_flush_pending_bgs = false;
2847 ret = __btrfs_run_delayed_refs(trans, root, count); 2849 ret = __btrfs_run_delayed_refs(trans, root, count);
2848 if (ret < 0) { 2850 if (ret < 0) {
2849 btrfs_abort_transaction(trans, root, ret); 2851 btrfs_abort_transaction(trans, root, ret);
@@ -2893,6 +2895,7 @@ again:
2893 } 2895 }
2894out: 2896out:
2895 assert_qgroups_uptodate(trans); 2897 assert_qgroups_uptodate(trans);
2898 trans->can_flush_pending_bgs = can_flush_pending_bgs;
2896 return 0; 2899 return 0;
2897} 2900}
2898 2901
@@ -3742,10 +3745,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3742 found->bytes_reserved = 0; 3745 found->bytes_reserved = 0;
3743 found->bytes_readonly = 0; 3746 found->bytes_readonly = 0;
3744 found->bytes_may_use = 0; 3747 found->bytes_may_use = 0;
3745 if (total_bytes > 0) 3748 found->full = 0;
3746 found->full = 0;
3747 else
3748 found->full = 1;
3749 found->force_alloc = CHUNK_ALLOC_NO_FORCE; 3749 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3750 found->chunk_alloc = 0; 3750 found->chunk_alloc = 0;
3751 found->flush = 0; 3751 found->flush = 0;
@@ -4309,7 +4309,8 @@ out:
4309 * the block groups that were made dirty during the lifetime of the 4309 * the block groups that were made dirty during the lifetime of the
4310 * transaction. 4310 * transaction.
4311 */ 4311 */
4312 if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { 4312 if (trans->can_flush_pending_bgs &&
4313 trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
4313 btrfs_create_pending_block_groups(trans, trans->root); 4314 btrfs_create_pending_block_groups(trans, trans->root);
4314 btrfs_trans_release_chunk_metadata(trans); 4315 btrfs_trans_release_chunk_metadata(trans);
4315 } 4316 }
@@ -8668,7 +8669,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8668 } 8669 }
8669 8670
8670 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { 8671 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
8671 btrfs_drop_and_free_fs_root(tree_root->fs_info, root); 8672 btrfs_add_dropped_root(trans, root);
8672 } else { 8673 } else {
8673 free_extent_buffer(root->node); 8674 free_extent_buffer(root->node);
8674 free_extent_buffer(root->commit_root); 8675 free_extent_buffer(root->commit_root);
@@ -9563,7 +9564,9 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
9563 struct btrfs_block_group_item item; 9564 struct btrfs_block_group_item item;
9564 struct btrfs_key key; 9565 struct btrfs_key key;
9565 int ret = 0; 9566 int ret = 0;
9567 bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
9566 9568
9569 trans->can_flush_pending_bgs = false;
9567 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { 9570 list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
9568 if (ret) 9571 if (ret)
9569 goto next; 9572 goto next;
@@ -9584,6 +9587,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
9584next: 9587next:
9585 list_del_init(&block_group->bg_list); 9588 list_del_init(&block_group->bg_list);
9586 } 9589 }
9590 trans->can_flush_pending_bgs = can_flush_pending_bgs;
9587} 9591}
9588 9592
9589int btrfs_make_block_group(struct btrfs_trans_handle *trans, 9593int btrfs_make_block_group(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f1018cfbfefa..3915c9473e94 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2798 bio_end_io_t end_io_func, 2798 bio_end_io_t end_io_func,
2799 int mirror_num, 2799 int mirror_num,
2800 unsigned long prev_bio_flags, 2800 unsigned long prev_bio_flags,
2801 unsigned long bio_flags) 2801 unsigned long bio_flags,
2802 bool force_bio_submit)
2802{ 2803{
2803 int ret = 0; 2804 int ret = 0;
2804 struct bio *bio; 2805 struct bio *bio;
@@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2814 contig = bio_end_sector(bio) == sector; 2815 contig = bio_end_sector(bio) == sector;
2815 2816
2816 if (prev_bio_flags != bio_flags || !contig || 2817 if (prev_bio_flags != bio_flags || !contig ||
2818 force_bio_submit ||
2817 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || 2819 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
2818 bio_add_page(bio, page, page_size, offset) < page_size) { 2820 bio_add_page(bio, page, page_size, offset) < page_size) {
2819 ret = submit_one_bio(rw, bio, mirror_num, 2821 ret = submit_one_bio(rw, bio, mirror_num,
@@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree,
2910 get_extent_t *get_extent, 2912 get_extent_t *get_extent,
2911 struct extent_map **em_cached, 2913 struct extent_map **em_cached,
2912 struct bio **bio, int mirror_num, 2914 struct bio **bio, int mirror_num,
2913 unsigned long *bio_flags, int rw) 2915 unsigned long *bio_flags, int rw,
2916 u64 *prev_em_start)
2914{ 2917{
2915 struct inode *inode = page->mapping->host; 2918 struct inode *inode = page->mapping->host;
2916 u64 start = page_offset(page); 2919 u64 start = page_offset(page);
@@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree,
2958 } 2961 }
2959 while (cur <= end) { 2962 while (cur <= end) {
2960 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; 2963 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2964 bool force_bio_submit = false;
2961 2965
2962 if (cur >= last_byte) { 2966 if (cur >= last_byte) {
2963 char *userpage; 2967 char *userpage;
@@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree,
3008 block_start = em->block_start; 3012 block_start = em->block_start;
3009 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 3013 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3010 block_start = EXTENT_MAP_HOLE; 3014 block_start = EXTENT_MAP_HOLE;
3015
3016 /*
3017 * If we have a file range that points to a compressed extent
3018 * and it's followed by a consecutive file range that points to
3019 * to the same compressed extent (possibly with a different
3020 * offset and/or length, so it either points to the whole extent
3021 * or only part of it), we must make sure we do not submit a
3022 * single bio to populate the pages for the 2 ranges because
3023 * this makes the compressed extent read zero out the pages
3024 * belonging to the 2nd range. Imagine the following scenario:
3025 *
3026 * File layout
3027 * [0 - 8K] [8K - 24K]
3028 * | |
3029 * | |
3030 * points to extent X, points to extent X,
3031 * offset 4K, length of 8K offset 0, length 16K
3032 *
3033 * [extent X, compressed length = 4K uncompressed length = 16K]
3034 *
3035 * If the bio to read the compressed extent covers both ranges,
3036 * it will decompress extent X into the pages belonging to the
3037 * first range and then it will stop, zeroing out the remaining
3038 * pages that belong to the other range that points to extent X.
3039 * So here we make sure we submit 2 bios, one for the first
3040 * range and another one for the second range. Both will target
3041 * the same physical extent from disk, but we can't currently
3042 * make the compressed bio endio callback populate the pages
3043 * for both ranges because each compressed bio is tightly
3044 * coupled with a single extent map, and each range can have
3045 * an extent map with a different offset value relative to the
3046 * uncompressed data of our extent and different lengths. This
3047 * is a corner case so we prioritize correctness over
3048 * non-optimal behavior (submitting 2 bios for the same extent).
3049 */
3050 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3051 prev_em_start && *prev_em_start != (u64)-1 &&
3052 *prev_em_start != em->orig_start)
3053 force_bio_submit = true;
3054
3055 if (prev_em_start)
3056 *prev_em_start = em->orig_start;
3057
3011 free_extent_map(em); 3058 free_extent_map(em);
3012 em = NULL; 3059 em = NULL;
3013 3060
@@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree,
3057 bdev, bio, pnr, 3104 bdev, bio, pnr,
3058 end_bio_extent_readpage, mirror_num, 3105 end_bio_extent_readpage, mirror_num,
3059 *bio_flags, 3106 *bio_flags,
3060 this_bio_flag); 3107 this_bio_flag,
3108 force_bio_submit);
3061 if (!ret) { 3109 if (!ret) {
3062 nr++; 3110 nr++;
3063 *bio_flags = this_bio_flag; 3111 *bio_flags = this_bio_flag;
@@ -3084,7 +3132,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3084 get_extent_t *get_extent, 3132 get_extent_t *get_extent,
3085 struct extent_map **em_cached, 3133 struct extent_map **em_cached,
3086 struct bio **bio, int mirror_num, 3134 struct bio **bio, int mirror_num,
3087 unsigned long *bio_flags, int rw) 3135 unsigned long *bio_flags, int rw,
3136 u64 *prev_em_start)
3088{ 3137{
3089 struct inode *inode; 3138 struct inode *inode;
3090 struct btrfs_ordered_extent *ordered; 3139 struct btrfs_ordered_extent *ordered;
@@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3104 3153
3105 for (index = 0; index < nr_pages; index++) { 3154 for (index = 0; index < nr_pages; index++) {
3106 __do_readpage(tree, pages[index], get_extent, em_cached, bio, 3155 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
3107 mirror_num, bio_flags, rw); 3156 mirror_num, bio_flags, rw, prev_em_start);
3108 page_cache_release(pages[index]); 3157 page_cache_release(pages[index]);
3109 } 3158 }
3110} 3159}
@@ -3114,7 +3163,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
3114 int nr_pages, get_extent_t *get_extent, 3163 int nr_pages, get_extent_t *get_extent,
3115 struct extent_map **em_cached, 3164 struct extent_map **em_cached,
3116 struct bio **bio, int mirror_num, 3165 struct bio **bio, int mirror_num,
3117 unsigned long *bio_flags, int rw) 3166 unsigned long *bio_flags, int rw,
3167 u64 *prev_em_start)
3118{ 3168{
3119 u64 start = 0; 3169 u64 start = 0;
3120 u64 end = 0; 3170 u64 end = 0;
@@ -3135,7 +3185,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
3135 index - first_index, start, 3185 index - first_index, start,
3136 end, get_extent, em_cached, 3186 end, get_extent, em_cached,
3137 bio, mirror_num, bio_flags, 3187 bio, mirror_num, bio_flags,
3138 rw); 3188 rw, prev_em_start);
3139 start = page_start; 3189 start = page_start;
3140 end = start + PAGE_CACHE_SIZE - 1; 3190 end = start + PAGE_CACHE_SIZE - 1;
3141 first_index = index; 3191 first_index = index;
@@ -3146,7 +3196,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
3146 __do_contiguous_readpages(tree, &pages[first_index], 3196 __do_contiguous_readpages(tree, &pages[first_index],
3147 index - first_index, start, 3197 index - first_index, start,
3148 end, get_extent, em_cached, bio, 3198 end, get_extent, em_cached, bio,
3149 mirror_num, bio_flags, rw); 3199 mirror_num, bio_flags, rw,
3200 prev_em_start);
3150} 3201}
3151 3202
3152static int __extent_read_full_page(struct extent_io_tree *tree, 3203static int __extent_read_full_page(struct extent_io_tree *tree,
@@ -3172,7 +3223,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
3172 } 3223 }
3173 3224
3174 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, 3225 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3175 bio_flags, rw); 3226 bio_flags, rw, NULL);
3176 return ret; 3227 return ret;
3177} 3228}
3178 3229
@@ -3198,7 +3249,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3198 int ret; 3249 int ret;
3199 3250
3200 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, 3251 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3201 &bio_flags, READ); 3252 &bio_flags, READ, NULL);
3202 if (bio) 3253 if (bio)
3203 ret = submit_one_bio(READ, bio, mirror_num, bio_flags); 3254 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3204 return ret; 3255 return ret;
@@ -3451,7 +3502,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3451 sector, iosize, pg_offset, 3502 sector, iosize, pg_offset,
3452 bdev, &epd->bio, max_nr, 3503 bdev, &epd->bio, max_nr,
3453 end_bio_extent_writepage, 3504 end_bio_extent_writepage,
3454 0, 0, 0); 3505 0, 0, 0, false);
3455 if (ret) 3506 if (ret)
3456 SetPageError(page); 3507 SetPageError(page);
3457 } 3508 }
@@ -3754,7 +3805,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3754 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, 3805 ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
3755 PAGE_CACHE_SIZE, 0, bdev, &epd->bio, 3806 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3756 -1, end_bio_extent_buffer_writepage, 3807 -1, end_bio_extent_buffer_writepage,
3757 0, epd->bio_flags, bio_flags); 3808 0, epd->bio_flags, bio_flags, false);
3758 epd->bio_flags = bio_flags; 3809 epd->bio_flags = bio_flags;
3759 if (ret) { 3810 if (ret) {
3760 set_btree_ioerr(p); 3811 set_btree_ioerr(p);
@@ -4158,6 +4209,7 @@ int extent_readpages(struct extent_io_tree *tree,
4158 struct page *page; 4209 struct page *page;
4159 struct extent_map *em_cached = NULL; 4210 struct extent_map *em_cached = NULL;
4160 int nr = 0; 4211 int nr = 0;
4212 u64 prev_em_start = (u64)-1;
4161 4213
4162 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 4214 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4163 page = list_entry(pages->prev, struct page, lru); 4215 page = list_entry(pages->prev, struct page, lru);
@@ -4174,12 +4226,12 @@ int extent_readpages(struct extent_io_tree *tree,
4174 if (nr < ARRAY_SIZE(pagepool)) 4226 if (nr < ARRAY_SIZE(pagepool))
4175 continue; 4227 continue;
4176 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, 4228 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4177 &bio, 0, &bio_flags, READ); 4229 &bio, 0, &bio_flags, READ, &prev_em_start);
4178 nr = 0; 4230 nr = 0;
4179 } 4231 }
4180 if (nr) 4232 if (nr)
4181 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, 4233 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
4182 &bio, 0, &bio_flags, READ); 4234 &bio, 0, &bio_flags, READ, &prev_em_start);
4183 4235
4184 if (em_cached) 4236 if (em_cached)
4185 free_extent_map(em_cached); 4237 free_extent_map(em_cached);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0fa7253a2d7..611b66d73e80 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode)
5084 goto no_delete; 5084 goto no_delete;
5085 } 5085 }
5086 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ 5086 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
5087 btrfs_wait_ordered_range(inode, 0, (u64)-1); 5087 if (!special_file(inode->i_mode))
5088 btrfs_wait_ordered_range(inode, 0, (u64)-1);
5088 5089
5089 btrfs_free_io_failure_record(inode, 0, (u64)-1); 5090 btrfs_free_io_failure_record(inode, 0, (u64)-1);
5090 5091
@@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
7408 return em; 7409 return em;
7409} 7410}
7410 7411
7412struct btrfs_dio_data {
7413 u64 outstanding_extents;
7414 u64 reserve;
7415};
7411 7416
7412static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, 7417static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7413 struct buffer_head *bh_result, int create) 7418 struct buffer_head *bh_result, int create)
@@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7415 struct extent_map *em; 7420 struct extent_map *em;
7416 struct btrfs_root *root = BTRFS_I(inode)->root; 7421 struct btrfs_root *root = BTRFS_I(inode)->root;
7417 struct extent_state *cached_state = NULL; 7422 struct extent_state *cached_state = NULL;
7423 struct btrfs_dio_data *dio_data = NULL;
7418 u64 start = iblock << inode->i_blkbits; 7424 u64 start = iblock << inode->i_blkbits;
7419 u64 lockstart, lockend; 7425 u64 lockstart, lockend;
7420 u64 len = bh_result->b_size; 7426 u64 len = bh_result->b_size;
7421 u64 *outstanding_extents = NULL;
7422 int unlock_bits = EXTENT_LOCKED; 7427 int unlock_bits = EXTENT_LOCKED;
7423 int ret = 0; 7428 int ret = 0;
7424 7429
@@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7436 * that anything that needs to check if there's a transaction doesn't get 7441 * that anything that needs to check if there's a transaction doesn't get
7437 * confused. 7442 * confused.
7438 */ 7443 */
7439 outstanding_extents = current->journal_info; 7444 dio_data = current->journal_info;
7440 current->journal_info = NULL; 7445 current->journal_info = NULL;
7441 } 7446 }
7442 7447
@@ -7568,17 +7573,18 @@ unlock:
7568 * within our reservation, otherwise we need to adjust our inode 7573 * within our reservation, otherwise we need to adjust our inode
7569 * counter appropriately. 7574 * counter appropriately.
7570 */ 7575 */
7571 if (*outstanding_extents) { 7576 if (dio_data->outstanding_extents) {
7572 (*outstanding_extents)--; 7577 (dio_data->outstanding_extents)--;
7573 } else { 7578 } else {
7574 spin_lock(&BTRFS_I(inode)->lock); 7579 spin_lock(&BTRFS_I(inode)->lock);
7575 BTRFS_I(inode)->outstanding_extents++; 7580 BTRFS_I(inode)->outstanding_extents++;
7576 spin_unlock(&BTRFS_I(inode)->lock); 7581 spin_unlock(&BTRFS_I(inode)->lock);
7577 } 7582 }
7578 7583
7579 current->journal_info = outstanding_extents;
7580 btrfs_free_reserved_data_space(inode, len); 7584 btrfs_free_reserved_data_space(inode, len);
7581 set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); 7585 WARN_ON(dio_data->reserve < len);
7586 dio_data->reserve -= len;
7587 current->journal_info = dio_data;
7582 } 7588 }
7583 7589
7584 /* 7590 /*
@@ -7601,8 +7607,8 @@ unlock:
7601unlock_err: 7607unlock_err:
7602 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7608 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7603 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7609 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7604 if (outstanding_extents) 7610 if (dio_data)
7605 current->journal_info = outstanding_extents; 7611 current->journal_info = dio_data;
7606 return ret; 7612 return ret;
7607} 7613}
7608 7614
@@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
8329{ 8335{
8330 struct file *file = iocb->ki_filp; 8336 struct file *file = iocb->ki_filp;
8331 struct inode *inode = file->f_mapping->host; 8337 struct inode *inode = file->f_mapping->host;
8332 u64 outstanding_extents = 0; 8338 struct btrfs_root *root = BTRFS_I(inode)->root;
8339 struct btrfs_dio_data dio_data = { 0 };
8333 size_t count = 0; 8340 size_t count = 0;
8334 int flags = 0; 8341 int flags = 0;
8335 bool wakeup = true; 8342 bool wakeup = true;
@@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
8367 ret = btrfs_delalloc_reserve_space(inode, count); 8374 ret = btrfs_delalloc_reserve_space(inode, count);
8368 if (ret) 8375 if (ret)
8369 goto out; 8376 goto out;
8370 outstanding_extents = div64_u64(count + 8377 dio_data.outstanding_extents = div64_u64(count +
8371 BTRFS_MAX_EXTENT_SIZE - 1, 8378 BTRFS_MAX_EXTENT_SIZE - 1,
8372 BTRFS_MAX_EXTENT_SIZE); 8379 BTRFS_MAX_EXTENT_SIZE);
8373 8380
@@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
8376 * do the accounting properly if we go over the number we 8383 * do the accounting properly if we go over the number we
8377 * originally calculated. Abuse current->journal_info for this. 8384 * originally calculated. Abuse current->journal_info for this.
8378 */ 8385 */
8379 current->journal_info = &outstanding_extents; 8386 dio_data.reserve = round_up(count, root->sectorsize);
8387 current->journal_info = &dio_data;
8380 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8388 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8381 &BTRFS_I(inode)->runtime_flags)) { 8389 &BTRFS_I(inode)->runtime_flags)) {
8382 inode_dio_end(inode); 8390 inode_dio_end(inode);
@@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
8391 if (iov_iter_rw(iter) == WRITE) { 8399 if (iov_iter_rw(iter) == WRITE) {
8392 current->journal_info = NULL; 8400 current->journal_info = NULL;
8393 if (ret < 0 && ret != -EIOCBQUEUED) { 8401 if (ret < 0 && ret != -EIOCBQUEUED) {
8394 /* 8402 if (dio_data.reserve)
8395 * If the error comes from submitting stage, 8403 btrfs_delalloc_release_space(inode,
8396 * btrfs_get_blocsk_direct() has free'd data space, 8404 dio_data.reserve);
8397 * and metadata space will be handled by
8398 * finish_ordered_fn, don't do that again to make
8399 * sure bytes_may_use is correct.
8400 */
8401 if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
8402 &BTRFS_I(inode)->runtime_flags))
8403 btrfs_delalloc_release_space(inode, count);
8404 } else if (ret >= 0 && (size_t)ret < count) 8405 } else if (ret >= 0 && (size_t)ret < count)
8405 btrfs_delalloc_release_space(inode, 8406 btrfs_delalloc_release_space(inode,
8406 count - (size_t)ret); 8407 count - (size_t)ret);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0adf5422fce9..3e3e6130637f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4639,6 +4639,11 @@ locked:
4639 bctl->flags |= BTRFS_BALANCE_TYPE_MASK; 4639 bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
4640 } 4640 }
4641 4641
4642 if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
4643 ret = -EINVAL;
4644 goto out_bargs;
4645 }
4646
4642do_balance: 4647do_balance:
4643 /* 4648 /*
4644 * Ownership of bctl and mutually_exclusive_operation_running 4649 * Ownership of bctl and mutually_exclusive_operation_running
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index aa72bfd28f7d..a739b825bdd3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1920,10 +1920,12 @@ static int did_overwrite_ref(struct send_ctx *sctx,
1920 /* 1920 /*
1921 * We know that it is or will be overwritten. Check this now. 1921 * We know that it is or will be overwritten. Check this now.
1922 * The current inode being processed might have been the one that caused 1922 * The current inode being processed might have been the one that caused
1923 * inode 'ino' to be orphanized, therefore ow_inode can actually be the 1923 * inode 'ino' to be orphanized, therefore check if ow_inode matches
1924 * same as sctx->send_progress. 1924 * the current inode being processed.
1925 */ 1925 */
1926 if (ow_inode <= sctx->send_progress) 1926 if ((ow_inode < sctx->send_progress) ||
1927 (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
1928 gen == sctx->cur_inode_gen))
1927 ret = 1; 1929 ret = 1;
1928 else 1930 else
1929 ret = 0; 1931 ret = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2b07b3581781..11d1eab9234d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1658 * groups on disk until we're mounted read-write again 1658 * groups on disk until we're mounted read-write again
1659 * unless we clean them up here. 1659 * unless we clean them up here.
1660 */ 1660 */
1661 mutex_lock(&root->fs_info->cleaner_mutex);
1662 btrfs_delete_unused_bgs(fs_info); 1661 btrfs_delete_unused_bgs(fs_info);
1663 mutex_unlock(&root->fs_info->cleaner_mutex);
1664 1662
1665 btrfs_dev_replace_suspend_for_unmount(fs_info); 1663 btrfs_dev_replace_suspend_for_unmount(fs_info);
1666 btrfs_scrub_cancel(fs_info); 1664 btrfs_scrub_cancel(fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8f259b3a66b3..a5b06442f0bf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
117 btrfs_unpin_free_ino(root); 117 btrfs_unpin_free_ino(root);
118 clear_btree_io_tree(&root->dirty_log_pages); 118 clear_btree_io_tree(&root->dirty_log_pages);
119 } 119 }
120
121 /* We can free old roots now. */
122 spin_lock(&trans->dropped_roots_lock);
123 while (!list_empty(&trans->dropped_roots)) {
124 root = list_first_entry(&trans->dropped_roots,
125 struct btrfs_root, root_list);
126 list_del_init(&root->root_list);
127 spin_unlock(&trans->dropped_roots_lock);
128 btrfs_drop_and_free_fs_root(fs_info, root);
129 spin_lock(&trans->dropped_roots_lock);
130 }
131 spin_unlock(&trans->dropped_roots_lock);
120 up_write(&fs_info->commit_root_sem); 132 up_write(&fs_info->commit_root_sem);
121} 133}
122 134
@@ -255,11 +267,13 @@ loop:
255 INIT_LIST_HEAD(&cur_trans->pending_ordered); 267 INIT_LIST_HEAD(&cur_trans->pending_ordered);
256 INIT_LIST_HEAD(&cur_trans->dirty_bgs); 268 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
257 INIT_LIST_HEAD(&cur_trans->io_bgs); 269 INIT_LIST_HEAD(&cur_trans->io_bgs);
270 INIT_LIST_HEAD(&cur_trans->dropped_roots);
258 mutex_init(&cur_trans->cache_write_mutex); 271 mutex_init(&cur_trans->cache_write_mutex);
259 cur_trans->num_dirty_bgs = 0; 272 cur_trans->num_dirty_bgs = 0;
260 spin_lock_init(&cur_trans->dirty_bgs_lock); 273 spin_lock_init(&cur_trans->dirty_bgs_lock);
261 INIT_LIST_HEAD(&cur_trans->deleted_bgs); 274 INIT_LIST_HEAD(&cur_trans->deleted_bgs);
262 spin_lock_init(&cur_trans->deleted_bgs_lock); 275 spin_lock_init(&cur_trans->deleted_bgs_lock);
276 spin_lock_init(&cur_trans->dropped_roots_lock);
263 list_add_tail(&cur_trans->list, &fs_info->trans_list); 277 list_add_tail(&cur_trans->list, &fs_info->trans_list);
264 extent_io_tree_init(&cur_trans->dirty_pages, 278 extent_io_tree_init(&cur_trans->dirty_pages,
265 fs_info->btree_inode->i_mapping); 279 fs_info->btree_inode->i_mapping);
@@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
336} 350}
337 351
338 352
353void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
354 struct btrfs_root *root)
355{
356 struct btrfs_transaction *cur_trans = trans->transaction;
357
358 /* Add ourselves to the transaction dropped list */
359 spin_lock(&cur_trans->dropped_roots_lock);
360 list_add_tail(&root->root_list, &cur_trans->dropped_roots);
361 spin_unlock(&cur_trans->dropped_roots_lock);
362
363 /* Make sure we don't try to update the root at commit time */
364 spin_lock(&root->fs_info->fs_roots_radix_lock);
365 radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
366 (unsigned long)root->root_key.objectid,
367 BTRFS_ROOT_TRANS_TAG);
368 spin_unlock(&root->fs_info->fs_roots_radix_lock);
369}
370
339int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 371int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
340 struct btrfs_root *root) 372 struct btrfs_root *root)
341{ 373{
@@ -525,6 +557,7 @@ again:
525 h->delayed_ref_elem.seq = 0; 557 h->delayed_ref_elem.seq = 0;
526 h->type = type; 558 h->type = type;
527 h->allocating_chunk = false; 559 h->allocating_chunk = false;
560 h->can_flush_pending_bgs = true;
528 h->reloc_reserved = false; 561 h->reloc_reserved = false;
529 h->sync = false; 562 h->sync = false;
530 INIT_LIST_HEAD(&h->qgroup_ref_list); 563 INIT_LIST_HEAD(&h->qgroup_ref_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index edc2fbc262d7..a994bb097ee5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -65,6 +65,7 @@ struct btrfs_transaction {
65 struct list_head switch_commits; 65 struct list_head switch_commits;
66 struct list_head dirty_bgs; 66 struct list_head dirty_bgs;
67 struct list_head io_bgs; 67 struct list_head io_bgs;
68 struct list_head dropped_roots;
68 u64 num_dirty_bgs; 69 u64 num_dirty_bgs;
69 70
70 /* 71 /*
@@ -76,6 +77,7 @@ struct btrfs_transaction {
76 spinlock_t dirty_bgs_lock; 77 spinlock_t dirty_bgs_lock;
77 struct list_head deleted_bgs; 78 struct list_head deleted_bgs;
78 spinlock_t deleted_bgs_lock; 79 spinlock_t deleted_bgs_lock;
80 spinlock_t dropped_roots_lock;
79 struct btrfs_delayed_ref_root delayed_refs; 81 struct btrfs_delayed_ref_root delayed_refs;
80 int aborted; 82 int aborted;
81 int dirty_bg_run; 83 int dirty_bg_run;
@@ -116,6 +118,7 @@ struct btrfs_trans_handle {
116 short aborted; 118 short aborted;
117 short adding_csums; 119 short adding_csums;
118 bool allocating_chunk; 120 bool allocating_chunk;
121 bool can_flush_pending_bgs;
119 bool reloc_reserved; 122 bool reloc_reserved;
120 bool sync; 123 bool sync;
121 unsigned int type; 124 unsigned int type;
@@ -216,5 +219,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info);
216int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 219int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
217void btrfs_put_transaction(struct btrfs_transaction *transaction); 220void btrfs_put_transaction(struct btrfs_transaction *transaction);
218void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); 221void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
219 222void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
223 struct btrfs_root *root);
220#endif 224#endif
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2ca784a14e84..595279a8b99f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -376,6 +376,14 @@ struct map_lookup {
376#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) 376#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
377#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) 377#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
378 378
379#define BTRFS_BALANCE_ARGS_MASK \
380 (BTRFS_BALANCE_ARGS_PROFILES | \
381 BTRFS_BALANCE_ARGS_USAGE | \
382 BTRFS_BALANCE_ARGS_DEVID | \
383 BTRFS_BALANCE_ARGS_DRANGE | \
384 BTRFS_BALANCE_ARGS_VRANGE | \
385 BTRFS_BALANCE_ARGS_LIMIT)
386
379/* 387/*
380 * Profile changing flags. When SOFT is set we won't relocate chunk if 388 * Profile changing flags. When SOFT is set we won't relocate chunk if
381 * it already has the target profile (even though it may be 389 * it already has the target profile (even though it may be
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index aa0dc2573374..afa09fce8151 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -444,6 +444,48 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
444 return 0; 444 return 0;
445} 445}
446 446
447/* Server has provided av pairs/target info in the type 2 challenge
448 * packet and we have plucked it and stored within smb session.
449 * We parse that blob here to find the server given timestamp
450 * as part of ntlmv2 authentication (or local current time as
451 * default in case of failure)
452 */
453static __le64
454find_timestamp(struct cifs_ses *ses)
455{
456 unsigned int attrsize;
457 unsigned int type;
458 unsigned int onesize = sizeof(struct ntlmssp2_name);
459 unsigned char *blobptr;
460 unsigned char *blobend;
461 struct ntlmssp2_name *attrptr;
462
463 if (!ses->auth_key.len || !ses->auth_key.response)
464 return 0;
465
466 blobptr = ses->auth_key.response;
467 blobend = blobptr + ses->auth_key.len;
468
469 while (blobptr + onesize < blobend) {
470 attrptr = (struct ntlmssp2_name *) blobptr;
471 type = le16_to_cpu(attrptr->type);
472 if (type == NTLMSSP_AV_EOL)
473 break;
474 blobptr += 2; /* advance attr type */
475 attrsize = le16_to_cpu(attrptr->length);
476 blobptr += 2; /* advance attr size */
477 if (blobptr + attrsize > blobend)
478 break;
479 if (type == NTLMSSP_AV_TIMESTAMP) {
480 if (attrsize == sizeof(u64))
481 return *((__le64 *)blobptr);
482 }
483 blobptr += attrsize; /* advance attr value */
484 }
485
486 return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
487}
488
447static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, 489static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
448 const struct nls_table *nls_cp) 490 const struct nls_table *nls_cp)
449{ 491{
@@ -641,6 +683,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
641 struct ntlmv2_resp *ntlmv2; 683 struct ntlmv2_resp *ntlmv2;
642 char ntlmv2_hash[16]; 684 char ntlmv2_hash[16];
643 unsigned char *tiblob = NULL; /* target info blob */ 685 unsigned char *tiblob = NULL; /* target info blob */
686 __le64 rsp_timestamp;
644 687
645 if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { 688 if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) {
646 if (!ses->domainName) { 689 if (!ses->domainName) {
@@ -659,6 +702,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
659 } 702 }
660 } 703 }
661 704
705 /* Must be within 5 minutes of the server (or in range +/-2h
706 * in case of Mac OS X), so simply carry over server timestamp
707 * (as Windows 7 does)
708 */
709 rsp_timestamp = find_timestamp(ses);
710
662 baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp); 711 baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
663 tilen = ses->auth_key.len; 712 tilen = ses->auth_key.len;
664 tiblob = ses->auth_key.response; 713 tiblob = ses->auth_key.response;
@@ -675,8 +724,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
675 (ses->auth_key.response + CIFS_SESS_KEY_SIZE); 724 (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
676 ntlmv2->blob_signature = cpu_to_le32(0x00000101); 725 ntlmv2->blob_signature = cpu_to_le32(0x00000101);
677 ntlmv2->reserved = 0; 726 ntlmv2->reserved = 0;
678 /* Must be within 5 minutes of the server */ 727 ntlmv2->time = rsp_timestamp;
679 ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 728
680 get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); 729 get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal));
681 ntlmv2->reserved2 = 0; 730 ntlmv2->reserved2 = 0;
682 731
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6a1119e87fbb..e739950ca084 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
325static void 325static void
326cifs_show_security(struct seq_file *s, struct cifs_ses *ses) 326cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
327{ 327{
328 if (ses->sectype == Unspecified) 328 if (ses->sectype == Unspecified) {
329 if (ses->user_name == NULL)
330 seq_puts(s, ",sec=none");
329 return; 331 return;
332 }
330 333
331 seq_puts(s, ",sec="); 334 seq_puts(s, ",sec=");
332 335
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 27aea110e923..c3cc1609025f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
136extern const struct export_operations cifs_export_ops; 136extern const struct export_operations cifs_export_ops;
137#endif /* CONFIG_CIFS_NFSD_EXPORT */ 137#endif /* CONFIG_CIFS_NFSD_EXPORT */
138 138
139#define CIFS_VERSION "2.07" 139#define CIFS_VERSION "2.08"
140#endif /* _CIFSFS_H */ 140#endif /* _CIFSFS_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e2a6af1508af..62203c387db4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3380,6 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3380 struct page *page, *tpage; 3380 struct page *page, *tpage;
3381 unsigned int expected_index; 3381 unsigned int expected_index;
3382 int rc; 3382 int rc;
3383 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
3383 3384
3384 INIT_LIST_HEAD(tmplist); 3385 INIT_LIST_HEAD(tmplist);
3385 3386
@@ -3392,7 +3393,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3392 */ 3393 */
3393 __set_page_locked(page); 3394 __set_page_locked(page);
3394 rc = add_to_page_cache_locked(page, mapping, 3395 rc = add_to_page_cache_locked(page, mapping,
3395 page->index, GFP_KERNEL); 3396 page->index, gfp);
3396 3397
3397 /* give up if we can't stick it in the cache */ 3398 /* give up if we can't stick it in the cache */
3398 if (rc) { 3399 if (rc) {
@@ -3418,8 +3419,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3418 break; 3419 break;
3419 3420
3420 __set_page_locked(page); 3421 __set_page_locked(page);
3421 if (add_to_page_cache_locked(page, mapping, page->index, 3422 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3422 GFP_KERNEL)) {
3423 __clear_page_locked(page); 3423 __clear_page_locked(page);
3424 break; 3424 break;
3425 } 3425 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f621b44cb800..6b66dd5d1540 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2034,7 +2034,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
2034 struct tcon_link *tlink = NULL; 2034 struct tcon_link *tlink = NULL;
2035 struct cifs_tcon *tcon = NULL; 2035 struct cifs_tcon *tcon = NULL;
2036 struct TCP_Server_Info *server; 2036 struct TCP_Server_Info *server;
2037 struct cifs_io_parms io_parms;
2038 2037
2039 /* 2038 /*
2040 * To avoid spurious oplock breaks from server, in the case of 2039 * To avoid spurious oplock breaks from server, in the case of
@@ -2056,18 +2055,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
2056 rc = -ENOSYS; 2055 rc = -ENOSYS;
2057 cifsFileInfo_put(open_file); 2056 cifsFileInfo_put(open_file);
2058 cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc); 2057 cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc);
2059 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
2060 unsigned int bytes_written;
2061
2062 io_parms.netfid = open_file->fid.netfid;
2063 io_parms.pid = open_file->pid;
2064 io_parms.tcon = tcon;
2065 io_parms.offset = 0;
2066 io_parms.length = attrs->ia_size;
2067 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
2068 NULL, NULL, 1);
2069 cifs_dbg(FYI, "Wrt seteof rc %d\n", rc);
2070 }
2071 } else 2058 } else
2072 rc = -EINVAL; 2059 rc = -EINVAL;
2073 2060
@@ -2093,28 +2080,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
2093 else 2080 else
2094 rc = -ENOSYS; 2081 rc = -ENOSYS;
2095 cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc); 2082 cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc);
2096 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
2097 __u16 netfid;
2098 int oplock = 0;
2099 2083
2100 rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN,
2101 GENERIC_WRITE, CREATE_NOT_DIR, &netfid,
2102 &oplock, NULL, cifs_sb->local_nls,
2103 cifs_remap(cifs_sb));
2104 if (rc == 0) {
2105 unsigned int bytes_written;
2106
2107 io_parms.netfid = netfid;
2108 io_parms.pid = current->tgid;
2109 io_parms.tcon = tcon;
2110 io_parms.offset = 0;
2111 io_parms.length = attrs->ia_size;
2112 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL,
2113 NULL, 1);
2114 cifs_dbg(FYI, "wrt seteof rc %d\n", rc);
2115 CIFSSMBClose(xid, tcon, netfid);
2116 }
2117 }
2118 if (tlink) 2084 if (tlink)
2119 cifs_put_tlink(tlink); 2085 cifs_put_tlink(tlink);
2120 2086
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index c63f5227b681..28a77bf1d559 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
67 goto out_drop_write; 67 goto out_drop_write;
68 } 68 }
69 69
70 if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
71 rc = -EBADF;
72 cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
73 goto out_fput;
74 }
75
70 if ((!src_file.file->private_data) || (!dst_file->private_data)) { 76 if ((!src_file.file->private_data) || (!dst_file->private_data)) {
71 rc = -EBADF; 77 rc = -EBADF;
72 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); 78 cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index df91bcf56d67..18da19f4f811 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -50,9 +50,13 @@ change_conf(struct TCP_Server_Info *server)
50 break; 50 break;
51 default: 51 default:
52 server->echoes = true; 52 server->echoes = true;
53 server->oplocks = true; 53 if (enable_oplocks) {
54 server->oplocks = true;
55 server->oplock_credits = 1;
56 } else
57 server->oplocks = false;
58
54 server->echo_credits = 1; 59 server->echo_credits = 1;
55 server->oplock_credits = 1;
56 } 60 }
57 server->credits -= server->echo_credits + server->oplock_credits; 61 server->credits -= server->echo_credits + server->oplock_credits;
58 return 0; 62 return 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 070fb2ad85ce..597a417ba94d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -46,6 +46,7 @@
46#include "smb2status.h" 46#include "smb2status.h"
47#include "smb2glob.h" 47#include "smb2glob.h"
48#include "cifspdu.h" 48#include "cifspdu.h"
49#include "cifs_spnego.h"
49 50
50/* 51/*
51 * The following table defines the expected "StructureSize" of SMB2 requests 52 * The following table defines the expected "StructureSize" of SMB2 requests
@@ -486,19 +487,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
486 cifs_dbg(FYI, "missing security blob on negprot\n"); 487 cifs_dbg(FYI, "missing security blob on negprot\n");
487 488
488 rc = cifs_enable_signing(server, ses->sign); 489 rc = cifs_enable_signing(server, ses->sign);
489#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */
490 if (rc) 490 if (rc)
491 goto neg_exit; 491 goto neg_exit;
492 if (blob_length) 492 if (blob_length) {
493 rc = decode_negTokenInit(security_blob, blob_length, server); 493 rc = decode_negTokenInit(security_blob, blob_length, server);
494 if (rc == 1) 494 if (rc == 1)
495 rc = 0; 495 rc = 0;
496 else if (rc == 0) { 496 else if (rc == 0)
497 rc = -EIO; 497 rc = -EIO;
498 goto neg_exit;
499 } 498 }
500#endif
501
502neg_exit: 499neg_exit:
503 free_rsp_buf(resp_buftype, rsp); 500 free_rsp_buf(resp_buftype, rsp);
504 return rc; 501 return rc;
@@ -592,7 +589,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
592 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ 589 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
593 struct TCP_Server_Info *server = ses->server; 590 struct TCP_Server_Info *server = ses->server;
594 u16 blob_length = 0; 591 u16 blob_length = 0;
595 char *security_blob; 592 struct key *spnego_key = NULL;
593 char *security_blob = NULL;
596 char *ntlmssp_blob = NULL; 594 char *ntlmssp_blob = NULL;
597 bool use_spnego = false; /* else use raw ntlmssp */ 595 bool use_spnego = false; /* else use raw ntlmssp */
598 596
@@ -620,7 +618,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
620 ses->ntlmssp->sesskey_per_smbsess = true; 618 ses->ntlmssp->sesskey_per_smbsess = true;
621 619
622 /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */ 620 /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
623 ses->sectype = RawNTLMSSP; 621 if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
622 ses->sectype = RawNTLMSSP;
624 623
625ssetup_ntlmssp_authenticate: 624ssetup_ntlmssp_authenticate:
626 if (phase == NtLmChallenge) 625 if (phase == NtLmChallenge)
@@ -649,7 +648,48 @@ ssetup_ntlmssp_authenticate:
649 iov[0].iov_base = (char *)req; 648 iov[0].iov_base = (char *)req;
650 /* 4 for rfc1002 length field and 1 for pad */ 649 /* 4 for rfc1002 length field and 1 for pad */
651 iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; 650 iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
652 if (phase == NtLmNegotiate) { 651
652 if (ses->sectype == Kerberos) {
653#ifdef CONFIG_CIFS_UPCALL
654 struct cifs_spnego_msg *msg;
655
656 spnego_key = cifs_get_spnego_key(ses);
657 if (IS_ERR(spnego_key)) {
658 rc = PTR_ERR(spnego_key);
659 spnego_key = NULL;
660 goto ssetup_exit;
661 }
662
663 msg = spnego_key->payload.data;
664 /*
665 * check version field to make sure that cifs.upcall is
666 * sending us a response in an expected form
667 */
668 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
669 cifs_dbg(VFS,
670 "bad cifs.upcall version. Expected %d got %d",
671 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
672 rc = -EKEYREJECTED;
673 goto ssetup_exit;
674 }
675 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
676 GFP_KERNEL);
677 if (!ses->auth_key.response) {
678 cifs_dbg(VFS,
679 "Kerberos can't allocate (%u bytes) memory",
680 msg->sesskey_len);
681 rc = -ENOMEM;
682 goto ssetup_exit;
683 }
684 ses->auth_key.len = msg->sesskey_len;
685 blob_length = msg->secblob_len;
686 iov[1].iov_base = msg->data + msg->sesskey_len;
687 iov[1].iov_len = blob_length;
688#else
689 rc = -EOPNOTSUPP;
690 goto ssetup_exit;
691#endif /* CONFIG_CIFS_UPCALL */
692 } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */
653 ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE), 693 ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
654 GFP_KERNEL); 694 GFP_KERNEL);
655 if (ntlmssp_blob == NULL) { 695 if (ntlmssp_blob == NULL) {
@@ -672,6 +712,8 @@ ssetup_ntlmssp_authenticate:
672 /* with raw NTLMSSP we don't encapsulate in SPNEGO */ 712 /* with raw NTLMSSP we don't encapsulate in SPNEGO */
673 security_blob = ntlmssp_blob; 713 security_blob = ntlmssp_blob;
674 } 714 }
715 iov[1].iov_base = security_blob;
716 iov[1].iov_len = blob_length;
675 } else if (phase == NtLmAuthenticate) { 717 } else if (phase == NtLmAuthenticate) {
676 req->hdr.SessionId = ses->Suid; 718 req->hdr.SessionId = ses->Suid;
677 ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500, 719 ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
@@ -699,6 +741,8 @@ ssetup_ntlmssp_authenticate:
699 } else { 741 } else {
700 security_blob = ntlmssp_blob; 742 security_blob = ntlmssp_blob;
701 } 743 }
744 iov[1].iov_base = security_blob;
745 iov[1].iov_len = blob_length;
702 } else { 746 } else {
703 cifs_dbg(VFS, "illegal ntlmssp phase\n"); 747 cifs_dbg(VFS, "illegal ntlmssp phase\n");
704 rc = -EIO; 748 rc = -EIO;
@@ -710,8 +754,6 @@ ssetup_ntlmssp_authenticate:
710 cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 754 cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
711 1 /* pad */ - 4 /* rfc1001 len */); 755 1 /* pad */ - 4 /* rfc1001 len */);
712 req->SecurityBufferLength = cpu_to_le16(blob_length); 756 req->SecurityBufferLength = cpu_to_le16(blob_length);
713 iov[1].iov_base = security_blob;
714 iov[1].iov_len = blob_length;
715 757
716 inc_rfc1001_len(req, blob_length - 1 /* pad */); 758 inc_rfc1001_len(req, blob_length - 1 /* pad */);
717 759
@@ -722,6 +764,7 @@ ssetup_ntlmssp_authenticate:
722 764
723 kfree(security_blob); 765 kfree(security_blob);
724 rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; 766 rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
767 ses->Suid = rsp->hdr.SessionId;
725 if (resp_buftype != CIFS_NO_BUFFER && 768 if (resp_buftype != CIFS_NO_BUFFER &&
726 rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { 769 rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
727 if (phase != NtLmNegotiate) { 770 if (phase != NtLmNegotiate) {
@@ -739,7 +782,6 @@ ssetup_ntlmssp_authenticate:
739 /* NTLMSSP Negotiate sent now processing challenge (response) */ 782 /* NTLMSSP Negotiate sent now processing challenge (response) */
740 phase = NtLmChallenge; /* process ntlmssp challenge */ 783 phase = NtLmChallenge; /* process ntlmssp challenge */
741 rc = 0; /* MORE_PROCESSING is not an error here but expected */ 784 rc = 0; /* MORE_PROCESSING is not an error here but expected */
742 ses->Suid = rsp->hdr.SessionId;
743 rc = decode_ntlmssp_challenge(rsp->Buffer, 785 rc = decode_ntlmssp_challenge(rsp->Buffer,
744 le16_to_cpu(rsp->SecurityBufferLength), ses); 786 le16_to_cpu(rsp->SecurityBufferLength), ses);
745 } 787 }
@@ -796,6 +838,10 @@ keygen_exit:
796 kfree(ses->auth_key.response); 838 kfree(ses->auth_key.response);
797 ses->auth_key.response = NULL; 839 ses->auth_key.response = NULL;
798 } 840 }
841 if (spnego_key) {
842 key_invalidate(spnego_key);
843 key_put(spnego_key);
844 }
799 kfree(ses->ntlmssp); 845 kfree(ses->ntlmssp);
800 846
801 return rc; 847 return rc;
@@ -876,6 +922,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
876 if (tcon && tcon->bad_network_name) 922 if (tcon && tcon->bad_network_name)
877 return -ENOENT; 923 return -ENOENT;
878 924
925 if ((tcon && tcon->seal) &&
926 ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) {
927 cifs_dbg(VFS, "encryption requested but no server support");
928 return -EOPNOTSUPP;
929 }
930
879 unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL); 931 unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
880 if (unc_path == NULL) 932 if (unc_path == NULL)
881 return -ENOMEM; 933 return -ENOMEM;
@@ -955,6 +1007,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
955 ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) 1007 ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
956 cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); 1008 cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
957 init_copy_chunk_defaults(tcon); 1009 init_copy_chunk_defaults(tcon);
1010 if (tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA)
1011 cifs_dbg(VFS, "Encrypted shares not supported");
958 if (tcon->ses->server->ops->validate_negotiate) 1012 if (tcon->ses->server->ops->validate_negotiate)
959 rc = tcon->ses->server->ops->validate_negotiate(xid, tcon); 1013 rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
960tcon_exit: 1014tcon_exit:
diff --git a/fs/dax.c b/fs/dax.c
index 93bf2f990ace..a86d3cc2b389 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -119,7 +119,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
119 size_t len; 119 size_t len;
120 if (pos == max) { 120 if (pos == max) {
121 unsigned blkbits = inode->i_blkbits; 121 unsigned blkbits = inode->i_blkbits;
122 sector_t block = pos >> blkbits; 122 long page = pos >> PAGE_SHIFT;
123 sector_t block = page << (PAGE_SHIFT - blkbits);
123 unsigned first = pos - (block << blkbits); 124 unsigned first = pos - (block << blkbits);
124 long size; 125 long size;
125 126
@@ -284,6 +285,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh,
284static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, 285static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
285 struct vm_area_struct *vma, struct vm_fault *vmf) 286 struct vm_area_struct *vma, struct vm_fault *vmf)
286{ 287{
288 struct address_space *mapping = inode->i_mapping;
287 sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9); 289 sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
288 unsigned long vaddr = (unsigned long)vmf->virtual_address; 290 unsigned long vaddr = (unsigned long)vmf->virtual_address;
289 void __pmem *addr; 291 void __pmem *addr;
@@ -291,6 +293,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
291 pgoff_t size; 293 pgoff_t size;
292 int error; 294 int error;
293 295
296 i_mmap_lock_read(mapping);
297
294 /* 298 /*
295 * Check truncate didn't happen while we were allocating a block. 299 * Check truncate didn't happen while we were allocating a block.
296 * If it did, this block may or may not be still allocated to the 300 * If it did, this block may or may not be still allocated to the
@@ -320,6 +324,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
320 error = vm_insert_mixed(vma, vaddr, pfn); 324 error = vm_insert_mixed(vma, vaddr, pfn);
321 325
322 out: 326 out:
327 i_mmap_unlock_read(mapping);
328
323 return error; 329 return error;
324} 330}
325 331
@@ -381,17 +387,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
381 * from a read fault and we've raced with a truncate 387 * from a read fault and we've raced with a truncate
382 */ 388 */
383 error = -EIO; 389 error = -EIO;
384 goto unlock; 390 goto unlock_page;
385 } 391 }
386 } else {
387 i_mmap_lock_write(mapping);
388 } 392 }
389 393
390 error = get_block(inode, block, &bh, 0); 394 error = get_block(inode, block, &bh, 0);
391 if (!error && (bh.b_size < PAGE_SIZE)) 395 if (!error && (bh.b_size < PAGE_SIZE))
392 error = -EIO; /* fs corruption? */ 396 error = -EIO; /* fs corruption? */
393 if (error) 397 if (error)
394 goto unlock; 398 goto unlock_page;
395 399
396 if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) { 400 if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
397 if (vmf->flags & FAULT_FLAG_WRITE) { 401 if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -402,9 +406,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
402 if (!error && (bh.b_size < PAGE_SIZE)) 406 if (!error && (bh.b_size < PAGE_SIZE))
403 error = -EIO; 407 error = -EIO;
404 if (error) 408 if (error)
405 goto unlock; 409 goto unlock_page;
406 } else { 410 } else {
407 i_mmap_unlock_write(mapping);
408 return dax_load_hole(mapping, page, vmf); 411 return dax_load_hole(mapping, page, vmf);
409 } 412 }
410 } 413 }
@@ -416,15 +419,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
416 else 419 else
417 clear_user_highpage(new_page, vaddr); 420 clear_user_highpage(new_page, vaddr);
418 if (error) 421 if (error)
419 goto unlock; 422 goto unlock_page;
420 vmf->page = page; 423 vmf->page = page;
421 if (!page) { 424 if (!page) {
425 i_mmap_lock_read(mapping);
422 /* Check we didn't race with truncate */ 426 /* Check we didn't race with truncate */
423 size = (i_size_read(inode) + PAGE_SIZE - 1) >> 427 size = (i_size_read(inode) + PAGE_SIZE - 1) >>
424 PAGE_SHIFT; 428 PAGE_SHIFT;
425 if (vmf->pgoff >= size) { 429 if (vmf->pgoff >= size) {
430 i_mmap_unlock_read(mapping);
426 error = -EIO; 431 error = -EIO;
427 goto unlock; 432 goto out;
428 } 433 }
429 } 434 }
430 return VM_FAULT_LOCKED; 435 return VM_FAULT_LOCKED;
@@ -460,8 +465,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
460 WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); 465 WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
461 } 466 }
462 467
463 if (!page)
464 i_mmap_unlock_write(mapping);
465 out: 468 out:
466 if (error == -ENOMEM) 469 if (error == -ENOMEM)
467 return VM_FAULT_OOM | major; 470 return VM_FAULT_OOM | major;
@@ -470,14 +473,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
470 return VM_FAULT_SIGBUS | major; 473 return VM_FAULT_SIGBUS | major;
471 return VM_FAULT_NOPAGE | major; 474 return VM_FAULT_NOPAGE | major;
472 475
473 unlock: 476 unlock_page:
474 if (page) { 477 if (page) {
475 unlock_page(page); 478 unlock_page(page);
476 page_cache_release(page); 479 page_cache_release(page);
477 } else {
478 i_mmap_unlock_write(mapping);
479 } 480 }
480
481 goto out; 481 goto out;
482} 482}
483EXPORT_SYMBOL(__dax_fault); 483EXPORT_SYMBOL(__dax_fault);
@@ -555,10 +555,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
555 block = (sector_t)pgoff << (PAGE_SHIFT - blkbits); 555 block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
556 556
557 bh.b_size = PMD_SIZE; 557 bh.b_size = PMD_SIZE;
558 i_mmap_lock_write(mapping);
559 length = get_block(inode, block, &bh, write); 558 length = get_block(inode, block, &bh, write);
560 if (length) 559 if (length)
561 return VM_FAULT_SIGBUS; 560 return VM_FAULT_SIGBUS;
561 i_mmap_lock_read(mapping);
562 562
563 /* 563 /*
564 * If the filesystem isn't willing to tell us the length of a hole, 564 * If the filesystem isn't willing to tell us the length of a hole,
@@ -568,24 +568,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
568 if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) 568 if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
569 goto fallback; 569 goto fallback;
570 570
571 if (buffer_unwritten(&bh) || buffer_new(&bh)) {
572 int i;
573 for (i = 0; i < PTRS_PER_PMD; i++)
574 clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
575 wmb_pmem();
576 count_vm_event(PGMAJFAULT);
577 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
578 result |= VM_FAULT_MAJOR;
579 }
580
581 /* 571 /*
582 * If we allocated new storage, make sure no process has any 572 * If we allocated new storage, make sure no process has any
583 * zero pages covering this hole 573 * zero pages covering this hole
584 */ 574 */
585 if (buffer_new(&bh)) { 575 if (buffer_new(&bh)) {
586 i_mmap_unlock_write(mapping); 576 i_mmap_unlock_read(mapping);
587 unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0); 577 unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
588 i_mmap_lock_write(mapping); 578 i_mmap_lock_read(mapping);
589 } 579 }
590 580
591 /* 581 /*
@@ -632,15 +622,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
632 if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) 622 if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
633 goto fallback; 623 goto fallback;
634 624
625 if (buffer_unwritten(&bh) || buffer_new(&bh)) {
626 int i;
627 for (i = 0; i < PTRS_PER_PMD; i++)
628 clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
629 wmb_pmem();
630 count_vm_event(PGMAJFAULT);
631 mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
632 result |= VM_FAULT_MAJOR;
633 }
634
635 result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write); 635 result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
636 } 636 }
637 637
638 out: 638 out:
639 i_mmap_unlock_read(mapping);
640
639 if (buffer_unwritten(&bh)) 641 if (buffer_unwritten(&bh))
640 complete_unwritten(&bh, !(result & VM_FAULT_ERROR)); 642 complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
641 643
642 i_mmap_unlock_write(mapping);
643
644 return result; 644 return result;
645 645
646 fallback: 646 fallback:
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 47728da7702c..b46e9fc64196 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -63,7 +63,7 @@ config EXT4_FS
63 If unsure, say N. 63 If unsure, say N.
64 64
65config EXT4_USE_FOR_EXT2 65config EXT4_USE_FOR_EXT2
66 bool "Use ext4 for ext2/ext3 file systems" 66 bool "Use ext4 for ext2 file systems"
67 depends on EXT4_FS 67 depends on EXT4_FS
68 depends on EXT2_FS=n 68 depends on EXT2_FS=n
69 default y 69 default y
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index e26803fb210d..560af0437704 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -165,8 +165,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
165 if (pages) { 165 if (pages) {
166 page = list_entry(pages->prev, struct page, lru); 166 page = list_entry(pages->prev, struct page, lru);
167 list_del(&page->lru); 167 list_del(&page->lru);
168 if (add_to_page_cache_lru(page, mapping, 168 if (add_to_page_cache_lru(page, mapping, page->index,
169 page->index, GFP_KERNEL)) 169 GFP_KERNEL & mapping_gfp_mask(mapping)))
170 goto next_page; 170 goto next_page;
171 } 171 }
172 172
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 587ac08eabb6..091a36444972 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1481,6 +1481,21 @@ static long writeback_sb_inodes(struct super_block *sb,
1481 wbc_detach_inode(&wbc); 1481 wbc_detach_inode(&wbc);
1482 work->nr_pages -= write_chunk - wbc.nr_to_write; 1482 work->nr_pages -= write_chunk - wbc.nr_to_write;
1483 wrote += write_chunk - wbc.nr_to_write; 1483 wrote += write_chunk - wbc.nr_to_write;
1484
1485 if (need_resched()) {
1486 /*
1487 * We're trying to balance between building up a nice
1488 * long list of IOs to improve our merge rate, and
1489 * getting those IOs out quickly for anyone throttling
1490 * in balance_dirty_pages(). cond_resched() doesn't
1491 * unplug, so get our IOs out the door before we
1492 * give up the CPU.
1493 */
1494 blk_flush_plug(current);
1495 cond_resched();
1496 }
1497
1498
1484 spin_lock(&wb->list_lock); 1499 spin_lock(&wb->list_lock);
1485 spin_lock(&inode->i_lock); 1500 spin_lock(&inode->i_lock);
1486 if (!(inode->i_state & I_DIRTY_ALL)) 1501 if (!(inode->i_state & I_DIRTY_ALL))
@@ -1488,7 +1503,7 @@ static long writeback_sb_inodes(struct super_block *sb,
1488 requeue_inode(inode, wb, &wbc); 1503 requeue_inode(inode, wb, &wbc);
1489 inode_sync_complete(inode); 1504 inode_sync_complete(inode);
1490 spin_unlock(&inode->i_lock); 1505 spin_unlock(&inode->i_lock);
1491 cond_resched_lock(&wb->list_lock); 1506
1492 /* 1507 /*
1493 * bail out to wb_writeback() often enough to check 1508 * bail out to wb_writeback() often enough to check
1494 * background threshold and other termination conditions. 1509 * background threshold and other termination conditions.
diff --git a/fs/mpage.c b/fs/mpage.c
index 778a4ddef77a..a7c34274f207 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -139,7 +139,8 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
139static struct bio * 139static struct bio *
140do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 140do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
141 sector_t *last_block_in_bio, struct buffer_head *map_bh, 141 sector_t *last_block_in_bio, struct buffer_head *map_bh,
142 unsigned long *first_logical_block, get_block_t get_block) 142 unsigned long *first_logical_block, get_block_t get_block,
143 gfp_t gfp)
143{ 144{
144 struct inode *inode = page->mapping->host; 145 struct inode *inode = page->mapping->host;
145 const unsigned blkbits = inode->i_blkbits; 146 const unsigned blkbits = inode->i_blkbits;
@@ -277,8 +278,7 @@ alloc_new:
277 goto out; 278 goto out;
278 } 279 }
279 bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), 280 bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
280 min_t(int, nr_pages, BIO_MAX_PAGES), 281 min_t(int, nr_pages, BIO_MAX_PAGES), gfp);
281 GFP_KERNEL);
282 if (bio == NULL) 282 if (bio == NULL)
283 goto confused; 283 goto confused;
284 } 284 }
@@ -361,6 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
361 sector_t last_block_in_bio = 0; 361 sector_t last_block_in_bio = 0;
362 struct buffer_head map_bh; 362 struct buffer_head map_bh;
363 unsigned long first_logical_block = 0; 363 unsigned long first_logical_block = 0;
364 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
364 365
365 map_bh.b_state = 0; 366 map_bh.b_state = 0;
366 map_bh.b_size = 0; 367 map_bh.b_size = 0;
@@ -370,12 +371,13 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
370 prefetchw(&page->flags); 371 prefetchw(&page->flags);
371 list_del(&page->lru); 372 list_del(&page->lru);
372 if (!add_to_page_cache_lru(page, mapping, 373 if (!add_to_page_cache_lru(page, mapping,
373 page->index, GFP_KERNEL)) { 374 page->index,
375 gfp)) {
374 bio = do_mpage_readpage(bio, page, 376 bio = do_mpage_readpage(bio, page,
375 nr_pages - page_idx, 377 nr_pages - page_idx,
376 &last_block_in_bio, &map_bh, 378 &last_block_in_bio, &map_bh,
377 &first_logical_block, 379 &first_logical_block,
378 get_block); 380 get_block, gfp);
379 } 381 }
380 page_cache_release(page); 382 page_cache_release(page);
381 } 383 }
@@ -395,11 +397,12 @@ int mpage_readpage(struct page *page, get_block_t get_block)
395 sector_t last_block_in_bio = 0; 397 sector_t last_block_in_bio = 0;
396 struct buffer_head map_bh; 398 struct buffer_head map_bh;
397 unsigned long first_logical_block = 0; 399 unsigned long first_logical_block = 0;
400 gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping);
398 401
399 map_bh.b_state = 0; 402 map_bh.b_state = 0;
400 map_bh.b_size = 0; 403 map_bh.b_size = 0;
401 bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, 404 bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
402 &map_bh, &first_logical_block, get_block); 405 &map_bh, &first_logical_block, get_block, gfp);
403 if (bio) 406 if (bio)
404 mpage_bio_submit(READ, bio); 407 mpage_bio_submit(READ, bio);
405 return 0; 408 return 0;
diff --git a/fs/namei.c b/fs/namei.c
index 726d211db484..33e9495a3129 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1558,8 +1558,6 @@ static int lookup_fast(struct nameidata *nd,
1558 negative = d_is_negative(dentry); 1558 negative = d_is_negative(dentry);
1559 if (read_seqcount_retry(&dentry->d_seq, seq)) 1559 if (read_seqcount_retry(&dentry->d_seq, seq))
1560 return -ECHILD; 1560 return -ECHILD;
1561 if (negative)
1562 return -ENOENT;
1563 1561
1564 /* 1562 /*
1565 * This sequence count validates that the parent had no 1563 * This sequence count validates that the parent had no
@@ -1580,6 +1578,12 @@ static int lookup_fast(struct nameidata *nd,
1580 goto unlazy; 1578 goto unlazy;
1581 } 1579 }
1582 } 1580 }
1581 /*
1582 * Note: do negative dentry check after revalidation in
1583 * case that drops it.
1584 */
1585 if (negative)
1586 return -ENOENT;
1583 path->mnt = mnt; 1587 path->mnt = mnt;
1584 path->dentry = dentry; 1588 path->dentry = dentry;
1585 if (likely(__follow_mount_rcu(nd, path, inode, seqp))) 1589 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 2714ef835bdd..be806ead7f4d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -113,7 +113,8 @@ out:
113 return status; 113 return status;
114} 114}
115 115
116static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) 116static int nfs_delegation_claim_opens(struct inode *inode,
117 const nfs4_stateid *stateid, fmode_t type)
117{ 118{
118 struct nfs_inode *nfsi = NFS_I(inode); 119 struct nfs_inode *nfsi = NFS_I(inode);
119 struct nfs_open_context *ctx; 120 struct nfs_open_context *ctx;
@@ -140,7 +141,7 @@ again:
140 /* Block nfs4_proc_unlck */ 141 /* Block nfs4_proc_unlck */
141 mutex_lock(&sp->so_delegreturn_mutex); 142 mutex_lock(&sp->so_delegreturn_mutex);
142 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 143 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
143 err = nfs4_open_delegation_recall(ctx, state, stateid); 144 err = nfs4_open_delegation_recall(ctx, state, stateid, type);
144 if (!err) 145 if (!err)
145 err = nfs_delegation_claim_locks(ctx, state, stateid); 146 err = nfs_delegation_claim_locks(ctx, state, stateid);
146 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) 147 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -411,7 +412,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
411 do { 412 do {
412 if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) 413 if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
413 break; 414 break;
414 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 415 err = nfs_delegation_claim_opens(inode, &delegation->stateid,
416 delegation->type);
415 if (!issync || err != -EAGAIN) 417 if (!issync || err != -EAGAIN)
416 break; 418 break;
417 /* 419 /*
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index a44829173e57..333063e032f0 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
54 54
55/* NFSv4 delegation-related procedures */ 55/* NFSv4 delegation-related procedures */
56int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); 56int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
57int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); 57int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
58int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); 58int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
59bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); 59bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
60 60
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 38678d9a5cc4..4b1d08f56aba 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -166,8 +166,11 @@ nfs_direct_select_verf(struct nfs_direct_req *dreq,
166 struct nfs_writeverf *verfp = &dreq->verf; 166 struct nfs_writeverf *verfp = &dreq->verf;
167 167
168#ifdef CONFIG_NFS_V4_1 168#ifdef CONFIG_NFS_V4_1
169 if (ds_clp) { 169 /*
170 /* pNFS is in use, use the DS verf */ 170 * pNFS is in use, use the DS verf except commit_through_mds is set
171 * for layout segment where nbuckets is zero.
172 */
173 if (ds_clp && dreq->ds_cinfo.nbuckets > 0) {
171 if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) 174 if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets)
172 verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; 175 verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf;
173 else 176 else
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index b34f2e228601..02ec07973bc4 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -629,23 +629,18 @@ out_put:
629 goto out; 629 goto out;
630} 630}
631 631
632static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) 632static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
633{ 633{
634 int i; 634 int i;
635 635
636 for (i = 0; i < fl->num_fh; i++) { 636 if (fl->fh_array) {
637 if (!fl->fh_array[i]) 637 for (i = 0; i < fl->num_fh; i++) {
638 break; 638 if (!fl->fh_array[i])
639 kfree(fl->fh_array[i]); 639 break;
640 kfree(fl->fh_array[i]);
641 }
642 kfree(fl->fh_array);
640 } 643 }
641 kfree(fl->fh_array);
642 fl->fh_array = NULL;
643}
644
645static void
646_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
647{
648 filelayout_free_fh_array(fl);
649 kfree(fl); 644 kfree(fl);
650} 645}
651 646
@@ -716,21 +711,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
716 /* Do we want to use a mempool here? */ 711 /* Do we want to use a mempool here? */
717 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); 712 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
718 if (!fl->fh_array[i]) 713 if (!fl->fh_array[i])
719 goto out_err_free; 714 goto out_err;
720 715
721 p = xdr_inline_decode(&stream, 4); 716 p = xdr_inline_decode(&stream, 4);
722 if (unlikely(!p)) 717 if (unlikely(!p))
723 goto out_err_free; 718 goto out_err;
724 fl->fh_array[i]->size = be32_to_cpup(p++); 719 fl->fh_array[i]->size = be32_to_cpup(p++);
725 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 720 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
726 printk(KERN_ERR "NFS: Too big fh %d received %d\n", 721 printk(KERN_ERR "NFS: Too big fh %d received %d\n",
727 i, fl->fh_array[i]->size); 722 i, fl->fh_array[i]->size);
728 goto out_err_free; 723 goto out_err;
729 } 724 }
730 725
731 p = xdr_inline_decode(&stream, fl->fh_array[i]->size); 726 p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
732 if (unlikely(!p)) 727 if (unlikely(!p))
733 goto out_err_free; 728 goto out_err;
734 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); 729 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
735 dprintk("DEBUG: %s: fh len %d\n", __func__, 730 dprintk("DEBUG: %s: fh len %d\n", __func__,
736 fl->fh_array[i]->size); 731 fl->fh_array[i]->size);
@@ -739,8 +734,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
739 __free_page(scratch); 734 __free_page(scratch);
740 return 0; 735 return 0;
741 736
742out_err_free:
743 filelayout_free_fh_array(fl);
744out_err: 737out_err:
745 __free_page(scratch); 738 __free_page(scratch);
746 return -EIO; 739 return -EIO;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d731bbf974aa..0f020e4d8421 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -175,10 +175,12 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
175{ 175{
176 struct nfs_server *server = NFS_SERVER(file_inode(filep)); 176 struct nfs_server *server = NFS_SERVER(file_inode(filep));
177 struct nfs4_exception exception = { }; 177 struct nfs4_exception exception = { };
178 int err; 178 loff_t err;
179 179
180 do { 180 do {
181 err = _nfs42_proc_llseek(filep, offset, whence); 181 err = _nfs42_proc_llseek(filep, offset, whence);
182 if (err >= 0)
183 break;
182 if (err == -ENOTSUPP) 184 if (err == -ENOTSUPP)
183 return -EOPNOTSUPP; 185 return -EOPNOTSUPP;
184 err = nfs4_handle_exception(server, err, &exception); 186 err = nfs4_handle_exception(server, err, &exception);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 693b903b48bd..5133bb18830e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1127,6 +1127,21 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
1127 return ret; 1127 return ret;
1128} 1128}
1129 1129
1130static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
1131 fmode_t fmode)
1132{
1133 switch(fmode & (FMODE_READ|FMODE_WRITE)) {
1134 case FMODE_READ|FMODE_WRITE:
1135 return state->n_rdwr != 0;
1136 case FMODE_WRITE:
1137 return state->n_wronly != 0;
1138 case FMODE_READ:
1139 return state->n_rdonly != 0;
1140 }
1141 WARN_ON_ONCE(1);
1142 return false;
1143}
1144
1130static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) 1145static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
1131{ 1146{
1132 int ret = 0; 1147 int ret = 0;
@@ -1443,12 +1458,18 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
1443 if (delegation) 1458 if (delegation)
1444 delegation_flags = delegation->flags; 1459 delegation_flags = delegation->flags;
1445 rcu_read_unlock(); 1460 rcu_read_unlock();
1446 if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { 1461 switch (data->o_arg.claim) {
1462 default:
1463 break;
1464 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
1465 case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
1447 pr_err_ratelimited("NFS: Broken NFSv4 server %s is " 1466 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1448 "returning a delegation for " 1467 "returning a delegation for "
1449 "OPEN(CLAIM_DELEGATE_CUR)\n", 1468 "OPEN(CLAIM_DELEGATE_CUR)\n",
1450 clp->cl_hostname); 1469 clp->cl_hostname);
1451 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1470 return;
1471 }
1472 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1452 nfs_inode_set_delegation(state->inode, 1473 nfs_inode_set_delegation(state->inode,
1453 data->owner->so_cred, 1474 data->owner->so_cred,
1454 &data->o_res); 1475 &data->o_res);
@@ -1571,17 +1592,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
1571 return opendata; 1592 return opendata;
1572} 1593}
1573 1594
1574static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) 1595static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
1596 fmode_t fmode)
1575{ 1597{
1576 struct nfs4_state *newstate; 1598 struct nfs4_state *newstate;
1577 int ret; 1599 int ret;
1578 1600
1579 if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || 1601 if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
1580 opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
1581 (opendata->o_arg.u.delegation_type & fmode) != fmode)
1582 /* This mode can't have been delegated, so we must have
1583 * a valid open_stateid to cover it - not need to reclaim.
1584 */
1585 return 0; 1602 return 0;
1586 opendata->o_arg.open_flags = 0; 1603 opendata->o_arg.open_flags = 0;
1587 opendata->o_arg.fmode = fmode; 1604 opendata->o_arg.fmode = fmode;
@@ -1597,14 +1614,14 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1597 newstate = nfs4_opendata_to_nfs4_state(opendata); 1614 newstate = nfs4_opendata_to_nfs4_state(opendata);
1598 if (IS_ERR(newstate)) 1615 if (IS_ERR(newstate))
1599 return PTR_ERR(newstate); 1616 return PTR_ERR(newstate);
1617 if (newstate != opendata->state)
1618 ret = -ESTALE;
1600 nfs4_close_state(newstate, fmode); 1619 nfs4_close_state(newstate, fmode);
1601 *res = newstate; 1620 return ret;
1602 return 0;
1603} 1621}
1604 1622
1605static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) 1623static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
1606{ 1624{
1607 struct nfs4_state *newstate;
1608 int ret; 1625 int ret;
1609 1626
1610 /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ 1627 /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
@@ -1615,27 +1632,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1615 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1632 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1616 clear_bit(NFS_OPEN_STATE, &state->flags); 1633 clear_bit(NFS_OPEN_STATE, &state->flags);
1617 smp_rmb(); 1634 smp_rmb();
1618 if (state->n_rdwr != 0) { 1635 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
1619 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1636 if (ret != 0)
1620 if (ret != 0) 1637 return ret;
1621 return ret; 1638 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE);
1622 if (newstate != state) 1639 if (ret != 0)
1623 return -ESTALE; 1640 return ret;
1624 } 1641 ret = nfs4_open_recover_helper(opendata, FMODE_READ);
1625 if (state->n_wronly != 0) { 1642 if (ret != 0)
1626 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1643 return ret;
1627 if (ret != 0)
1628 return ret;
1629 if (newstate != state)
1630 return -ESTALE;
1631 }
1632 if (state->n_rdonly != 0) {
1633 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1634 if (ret != 0)
1635 return ret;
1636 if (newstate != state)
1637 return -ESTALE;
1638 }
1639 /* 1644 /*
1640 * We may have performed cached opens for all three recoveries. 1645 * We may have performed cached opens for all three recoveries.
1641 * Check if we need to update the current stateid. 1646 * Check if we need to update the current stateid.
@@ -1759,18 +1764,35 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
1759 return err; 1764 return err;
1760} 1765}
1761 1766
1762int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) 1767int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
1768 struct nfs4_state *state, const nfs4_stateid *stateid,
1769 fmode_t type)
1763{ 1770{
1764 struct nfs_server *server = NFS_SERVER(state->inode); 1771 struct nfs_server *server = NFS_SERVER(state->inode);
1765 struct nfs4_opendata *opendata; 1772 struct nfs4_opendata *opendata;
1766 int err; 1773 int err = 0;
1767 1774
1768 opendata = nfs4_open_recoverdata_alloc(ctx, state, 1775 opendata = nfs4_open_recoverdata_alloc(ctx, state,
1769 NFS4_OPEN_CLAIM_DELEG_CUR_FH); 1776 NFS4_OPEN_CLAIM_DELEG_CUR_FH);
1770 if (IS_ERR(opendata)) 1777 if (IS_ERR(opendata))
1771 return PTR_ERR(opendata); 1778 return PTR_ERR(opendata);
1772 nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); 1779 nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
1773 err = nfs4_open_recover(opendata, state); 1780 write_seqlock(&state->seqlock);
1781 nfs4_stateid_copy(&state->stateid, &state->open_stateid);
1782 write_sequnlock(&state->seqlock);
1783 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1784 switch (type & (FMODE_READ|FMODE_WRITE)) {
1785 case FMODE_READ|FMODE_WRITE:
1786 case FMODE_WRITE:
1787 err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
1788 if (err)
1789 break;
1790 err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
1791 if (err)
1792 break;
1793 case FMODE_READ:
1794 err = nfs4_open_recover_helper(opendata, FMODE_READ);
1795 }
1774 nfs4_opendata_put(opendata); 1796 nfs4_opendata_put(opendata);
1775 return nfs4_handle_delegation_recall_error(server, state, stateid, err); 1797 return nfs4_handle_delegation_recall_error(server, state, stateid, err);
1776} 1798}
@@ -1850,6 +1872,8 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
1850 data->rpc_done = 0; 1872 data->rpc_done = 0;
1851 data->rpc_status = 0; 1873 data->rpc_status = 0;
1852 data->timestamp = jiffies; 1874 data->timestamp = jiffies;
1875 if (data->is_recover)
1876 nfs4_set_sequence_privileged(&data->c_arg.seq_args);
1853 task = rpc_run_task(&task_setup_data); 1877 task = rpc_run_task(&task_setup_data);
1854 if (IS_ERR(task)) 1878 if (IS_ERR(task))
1855 return PTR_ERR(task); 1879 return PTR_ERR(task);
@@ -2645,6 +2669,15 @@ out:
2645 return err; 2669 return err;
2646} 2670}
2647 2671
2672static bool
2673nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
2674{
2675 if (inode == NULL || !nfs_have_layout(inode))
2676 return false;
2677
2678 return pnfs_wait_on_layoutreturn(inode, task);
2679}
2680
2648struct nfs4_closedata { 2681struct nfs4_closedata {
2649 struct inode *inode; 2682 struct inode *inode;
2650 struct nfs4_state *state; 2683 struct nfs4_state *state;
@@ -2763,6 +2796,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2763 goto out_no_action; 2796 goto out_no_action;
2764 } 2797 }
2765 2798
2799 if (nfs4_wait_on_layoutreturn(inode, task)) {
2800 nfs_release_seqid(calldata->arg.seqid);
2801 goto out_wait;
2802 }
2803
2766 if (calldata->arg.fmode == 0) 2804 if (calldata->arg.fmode == 0)
2767 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2805 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2768 if (calldata->roc) 2806 if (calldata->roc)
@@ -5308,6 +5346,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
5308 5346
5309 d_data = (struct nfs4_delegreturndata *)data; 5347 d_data = (struct nfs4_delegreturndata *)data;
5310 5348
5349 if (nfs4_wait_on_layoutreturn(d_data->inode, task))
5350 return;
5351
5311 if (d_data->roc) 5352 if (d_data->roc)
5312 pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier); 5353 pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier);
5313 5354
@@ -7800,39 +7841,46 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
7800 dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", 7841 dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
7801 __func__, delay); 7842 __func__, delay);
7802 rpc_delay(task, delay); 7843 rpc_delay(task, delay);
7803 task->tk_status = 0; 7844 /* Do not call nfs4_async_handle_error() */
7804 rpc_restart_call_prepare(task); 7845 goto out_restart;
7805 goto out; /* Do not call nfs4_async_handle_error() */
7806 } 7846 }
7807 break; 7847 break;
7808 case -NFS4ERR_EXPIRED: 7848 case -NFS4ERR_EXPIRED:
7809 case -NFS4ERR_BAD_STATEID: 7849 case -NFS4ERR_BAD_STATEID:
7810 spin_lock(&inode->i_lock); 7850 spin_lock(&inode->i_lock);
7811 lo = NFS_I(inode)->layout; 7851 if (nfs4_stateid_match(&lgp->args.stateid,
7812 if (!lo || list_empty(&lo->plh_segs)) { 7852 &lgp->args.ctx->state->stateid)) {
7813 spin_unlock(&inode->i_lock); 7853 spin_unlock(&inode->i_lock);
7814 /* If the open stateid was bad, then recover it. */ 7854 /* If the open stateid was bad, then recover it. */
7815 state = lgp->args.ctx->state; 7855 state = lgp->args.ctx->state;
7816 } else { 7856 break;
7857 }
7858 lo = NFS_I(inode)->layout;
7859 if (lo && nfs4_stateid_match(&lgp->args.stateid,
7860 &lo->plh_stateid)) {
7817 LIST_HEAD(head); 7861 LIST_HEAD(head);
7818 7862
7819 /* 7863 /*
7820 * Mark the bad layout state as invalid, then retry 7864 * Mark the bad layout state as invalid, then retry
7821 * with the current stateid. 7865 * with the current stateid.
7822 */ 7866 */
7867 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
7823 pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); 7868 pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
7824 spin_unlock(&inode->i_lock); 7869 spin_unlock(&inode->i_lock);
7825 pnfs_free_lseg_list(&head); 7870 pnfs_free_lseg_list(&head);
7826 7871 } else
7827 task->tk_status = 0; 7872 spin_unlock(&inode->i_lock);
7828 rpc_restart_call_prepare(task); 7873 goto out_restart;
7829 }
7830 } 7874 }
7831 if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) 7875 if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
7832 rpc_restart_call_prepare(task); 7876 goto out_restart;
7833out: 7877out:
7834 dprintk("<-- %s\n", __func__); 7878 dprintk("<-- %s\n", __func__);
7835 return; 7879 return;
7880out_restart:
7881 task->tk_status = 0;
7882 rpc_restart_call_prepare(task);
7883 return;
7836out_overflow: 7884out_overflow:
7837 task->tk_status = -EOVERFLOW; 7885 task->tk_status = -EOVERFLOW;
7838 goto out; 7886 goto out;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index da73bc443238..d854693a15b0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1481,7 +1481,7 @@ restart:
1481 spin_unlock(&state->state_lock); 1481 spin_unlock(&state->state_lock);
1482 } 1482 }
1483 nfs4_put_open_state(state); 1483 nfs4_put_open_state(state);
1484 clear_bit(NFS4CLNT_RECLAIM_NOGRACE, 1484 clear_bit(NFS_STATE_RECLAIM_NOGRACE,
1485 &state->flags); 1485 &state->flags);
1486 spin_lock(&sp->so_lock); 1486 spin_lock(&sp->so_lock);
1487 goto restart; 1487 goto restart;
@@ -1725,7 +1725,8 @@ restart:
1725 if (!test_and_clear_bit(ops->owner_flag_bit, 1725 if (!test_and_clear_bit(ops->owner_flag_bit,
1726 &sp->so_flags)) 1726 &sp->so_flags))
1727 continue; 1727 continue;
1728 atomic_inc(&sp->so_count); 1728 if (!atomic_inc_not_zero(&sp->so_count))
1729 continue;
1729 spin_unlock(&clp->cl_lock); 1730 spin_unlock(&clp->cl_lock);
1730 rcu_read_unlock(); 1731 rcu_read_unlock();
1731 1732
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 28df12e525ba..671cf68fe56b 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -409,7 +409,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
409 __entry->flags = flags; 409 __entry->flags = flags;
410 __entry->fmode = (__force unsigned int)ctx->mode; 410 __entry->fmode = (__force unsigned int)ctx->mode;
411 __entry->dev = ctx->dentry->d_sb->s_dev; 411 __entry->dev = ctx->dentry->d_sb->s_dev;
412 if (!IS_ERR(state)) 412 if (!IS_ERR_OR_NULL(state))
413 inode = state->inode; 413 inode = state->inode;
414 if (inode != NULL) { 414 if (inode != NULL) {
415 __entry->fileid = NFS_FILEID(inode); 415 __entry->fileid = NFS_FILEID(inode);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7c5718ba625e..fe3ddd20ff89 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
508 * for it without upsetting the slab allocator. 508 * for it without upsetting the slab allocator.
509 */ 509 */
510 if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * 510 if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) *
511 sizeof(struct page) > PAGE_SIZE) 511 sizeof(struct page *) > PAGE_SIZE)
512 return 0; 512 return 0;
513 513
514 return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); 514 return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ba1246433794..8abe27165ad0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1104,20 +1104,15 @@ bool pnfs_roc(struct inode *ino)
1104 mark_lseg_invalid(lseg, &tmp_list); 1104 mark_lseg_invalid(lseg, &tmp_list);
1105 found = true; 1105 found = true;
1106 } 1106 }
1107 /* pnfs_prepare_layoutreturn() grabs lo ref and it will be put 1107 /* ROC in two conditions:
1108 * in pnfs_roc_release(). We don't really send a layoutreturn but
1109 * still want others to view us like we are sending one!
1110 *
1111 * If pnfs_prepare_layoutreturn() fails, it means someone else is doing
1112 * LAYOUTRETURN, so we proceed like there are no layouts to return.
1113 *
1114 * ROC in three conditions:
1115 * 1. there are ROC lsegs 1108 * 1. there are ROC lsegs
1116 * 2. we don't send layoutreturn 1109 * 2. we don't send layoutreturn
1117 * 3. no others are sending layoutreturn
1118 */ 1110 */
1119 if (found && !layoutreturn && pnfs_prepare_layoutreturn(lo)) 1111 if (found && !layoutreturn) {
1112 /* lo ref dropped in pnfs_roc_release() */
1113 pnfs_get_layout_hdr(lo);
1120 roc = true; 1114 roc = true;
1115 }
1121 1116
1122out_noroc: 1117out_noroc:
1123 spin_unlock(&ino->i_lock); 1118 spin_unlock(&ino->i_lock);
@@ -1172,6 +1167,26 @@ void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
1172 spin_unlock(&ino->i_lock); 1167 spin_unlock(&ino->i_lock);
1173} 1168}
1174 1169
1170bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
1171{
1172 struct nfs_inode *nfsi = NFS_I(ino);
1173 struct pnfs_layout_hdr *lo;
1174 bool sleep = false;
1175
1176 /* we might not have grabbed lo reference. so need to check under
1177 * i_lock */
1178 spin_lock(&ino->i_lock);
1179 lo = nfsi->layout;
1180 if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
1181 sleep = true;
1182 spin_unlock(&ino->i_lock);
1183
1184 if (sleep)
1185 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
1186
1187 return sleep;
1188}
1189
1175/* 1190/*
1176 * Compare two layout segments for sorting into layout cache. 1191 * Compare two layout segments for sorting into layout cache.
1177 * We want to preferentially return RW over RO layouts, so ensure those 1192 * We want to preferentially return RW over RO layouts, so ensure those
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 78c9351ff117..d1990e90e7a0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -270,6 +270,7 @@ bool pnfs_roc(struct inode *ino);
270void pnfs_roc_release(struct inode *ino); 270void pnfs_roc_release(struct inode *ino);
271void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 271void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
272void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier); 272void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier);
273bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task);
273void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); 274void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
274void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 275void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
275int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 276int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
@@ -639,6 +640,12 @@ pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
639{ 640{
640} 641}
641 642
643static inline bool
644pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
645{
646 return false;
647}
648
642static inline void set_pnfs_layoutdriver(struct nfs_server *s, 649static inline void set_pnfs_layoutdriver(struct nfs_server *s,
643 const struct nfs_fh *mntfh, u32 id) 650 const struct nfs_fh *mntfh, u32 id)
644{ 651{
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ae0ff7a11b40..01b8cc8e8cfc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
72{ 72{
73 struct nfs_pgio_mirror *mirror; 73 struct nfs_pgio_mirror *mirror;
74 74
75 if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
76 pgio->pg_ops->pg_cleanup(pgio);
77
75 pgio->pg_ops = &nfs_pgio_rw_ops; 78 pgio->pg_ops = &nfs_pgio_rw_ops;
76 79
77 /* read path should never have more than one mirror */ 80 /* read path should never have more than one mirror */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 388f48079c43..75ab7622e0cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -569,19 +569,17 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
569 if (!nfs_pageio_add_request(pgio, req)) { 569 if (!nfs_pageio_add_request(pgio, req)) {
570 nfs_redirty_request(req); 570 nfs_redirty_request(req);
571 ret = pgio->pg_error; 571 ret = pgio->pg_error;
572 } 572 } else
573 nfs_add_stats(page_file_mapping(page)->host,
574 NFSIOS_WRITEPAGES, 1);
573out: 575out:
574 return ret; 576 return ret;
575} 577}
576 578
577static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 579static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
578{ 580{
579 struct inode *inode = page_file_mapping(page)->host;
580 int ret; 581 int ret;
581 582
582 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
583 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
584
585 nfs_pageio_cond_complete(pgio, page_file_index(page)); 583 nfs_pageio_cond_complete(pgio, page_file_index(page));
586 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); 584 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
587 if (ret == -EAGAIN) { 585 if (ret == -EAGAIN) {
@@ -597,9 +595,11 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
597static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) 595static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
598{ 596{
599 struct nfs_pageio_descriptor pgio; 597 struct nfs_pageio_descriptor pgio;
598 struct inode *inode = page_file_mapping(page)->host;
600 int err; 599 int err;
601 600
602 nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), 601 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
602 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
603 false, &nfs_async_write_completion_ops); 603 false, &nfs_async_write_completion_ops);
604 err = nfs_do_writepage(page, wbc, &pgio); 604 err = nfs_do_writepage(page, wbc, &pgio);
605 nfs_pageio_complete(&pgio); 605 nfs_pageio_complete(&pgio);
@@ -1223,7 +1223,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
1223 return 1; 1223 return 1;
1224 if (!flctx || (list_empty_careful(&flctx->flc_flock) && 1224 if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
1225 list_empty_careful(&flctx->flc_posix))) 1225 list_empty_careful(&flctx->flc_posix)))
1226 return 0; 1226 return 1;
1227 1227
1228 /* Check to see if there are whole file write locks */ 1228 /* Check to see if there are whole file write locks */
1229 ret = 0; 1229 ret = 0;
@@ -1351,6 +1351,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1351{ 1351{
1352 struct nfs_pgio_mirror *mirror; 1352 struct nfs_pgio_mirror *mirror;
1353 1353
1354 if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
1355 pgio->pg_ops->pg_cleanup(pgio);
1356
1354 pgio->pg_ops = &nfs_pgio_rw_ops; 1357 pgio->pg_ops = &nfs_pgio_rw_ops;
1355 1358
1356 nfs_pageio_stop_mirroring(pgio); 1359 nfs_pageio_stop_mirroring(pgio);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdefaa331a07..c29d9421bd5e 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -56,14 +56,6 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
56 u32 device_generation = 0; 56 u32 device_generation = 0;
57 int error; 57 int error;
58 58
59 /*
60 * We do not attempt to support I/O smaller than the fs block size,
61 * or not aligned to it.
62 */
63 if (args->lg_minlength < block_size) {
64 dprintk("pnfsd: I/O too small\n");
65 goto out_layoutunavailable;
66 }
67 if (seg->offset & (block_size - 1)) { 59 if (seg->offset & (block_size - 1)) {
68 dprintk("pnfsd: I/O misaligned\n"); 60 dprintk("pnfsd: I/O misaligned\n");
69 goto out_layoutunavailable; 61 goto out_layoutunavailable;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 46b8b2bbc95a..ee5aa4daaea0 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
1439 int found, ret; 1439 int found, ret;
1440 int set_maybe; 1440 int set_maybe;
1441 int dispatch_assert = 0; 1441 int dispatch_assert = 0;
1442 int dispatched = 0;
1442 1443
1443 if (!dlm_grab(dlm)) 1444 if (!dlm_grab(dlm))
1444 return DLM_MASTER_RESP_NO; 1445 return DLM_MASTER_RESP_NO;
@@ -1658,15 +1659,18 @@ send_response:
1658 mlog(ML_ERROR, "failed to dispatch assert master work\n"); 1659 mlog(ML_ERROR, "failed to dispatch assert master work\n");
1659 response = DLM_MASTER_RESP_ERROR; 1660 response = DLM_MASTER_RESP_ERROR;
1660 dlm_lockres_put(res); 1661 dlm_lockres_put(res);
1661 } else 1662 } else {
1663 dispatched = 1;
1662 __dlm_lockres_grab_inflight_worker(dlm, res); 1664 __dlm_lockres_grab_inflight_worker(dlm, res);
1665 }
1663 spin_unlock(&res->spinlock); 1666 spin_unlock(&res->spinlock);
1664 } else { 1667 } else {
1665 if (res) 1668 if (res)
1666 dlm_lockres_put(res); 1669 dlm_lockres_put(res);
1667 } 1670 }
1668 1671
1669 dlm_put(dlm); 1672 if (!dispatched)
1673 dlm_put(dlm);
1670 return response; 1674 return response;
1671} 1675}
1672 1676
@@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
2090 2094
2091 2095
2092 /* queue up work for dlm_assert_master_worker */ 2096 /* queue up work for dlm_assert_master_worker */
2093 dlm_grab(dlm); /* get an extra ref for the work item */
2094 dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); 2097 dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
2095 item->u.am.lockres = res; /* already have a ref */ 2098 item->u.am.lockres = res; /* already have a ref */
2096 /* can optionally ignore node numbers higher than this node */ 2099 /* can optionally ignore node numbers higher than this node */
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index ce12e0b1a31f..3d90ad7ff91f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1694 unsigned int hash; 1694 unsigned int hash;
1695 int master = DLM_LOCK_RES_OWNER_UNKNOWN; 1695 int master = DLM_LOCK_RES_OWNER_UNKNOWN;
1696 u32 flags = DLM_ASSERT_MASTER_REQUERY; 1696 u32 flags = DLM_ASSERT_MASTER_REQUERY;
1697 int dispatched = 0;
1697 1698
1698 if (!dlm_grab(dlm)) { 1699 if (!dlm_grab(dlm)) {
1699 /* since the domain has gone away on this 1700 /* since the domain has gone away on this
@@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1719 dlm_put(dlm); 1720 dlm_put(dlm);
1720 /* sender will take care of this and retry */ 1721 /* sender will take care of this and retry */
1721 return ret; 1722 return ret;
1722 } else 1723 } else {
1724 dispatched = 1;
1723 __dlm_lockres_grab_inflight_worker(dlm, res); 1725 __dlm_lockres_grab_inflight_worker(dlm, res);
1726 }
1724 spin_unlock(&res->spinlock); 1727 spin_unlock(&res->spinlock);
1725 } else { 1728 } else {
1726 /* put.. incase we are not the master */ 1729 /* put.. incase we are not the master */
@@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
1730 } 1733 }
1731 spin_unlock(&dlm->spinlock); 1734 spin_unlock(&dlm->spinlock);
1732 1735
1733 dlm_put(dlm); 1736 if (!dispatched)
1737 dlm_put(dlm);
1734 return master; 1738 return master;
1735} 1739}
1736 1740
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index ba1323a94924..a586467f6ff6 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -70,6 +70,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
70 unsigned order; 70 unsigned order;
71 void *data; 71 void *data;
72 int ret; 72 int ret;
73 gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
73 74
74 /* make various checks */ 75 /* make various checks */
75 order = get_order(newsize); 76 order = get_order(newsize);
@@ -84,7 +85,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
84 85
85 /* allocate enough contiguous pages to be able to satisfy the 86 /* allocate enough contiguous pages to be able to satisfy the
86 * request */ 87 * request */
87 pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order); 88 pages = alloc_pages(gfp, order);
88 if (!pages) 89 if (!pages)
89 return -ENOMEM; 90 return -ENOMEM;
90 91
@@ -108,7 +109,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
108 struct page *page = pages + loop; 109 struct page *page = pages + loop;
109 110
110 ret = add_to_page_cache_lru(page, inode->i_mapping, loop, 111 ret = add_to_page_cache_lru(page, inode->i_mapping, loop,
111 GFP_KERNEL); 112 gfp);
112 if (ret < 0) 113 if (ret < 0)
113 goto add_error; 114 goto add_error;
114 115
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 96f3448b6eb4..fd65b3f1923c 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
652{ 652{
653 int err; 653 int err;
654 654
655 mutex_lock(&inode->i_mutex);
656 err = security_inode_init_security(inode, dentry, qstr, 655 err = security_inode_init_security(inode, dentry, qstr,
657 &init_xattrs, 0); 656 &init_xattrs, 0);
658 mutex_unlock(&inode->i_mutex);
659
660 if (err) { 657 if (err) {
661 struct ubifs_info *c = dentry->i_sb->s_fs_info; 658 struct ubifs_info *c = dentry->i_sb->s_fs_info;
662 ubifs_err(c, "cannot initialize security for inode %lu, error %d", 659 ubifs_err(c, "cannot initialize security for inode %lu, error %d",
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 634e676072cb..50311703135b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -467,8 +467,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
467 * the fault_*wqh. 467 * the fault_*wqh.
468 */ 468 */
469 spin_lock(&ctx->fault_pending_wqh.lock); 469 spin_lock(&ctx->fault_pending_wqh.lock);
470 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, &range); 470 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
471 __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, &range); 471 __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
472 spin_unlock(&ctx->fault_pending_wqh.lock); 472 spin_unlock(&ctx->fault_pending_wqh.lock);
473 473
474 wake_up_poll(&ctx->fd_wqh, POLLHUP); 474 wake_up_poll(&ctx->fd_wqh, POLLHUP);
@@ -650,10 +650,10 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx,
650 spin_lock(&ctx->fault_pending_wqh.lock); 650 spin_lock(&ctx->fault_pending_wqh.lock);
651 /* wake all in the range and autoremove */ 651 /* wake all in the range and autoremove */
652 if (waitqueue_active(&ctx->fault_pending_wqh)) 652 if (waitqueue_active(&ctx->fault_pending_wqh))
653 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, 653 __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
654 range); 654 range);
655 if (waitqueue_active(&ctx->fault_wqh)) 655 if (waitqueue_active(&ctx->fault_wqh))
656 __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, range); 656 __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range);
657 spin_unlock(&ctx->fault_pending_wqh.lock); 657 spin_unlock(&ctx->fault_pending_wqh.lock);
658} 658}
659 659
@@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags)
1287 1287
1288 file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, 1288 file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
1289 O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); 1289 O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
1290 if (IS_ERR(file)) 1290 if (IS_ERR(file)) {
1291 mmput(ctx->mm);
1291 kmem_cache_free(userfaultfd_ctx_cachep, ctx); 1292 kmem_cache_free(userfaultfd_ctx_cachep, ctx);
1293 }
1292out: 1294out:
1293 return file; 1295 return file;
1294} 1296}