aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-10-09 03:02:35 -0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-10-09 03:02:35 -0400
commit 1236d6bb6e19fc72ffc6bbcdeb1bfefe450e54ee (patch)
tree 47da3feee8e263e8c9352c85cf518e624be3c211 /fs
parent 750b1a6894ecc9b178c6e3d0a1170122971b2036 (diff)
parent 8a5776a5f49812d29fe4b2d0a2d71675c3facf3f (diff)
Merge 4.14-rc4 into staging-next
We want the staging/iio fixes in here as well to handle merge issues.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/binfmt_misc.c 56
-rw-r--r-- fs/binfmt_script.c 17
-rw-r--r-- fs/btrfs/compression.c 18
-rw-r--r-- fs/btrfs/ctree.h 3
-rw-r--r-- fs/btrfs/disk-io.c 9
-rw-r--r-- fs/btrfs/extent_io.c 10
-rw-r--r-- fs/btrfs/inode.c 27
-rw-r--r-- fs/btrfs/ioctl.c 12
-rw-r--r-- fs/btrfs/qgroup.c 6
-rw-r--r-- fs/btrfs/relocation.c 2
-rw-r--r-- fs/btrfs/send.c 2
-rw-r--r-- fs/btrfs/tree-log.c 12
-rw-r--r-- fs/btrfs/volumes.c 2
-rw-r--r-- fs/ceph/mds_client.c 18
-rw-r--r-- fs/ceph/mds_client.h 3
-rw-r--r-- fs/ceph/snap.c 8
-rw-r--r-- fs/cifs/cifs_debug.c 9
-rw-r--r-- fs/cifs/cifsfs.c 4
-rw-r--r-- fs/cifs/cifsfs.h 2
-rw-r--r-- fs/cifs/cifsglob.h 6
-rw-r--r-- fs/cifs/connect.c 32
-rw-r--r-- fs/cifs/file.c 19
-rw-r--r-- fs/cifs/inode.c 15
-rw-r--r-- fs/cifs/smb2ops.c 42
-rw-r--r-- fs/cifs/smb2pdu.c 111
-rw-r--r-- fs/cifs/smb2pdu.h 2
-rw-r--r-- fs/direct-io.c 49
-rw-r--r-- fs/exec.c 2
-rw-r--r-- fs/fcntl.c 2
-rw-r--r-- fs/gfs2/glock.c 14
-rw-r--r-- fs/iomap.c 43
-rw-r--r-- fs/isofs/inode.c 2
-rw-r--r-- fs/namespace.c 4
-rw-r--r-- fs/overlayfs/copy_up.c 6
-rw-r--r-- fs/overlayfs/dir.c 20
-rw-r--r-- fs/overlayfs/namei.c 1
-rw-r--r-- fs/overlayfs/overlayfs.h 1
-rw-r--r-- fs/overlayfs/ovl_entry.h 3
-rw-r--r-- fs/overlayfs/readdir.c 6
-rw-r--r-- fs/overlayfs/super.c 27
-rw-r--r-- fs/overlayfs/util.c 24
-rw-r--r-- fs/proc/array.c 44
-rw-r--r-- fs/quota/dquot.c 2
-rw-r--r-- fs/quota/quota_v2.c 4
-rw-r--r-- fs/read_write.c 4
-rw-r--r-- fs/userfaultfd.c 66
-rw-r--r-- fs/xattr.c 2
-rw-r--r-- fs/xfs/libxfs/xfs_ag_resv.c 12
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 17
-rw-r--r-- fs/xfs/xfs_aops.c 3
-rw-r--r-- fs/xfs/xfs_bmap_util.c 38
-rw-r--r-- fs/xfs/xfs_buf.c 2
-rw-r--r-- fs/xfs/xfs_error.c 2
-rw-r--r-- fs/xfs/xfs_file.c 39
-rw-r--r-- fs/xfs/xfs_inode.c 8
-rw-r--r-- fs/xfs/xfs_inode_item.c 2
-rw-r--r-- fs/xfs/xfs_ioctl.c 3
-rw-r--r-- fs/xfs/xfs_iomap.c 7
-rw-r--r-- fs/xfs/xfs_iomap.h 2
-rw-r--r-- fs/xfs/xfs_pnfs.c 2
-rw-r--r-- fs/xfs/xfs_reflink.c 9
-rw-r--r-- fs/xfs/xfs_super.c 10
62 files changed, 657 insertions, 272 deletions
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ce7181ea60fa..2a46762def31 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -54,7 +54,7 @@ typedef struct {
54 int size; /* size of magic/mask */ 54 int size; /* size of magic/mask */
55 char *magic; /* magic or filename extension */ 55 char *magic; /* magic or filename extension */
56 char *mask; /* mask, NULL for exact match */ 56 char *mask; /* mask, NULL for exact match */
57 char *interpreter; /* filename of interpreter */ 57 const char *interpreter; /* filename of interpreter */
58 char *name; 58 char *name;
59 struct dentry *dentry; 59 struct dentry *dentry;
60 struct file *interp_file; 60 struct file *interp_file;
@@ -131,27 +131,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
131{ 131{
132 Node *fmt; 132 Node *fmt;
133 struct file *interp_file = NULL; 133 struct file *interp_file = NULL;
134 char iname[BINPRM_BUF_SIZE];
135 const char *iname_addr = iname;
136 int retval; 134 int retval;
137 int fd_binary = -1; 135 int fd_binary = -1;
138 136
139 retval = -ENOEXEC; 137 retval = -ENOEXEC;
140 if (!enabled) 138 if (!enabled)
141 goto ret; 139 return retval;
142 140
143 /* to keep locking time low, we copy the interpreter string */ 141 /* to keep locking time low, we copy the interpreter string */
144 read_lock(&entries_lock); 142 read_lock(&entries_lock);
145 fmt = check_file(bprm); 143 fmt = check_file(bprm);
146 if (fmt) 144 if (fmt)
147 strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE); 145 dget(fmt->dentry);
148 read_unlock(&entries_lock); 146 read_unlock(&entries_lock);
149 if (!fmt) 147 if (!fmt)
150 goto ret; 148 return retval;
151 149
152 /* Need to be able to load the file after exec */ 150 /* Need to be able to load the file after exec */
151 retval = -ENOENT;
153 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) 152 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
154 return -ENOENT; 153 goto ret;
155 154
156 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { 155 if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
157 retval = remove_arg_zero(bprm); 156 retval = remove_arg_zero(bprm);
@@ -195,22 +194,22 @@ static int load_misc_binary(struct linux_binprm *bprm)
195 bprm->argc++; 194 bprm->argc++;
196 195
197 /* add the interp as argv[0] */ 196 /* add the interp as argv[0] */
198 retval = copy_strings_kernel(1, &iname_addr, bprm); 197 retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
199 if (retval < 0) 198 if (retval < 0)
200 goto error; 199 goto error;
201 bprm->argc++; 200 bprm->argc++;
202 201
203 /* Update interp in case binfmt_script needs it. */ 202 /* Update interp in case binfmt_script needs it. */
204 retval = bprm_change_interp(iname, bprm); 203 retval = bprm_change_interp(fmt->interpreter, bprm);
205 if (retval < 0) 204 if (retval < 0)
206 goto error; 205 goto error;
207 206
208 if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) { 207 if (fmt->flags & MISC_FMT_OPEN_FILE) {
209 interp_file = filp_clone_open(fmt->interp_file); 208 interp_file = filp_clone_open(fmt->interp_file);
210 if (!IS_ERR(interp_file)) 209 if (!IS_ERR(interp_file))
211 deny_write_access(interp_file); 210 deny_write_access(interp_file);
212 } else { 211 } else {
213 interp_file = open_exec(iname); 212 interp_file = open_exec(fmt->interpreter);
214 } 213 }
215 retval = PTR_ERR(interp_file); 214 retval = PTR_ERR(interp_file);
216 if (IS_ERR(interp_file)) 215 if (IS_ERR(interp_file))
@@ -238,6 +237,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
238 goto error; 237 goto error;
239 238
240ret: 239ret:
240 dput(fmt->dentry);
241 return retval; 241 return retval;
242error: 242error:
243 if (fd_binary > 0) 243 if (fd_binary > 0)
@@ -594,8 +594,13 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
594 594
595static void bm_evict_inode(struct inode *inode) 595static void bm_evict_inode(struct inode *inode)
596{ 596{
597 Node *e = inode->i_private;
598
599 if (e->flags & MISC_FMT_OPEN_FILE)
600 filp_close(e->interp_file, NULL);
601
597 clear_inode(inode); 602 clear_inode(inode);
598 kfree(inode->i_private); 603 kfree(e);
599} 604}
600 605
601static void kill_node(Node *e) 606static void kill_node(Node *e)
@@ -603,24 +608,14 @@ static void kill_node(Node *e)
603 struct dentry *dentry; 608 struct dentry *dentry;
604 609
605 write_lock(&entries_lock); 610 write_lock(&entries_lock);
606 dentry = e->dentry; 611 list_del_init(&e->list);
607 if (dentry) {
608 list_del_init(&e->list);
609 e->dentry = NULL;
610 }
611 write_unlock(&entries_lock); 612 write_unlock(&entries_lock);
612 613
613 if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) { 614 dentry = e->dentry;
614 filp_close(e->interp_file, NULL); 615 drop_nlink(d_inode(dentry));
615 e->interp_file = NULL; 616 d_drop(dentry);
616 } 617 dput(dentry);
617 618 simple_release_fs(&bm_mnt, &entry_count);
618 if (dentry) {
619 drop_nlink(d_inode(dentry));
620 d_drop(dentry);
621 dput(dentry);
622 simple_release_fs(&bm_mnt, &entry_count);
623 }
624} 619}
625 620
626/* /<entry> */ 621/* /<entry> */
@@ -665,7 +660,8 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
665 root = file_inode(file)->i_sb->s_root; 660 root = file_inode(file)->i_sb->s_root;
666 inode_lock(d_inode(root)); 661 inode_lock(d_inode(root));
667 662
668 kill_node(e); 663 if (!list_empty(&e->list))
664 kill_node(e);
669 665
670 inode_unlock(d_inode(root)); 666 inode_unlock(d_inode(root));
671 break; 667 break;
@@ -794,7 +790,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
794 inode_lock(d_inode(root)); 790 inode_lock(d_inode(root));
795 791
796 while (!list_empty(&entries)) 792 while (!list_empty(&entries))
797 kill_node(list_entry(entries.next, Node, list)); 793 kill_node(list_first_entry(&entries, Node, list));
798 794
799 inode_unlock(d_inode(root)); 795 inode_unlock(d_inode(root));
800 break; 796 break;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index afdf4e3cafc2..7cde3f46ad26 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -19,7 +19,6 @@ static int load_script(struct linux_binprm *bprm)
19 const char *i_arg, *i_name; 19 const char *i_arg, *i_name;
20 char *cp; 20 char *cp;
21 struct file *file; 21 struct file *file;
22 char interp[BINPRM_BUF_SIZE];
23 int retval; 22 int retval;
24 23
25 if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) 24 if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
@@ -55,7 +54,7 @@ static int load_script(struct linux_binprm *bprm)
55 break; 54 break;
56 } 55 }
57 for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++); 56 for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++);
58 if (*cp == '\0') 57 if (*cp == '\0')
59 return -ENOEXEC; /* No interpreter name found */ 58 return -ENOEXEC; /* No interpreter name found */
60 i_name = cp; 59 i_name = cp;
61 i_arg = NULL; 60 i_arg = NULL;
@@ -65,7 +64,6 @@ static int load_script(struct linux_binprm *bprm)
65 *cp++ = '\0'; 64 *cp++ = '\0';
66 if (*cp) 65 if (*cp)
67 i_arg = cp; 66 i_arg = cp;
68 strcpy (interp, i_name);
69 /* 67 /*
70 * OK, we've parsed out the interpreter name and 68 * OK, we've parsed out the interpreter name and
71 * (optional) argument. 69 * (optional) argument.
@@ -80,24 +78,27 @@ static int load_script(struct linux_binprm *bprm)
80 if (retval) 78 if (retval)
81 return retval; 79 return retval;
82 retval = copy_strings_kernel(1, &bprm->interp, bprm); 80 retval = copy_strings_kernel(1, &bprm->interp, bprm);
83 if (retval < 0) return retval; 81 if (retval < 0)
82 return retval;
84 bprm->argc++; 83 bprm->argc++;
85 if (i_arg) { 84 if (i_arg) {
86 retval = copy_strings_kernel(1, &i_arg, bprm); 85 retval = copy_strings_kernel(1, &i_arg, bprm);
87 if (retval < 0) return retval; 86 if (retval < 0)
87 return retval;
88 bprm->argc++; 88 bprm->argc++;
89 } 89 }
90 retval = copy_strings_kernel(1, &i_name, bprm); 90 retval = copy_strings_kernel(1, &i_name, bprm);
91 if (retval) return retval; 91 if (retval)
92 return retval;
92 bprm->argc++; 93 bprm->argc++;
93 retval = bprm_change_interp(interp, bprm); 94 retval = bprm_change_interp(i_name, bprm);
94 if (retval < 0) 95 if (retval < 0)
95 return retval; 96 return retval;
96 97
97 /* 98 /*
98 * OK, now restart the process with the interpreter's dentry. 99 * OK, now restart the process with the interpreter's dentry.
99 */ 100 */
100 file = open_exec(interp); 101 file = open_exec(i_name);
101 if (IS_ERR(file)) 102 if (IS_ERR(file))
102 return PTR_ERR(file); 103 return PTR_ERR(file);
103 104
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b51d23f5cafa..280384bf34f1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -107,7 +107,8 @@ static void end_compressed_bio_read(struct bio *bio)
107 struct inode *inode; 107 struct inode *inode;
108 struct page *page; 108 struct page *page;
109 unsigned long index; 109 unsigned long index;
110 int ret; 110 unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
111 int ret = 0;
111 112
112 if (bio->bi_status) 113 if (bio->bi_status)
113 cb->errors = 1; 114 cb->errors = 1;
@@ -118,6 +119,21 @@ static void end_compressed_bio_read(struct bio *bio)
118 if (!refcount_dec_and_test(&cb->pending_bios)) 119 if (!refcount_dec_and_test(&cb->pending_bios))
119 goto out; 120 goto out;
120 121
122 /*
123 * Record the correct mirror_num in cb->orig_bio so that
124 * read-repair can work properly.
125 */
126 ASSERT(btrfs_io_bio(cb->orig_bio));
127 btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
128 cb->mirror_num = mirror;
129
130 /*
131 * Some IO in this cb have failed, just skip checksum as there
132 * is no way it could be correct.
133 */
134 if (cb->errors == 1)
135 goto csum_failed;
136
121 inode = cb->inode; 137 inode = cb->inode;
122 ret = check_compressed_csum(BTRFS_I(inode), cb, 138 ret = check_compressed_csum(BTRFS_I(inode), cb,
123 (u64)bio->bi_iter.bi_sector << 9); 139 (u64)bio->bi_iter.bi_sector << 9);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5a8933da39a7..8fc690384c58 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -709,7 +709,6 @@ struct btrfs_delayed_root;
709#define BTRFS_FS_OPEN 5 709#define BTRFS_FS_OPEN 5
710#define BTRFS_FS_QUOTA_ENABLED 6 710#define BTRFS_FS_QUOTA_ENABLED 6
711#define BTRFS_FS_QUOTA_ENABLING 7 711#define BTRFS_FS_QUOTA_ENABLING 7
712#define BTRFS_FS_QUOTA_DISABLING 8
713#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9 712#define BTRFS_FS_UPDATE_UUID_TREE_GEN 9
714#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10 713#define BTRFS_FS_CREATING_FREE_SPACE_TREE 10
715#define BTRFS_FS_BTREE_ERR 11 714#define BTRFS_FS_BTREE_ERR 11
@@ -723,7 +722,7 @@ struct btrfs_delayed_root;
723 * Indicate that a whole-filesystem exclusive operation is running 722 * Indicate that a whole-filesystem exclusive operation is running
724 * (device replace, resize, device add/delete, balance) 723 * (device replace, resize, device add/delete, balance)
725 */ 724 */
726#define BTRFS_FS_EXCL_OP 14 725#define BTRFS_FS_EXCL_OP 16
727 726
728struct btrfs_fs_info { 727struct btrfs_fs_info {
729 u8 fsid[BTRFS_FSID_SIZE]; 728 u8 fsid[BTRFS_FSID_SIZE];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 487bbe4fb3c6..dfdab849037b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3643,7 +3643,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
3643 u64 flags; 3643 u64 flags;
3644 3644
3645 do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); 3645 do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
3646 backup_super_roots(fs_info); 3646
3647 /*
3648 * max_mirrors == 0 indicates we're from commit_transaction,
3649 * not from fsync where the tree roots in fs_info have not
3650 * been consistent on disk.
3651 */
3652 if (max_mirrors == 0)
3653 backup_super_roots(fs_info);
3647 3654
3648 sb = fs_info->super_for_commit; 3655 sb = fs_info->super_for_commit;
3649 dev_item = &sb->dev_item; 3656 dev_item = &sb->dev_item;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e5bb0cdd3cd..970190cd347e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2801,7 +2801,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2801 } 2801 }
2802 } 2802 }
2803 2803
2804 bio = btrfs_bio_alloc(bdev, sector << 9); 2804 bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
2805 bio_add_page(bio, page, page_size, offset); 2805 bio_add_page(bio, page, page_size, offset);
2806 bio->bi_end_io = end_io_func; 2806 bio->bi_end_io = end_io_func;
2807 bio->bi_private = tree; 2807 bio->bi_private = tree;
@@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3471 unsigned int write_flags = 0; 3471 unsigned int write_flags = 0;
3472 unsigned long nr_written = 0; 3472 unsigned long nr_written = 0;
3473 3473
3474 if (wbc->sync_mode == WB_SYNC_ALL) 3474 write_flags = wbc_to_write_flags(wbc);
3475 write_flags = REQ_SYNC;
3476 3475
3477 trace___extent_writepage(page, inode, wbc); 3476 trace___extent_writepage(page, inode, wbc);
3478 3477
@@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3718 unsigned long i, num_pages; 3717 unsigned long i, num_pages;
3719 unsigned long bio_flags = 0; 3718 unsigned long bio_flags = 0;
3720 unsigned long start, end; 3719 unsigned long start, end;
3721 unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; 3720 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3722 int ret = 0; 3721 int ret = 0;
3723 3722
3724 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); 3723 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4063,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
4063 if (epd->bio) { 4062 if (epd->bio) {
4064 int ret; 4063 int ret;
4065 4064
4066 bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
4067 epd->sync_io ? REQ_SYNC : 0);
4068
4069 ret = submit_one_bio(epd->bio, 0, epd->bio_flags); 4065 ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
4070 BUG_ON(ret < 0); /* -ENOMEM */ 4066 BUG_ON(ret < 0); /* -ENOMEM */
4071 epd->bio = NULL; 4067 epd->bio = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 128f3e58634f..d94e3f68b9b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
135 const u64 offset, 135 const u64 offset,
136 const u64 bytes) 136 const u64 bytes)
137{ 137{
138 unsigned long index = offset >> PAGE_SHIFT;
139 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
140 struct page *page;
141
142 while (index <= end_index) {
143 page = find_get_page(inode->i_mapping, index);
144 index++;
145 if (!page)
146 continue;
147 ClearPagePrivate2(page);
148 put_page(page);
149 }
138 return __endio_write_update_ordered(inode, offset + PAGE_SIZE, 150 return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
139 bytes - PAGE_SIZE, false); 151 bytes - PAGE_SIZE, false);
140} 152}
@@ -8357,11 +8369,8 @@ static void btrfs_endio_direct_read(struct bio *bio)
8357 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8369 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8358 blk_status_t err = bio->bi_status; 8370 blk_status_t err = bio->bi_status;
8359 8371
8360 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) { 8372 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
8361 err = btrfs_subio_endio_read(inode, io_bio, err); 8373 err = btrfs_subio_endio_read(inode, io_bio, err);
8362 if (!err)
8363 bio->bi_status = 0;
8364 }
8365 8374
8366 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 8375 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
8367 dip->logical_offset + dip->bytes - 1); 8376 dip->logical_offset + dip->bytes - 1);
@@ -8369,7 +8378,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
8369 8378
8370 kfree(dip); 8379 kfree(dip);
8371 8380
8372 dio_bio->bi_status = bio->bi_status; 8381 dio_bio->bi_status = err;
8373 dio_end_io(dio_bio); 8382 dio_end_io(dio_bio);
8374 8383
8375 if (io_bio->end_io) 8384 if (io_bio->end_io)
@@ -8387,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode,
8387 btrfs_work_func_t func; 8396 btrfs_work_func_t func;
8388 u64 ordered_offset = offset; 8397 u64 ordered_offset = offset;
8389 u64 ordered_bytes = bytes; 8398 u64 ordered_bytes = bytes;
8399 u64 last_offset;
8390 int ret; 8400 int ret;
8391 8401
8392 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { 8402 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -8398,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode,
8398 } 8408 }
8399 8409
8400again: 8410again:
8411 last_offset = ordered_offset;
8401 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, 8412 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
8402 &ordered_offset, 8413 &ordered_offset,
8403 ordered_bytes, 8414 ordered_bytes,
@@ -8409,6 +8420,12 @@ again:
8409 btrfs_queue_work(wq, &ordered->work); 8420 btrfs_queue_work(wq, &ordered->work);
8410out_test: 8421out_test:
8411 /* 8422 /*
8423 * If btrfs_dec_test_ordered_pending does not find any ordered extent
8424 * in the range, we can exit.
8425 */
8426 if (ordered_offset == last_offset)
8427 return;
8428 /*
8412 * our bio might span multiple ordered extents. If we haven't 8429 * our bio might span multiple ordered extents. If we haven't
8413 * completed the accounting for the whole dio, go back and try again 8430 * completed the accounting for the whole dio, go back and try again
8414 */ 8431 */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d6715c2bcdc4..6c7a49faf4e0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2773,9 +2773,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
2773 } 2773 }
2774 mutex_unlock(&fs_devices->device_list_mutex); 2774 mutex_unlock(&fs_devices->device_list_mutex);
2775 2775
2776 fi_args->nodesize = fs_info->super_copy->nodesize; 2776 fi_args->nodesize = fs_info->nodesize;
2777 fi_args->sectorsize = fs_info->super_copy->sectorsize; 2777 fi_args->sectorsize = fs_info->sectorsize;
2778 fi_args->clone_alignment = fs_info->super_copy->sectorsize; 2778 fi_args->clone_alignment = fs_info->sectorsize;
2779 2779
2780 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2780 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2781 ret = -EFAULT; 2781 ret = -EFAULT;
@@ -3032,7 +3032,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
3032out: 3032out:
3033 if (ret) 3033 if (ret)
3034 btrfs_cmp_data_free(cmp); 3034 btrfs_cmp_data_free(cmp);
3035 return 0; 3035 return ret;
3036} 3036}
3037 3037
3038static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) 3038static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
@@ -4061,6 +4061,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
4061 ret = PTR_ERR(new_root); 4061 ret = PTR_ERR(new_root);
4062 goto out; 4062 goto out;
4063 } 4063 }
4064 if (!is_fstree(new_root->objectid)) {
4065 ret = -ENOENT;
4066 goto out;
4067 }
4064 4068
4065 path = btrfs_alloc_path(); 4069 path = btrfs_alloc_path();
4066 if (!path) { 4070 if (!path) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5c8b61c86e61..e172d4843eae 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
807 } 807 }
808 ret = 0; 808 ret = 0;
809out: 809out:
810 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags);
811 btrfs_free_path(path); 810 btrfs_free_path(path);
812 return ret; 811 return ret;
813} 812}
@@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
953 if (!fs_info->quota_root) 952 if (!fs_info->quota_root)
954 goto out; 953 goto out;
955 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 954 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
956 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
957 btrfs_qgroup_wait_for_completion(fs_info, false); 955 btrfs_qgroup_wait_for_completion(fs_info, false);
958 spin_lock(&fs_info->qgroup_lock); 956 spin_lock(&fs_info->qgroup_lock);
959 quota_root = fs_info->quota_root; 957 quota_root = fs_info->quota_root;
@@ -1307,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1307 } 1305 }
1308 } 1306 }
1309 ret = del_qgroup_item(trans, quota_root, qgroupid); 1307 ret = del_qgroup_item(trans, quota_root, qgroupid);
1308 if (ret && ret != -ENOENT)
1309 goto out;
1310 1310
1311 while (!list_empty(&qgroup->groups)) { 1311 while (!list_empty(&qgroup->groups)) {
1312 list = list_first_entry(&qgroup->groups, 1312 list = list_first_entry(&qgroup->groups,
@@ -2086,8 +2086,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2086 2086
2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
2089 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags))
2090 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
2091 2089
2092 spin_lock(&fs_info->qgroup_lock); 2090 spin_lock(&fs_info->qgroup_lock);
2093 while (!list_empty(&fs_info->dirty_qgroups)) { 2091 while (!list_empty(&fs_info->dirty_qgroups)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3a49a3c2fca4..9841faef08ea 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list)
2400 while (!list_empty(list)) { 2400 while (!list_empty(list)) {
2401 reloc_root = list_entry(list->next, struct btrfs_root, 2401 reloc_root = list_entry(list->next, struct btrfs_root,
2402 root_list); 2402 root_list);
2403 __del_reloc_root(reloc_root);
2403 free_extent_buffer(reloc_root->node); 2404 free_extent_buffer(reloc_root->node);
2404 free_extent_buffer(reloc_root->commit_root); 2405 free_extent_buffer(reloc_root->commit_root);
2405 reloc_root->node = NULL; 2406 reloc_root->node = NULL;
2406 reloc_root->commit_root = NULL; 2407 reloc_root->commit_root = NULL;
2407 __del_reloc_root(reloc_root);
2408 } 2408 }
2409} 2409}
2410 2410
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 32b043ef8ac9..8fd195cfe81b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2630,7 +2630,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
2630 } else { 2630 } else {
2631 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", 2631 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
2632 (int)(mode & S_IFMT)); 2632 (int)(mode & S_IFMT));
2633 ret = -ENOTSUPP; 2633 ret = -EOPNOTSUPP;
2634 goto out; 2634 goto out;
2635 } 2635 }
2636 2636
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ad7f4bab640b..c800d067fcbf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4181 struct extent_map *em, *n; 4181 struct extent_map *em, *n;
4182 struct list_head extents; 4182 struct list_head extents;
4183 struct extent_map_tree *tree = &inode->extent_tree; 4183 struct extent_map_tree *tree = &inode->extent_tree;
4184 u64 logged_start, logged_end;
4184 u64 test_gen; 4185 u64 test_gen;
4185 int ret = 0; 4186 int ret = 0;
4186 int num = 0; 4187 int num = 0;
@@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4190 down_write(&inode->dio_sem); 4191 down_write(&inode->dio_sem);
4191 write_lock(&tree->lock); 4192 write_lock(&tree->lock);
4192 test_gen = root->fs_info->last_trans_committed; 4193 test_gen = root->fs_info->last_trans_committed;
4194 logged_start = start;
4195 logged_end = end;
4193 4196
4194 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 4197 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
4195 list_del_init(&em->list); 4198 list_del_init(&em->list);
4196
4197 /* 4199 /*
4198 * Just an arbitrary number, this can be really CPU intensive 4200 * Just an arbitrary number, this can be really CPU intensive
4199 * once we start getting a lot of extents, and really once we 4201 * once we start getting a lot of extents, and really once we
@@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4208 4210
4209 if (em->generation <= test_gen) 4211 if (em->generation <= test_gen)
4210 continue; 4212 continue;
4213
4214 if (em->start < logged_start)
4215 logged_start = em->start;
4216 if ((em->start + em->len - 1) > logged_end)
4217 logged_end = em->start + em->len - 1;
4218
4211 /* Need a ref to keep it from getting evicted from cache */ 4219 /* Need a ref to keep it from getting evicted from cache */
4212 refcount_inc(&em->refs); 4220 refcount_inc(&em->refs);
4213 set_bit(EXTENT_FLAG_LOGGING, &em->flags); 4221 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4216 } 4224 }
4217 4225
4218 list_sort(NULL, &extents, extent_cmp); 4226 list_sort(NULL, &extents, extent_cmp);
4219 btrfs_get_logged_extents(inode, logged_list, start, end); 4227 btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
4220 /* 4228 /*
4221 * Some ordered extents started by fsync might have completed 4229 * Some ordered extents started by fsync might have completed
4222 * before we could collect them into the list logged_list, which 4230 * before we could collect them into the list logged_list, which
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..b39737568c22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6166,7 +6166,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6166 map_length = length; 6166 map_length = length;
6167 6167
6168 btrfs_bio_counter_inc_blocked(fs_info); 6168 btrfs_bio_counter_inc_blocked(fs_info);
6169 ret = __btrfs_map_block(fs_info, bio_op(bio), logical, 6169 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6170 &map_length, &bbio, mirror_num, 1); 6170 &map_length, &bbio, mirror_num, 1);
6171 if (ret) { 6171 if (ret) {
6172 btrfs_bio_counter_dec(fs_info); 6172 btrfs_bio_counter_dec(fs_info);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9dd6b836ac9e..f23c820daaed 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,7 +7,6 @@
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/debugfs.h> 8#include <linux/debugfs.h>
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/utsname.h>
11#include <linux/ratelimit.h> 10#include <linux/ratelimit.h>
12 11
13#include "super.h" 12#include "super.h"
@@ -735,12 +734,13 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
735 inode = req->r_inode; 734 inode = req->r_inode;
736 ihold(inode); 735 ihold(inode);
737 } else { 736 } else {
738 /* req->r_dentry is non-null for LSSNAP request. 737 /* req->r_dentry is non-null for LSSNAP request */
739 * fall-thru */ 738 rcu_read_lock();
740 WARN_ON_ONCE(!req->r_dentry); 739 inode = get_nonsnap_parent(req->r_dentry);
740 rcu_read_unlock();
741 dout("__choose_mds using snapdir's parent %p\n", inode);
741 } 742 }
742 } 743 } else if (req->r_dentry) {
743 if (!inode && req->r_dentry) {
744 /* ignore race with rename; old or new d_parent is okay */ 744 /* ignore race with rename; old or new d_parent is okay */
745 struct dentry *parent; 745 struct dentry *parent;
746 struct inode *dir; 746 struct inode *dir;
@@ -884,8 +884,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
884 void *p; 884 void *p;
885 885
886 const char* metadata[][2] = { 886 const char* metadata[][2] = {
887 {"hostname", utsname()->nodename}, 887 {"hostname", mdsc->nodename},
888 {"kernel_version", utsname()->release}, 888 {"kernel_version", init_utsname()->release},
889 {"entity_id", opt->name ? : ""}, 889 {"entity_id", opt->name ? : ""},
890 {"root", fsopt->server_path ? : "/"}, 890 {"root", fsopt->server_path ? : "/"},
891 {NULL, NULL} 891 {NULL, NULL}
@@ -3539,6 +3539,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
3539 init_rwsem(&mdsc->pool_perm_rwsem); 3539 init_rwsem(&mdsc->pool_perm_rwsem);
3540 mdsc->pool_perm_tree = RB_ROOT; 3540 mdsc->pool_perm_tree = RB_ROOT;
3541 3541
3542 strncpy(mdsc->nodename, utsname()->nodename,
3543 sizeof(mdsc->nodename) - 1);
3542 return 0; 3544 return 0;
3543} 3545}
3544 3546
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index db57ae98ed34..636d6b2ec49c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -8,6 +8,7 @@
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9#include <linux/spinlock.h> 9#include <linux/spinlock.h>
10#include <linux/refcount.h> 10#include <linux/refcount.h>
11#include <linux/utsname.h>
11 12
12#include <linux/ceph/types.h> 13#include <linux/ceph/types.h>
13#include <linux/ceph/messenger.h> 14#include <linux/ceph/messenger.h>
@@ -368,6 +369,8 @@ struct ceph_mds_client {
368 369
369 struct rw_semaphore pool_perm_rwsem; 370 struct rw_semaphore pool_perm_rwsem;
370 struct rb_root pool_perm_tree; 371 struct rb_root pool_perm_tree;
372
373 char nodename[__NEW_UTS_LEN + 1];
371}; 374};
372 375
373extern const char *ceph_mds_op_name(int op); 376extern const char *ceph_mds_op_name(int op);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 1ffc8b426c1c..7fc0b850c352 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -374,12 +374,10 @@ static int build_snap_context(struct ceph_snap_realm *realm,
374 realm->ino, realm, snapc, snapc->seq, 374 realm->ino, realm, snapc, snapc->seq,
375 (unsigned int) snapc->num_snaps); 375 (unsigned int) snapc->num_snaps);
376 376
377 if (realm->cached_context) { 377 ceph_put_snap_context(realm->cached_context);
378 ceph_put_snap_context(realm->cached_context);
379 /* queue realm for cap_snap creation */
380 list_add_tail(&realm->dirty_item, dirty_realms);
381 }
382 realm->cached_context = snapc; 378 realm->cached_context = snapc;
379 /* queue realm for cap_snap creation */
380 list_add_tail(&realm->dirty_item, dirty_realms);
383 return 0; 381 return 0;
384 382
385fail: 383fail:
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9727e1dcacd5..cbb9534b89b4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
160 if ((ses->serverDomain == NULL) || 160 if ((ses->serverDomain == NULL) ||
161 (ses->serverOS == NULL) || 161 (ses->serverOS == NULL) ||
162 (ses->serverNOS == NULL)) { 162 (ses->serverNOS == NULL)) {
163 seq_printf(m, "\n%d) entry for %s not fully " 163 seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
164 "displayed\n\t", i, ses->serverName); 164 i, ses->serverName, ses->ses_count,
165 ses->capabilities, ses->status);
166 if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
167 seq_printf(m, "Guest\t");
168 else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
169 seq_printf(m, "Anonymous\t");
165 } else { 170 } else {
166 seq_printf(m, 171 seq_printf(m,
167 "\n%d) Name: %s Domain: %s Uses: %d OS:" 172 "\n%d) Name: %s Domain: %s Uses: %d OS:"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 180b3356ff86..8c8b75d33f31 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
461 seq_puts(s, ",nocase"); 461 seq_puts(s, ",nocase");
462 if (tcon->retry) 462 if (tcon->retry)
463 seq_puts(s, ",hard"); 463 seq_puts(s, ",hard");
464 else
465 seq_puts(s, ",soft");
464 if (tcon->use_persistent) 466 if (tcon->use_persistent)
465 seq_puts(s, ",persistenthandles"); 467 seq_puts(s, ",persistenthandles");
466 else if (tcon->use_resilient) 468 else if (tcon->use_resilient)
@@ -1447,7 +1449,7 @@ exit_cifs(void)
1447 exit_cifs_idmap(); 1449 exit_cifs_idmap();
1448#endif 1450#endif
1449#ifdef CONFIG_CIFS_UPCALL 1451#ifdef CONFIG_CIFS_UPCALL
1450 unregister_key_type(&cifs_spnego_key_type); 1452 exit_cifs_spnego();
1451#endif 1453#endif
1452 cifs_destroy_request_bufs(); 1454 cifs_destroy_request_bufs();
1453 cifs_destroy_mids(); 1455 cifs_destroy_mids();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30bf89b1fd9a..5a10e566f0e6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
149extern const struct export_operations cifs_export_ops; 149extern const struct export_operations cifs_export_ops;
150#endif /* CONFIG_CIFS_NFSD_EXPORT */ 150#endif /* CONFIG_CIFS_NFSD_EXPORT */
151 151
152#define CIFS_VERSION "2.09" 152#define CIFS_VERSION "2.10"
153#endif /* _CIFSFS_H */ 153#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 808486c29f0d..de5b2e1fcce5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ enum smb_version {
188#ifdef CONFIG_CIFS_SMB311 188#ifdef CONFIG_CIFS_SMB311
189 Smb_311, 189 Smb_311,
190#endif /* SMB311 */ 190#endif /* SMB311 */
191 Smb_3any,
192 Smb_default,
191 Smb_version_err 193 Smb_version_err
192}; 194};
193 195
@@ -1701,6 +1703,10 @@ extern struct smb_version_values smb20_values;
1701#define SMB21_VERSION_STRING "2.1" 1703#define SMB21_VERSION_STRING "2.1"
1702extern struct smb_version_operations smb21_operations; 1704extern struct smb_version_operations smb21_operations;
1703extern struct smb_version_values smb21_values; 1705extern struct smb_version_values smb21_values;
1706#define SMBDEFAULT_VERSION_STRING "default"
1707extern struct smb_version_values smbdefault_values;
1708#define SMB3ANY_VERSION_STRING "3"
1709extern struct smb_version_values smb3any_values;
1704#define SMB30_VERSION_STRING "3.0" 1710#define SMB30_VERSION_STRING "3.0"
1705extern struct smb_version_operations smb30_operations; 1711extern struct smb_version_operations smb30_operations;
1706extern struct smb_version_values smb30_values; 1712extern struct smb_version_values smb30_values;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5aa2d278ca84..0bfc2280436d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -301,6 +301,8 @@ static const match_table_t cifs_smb_version_tokens = {
301 { Smb_311, SMB311_VERSION_STRING }, 301 { Smb_311, SMB311_VERSION_STRING },
302 { Smb_311, ALT_SMB311_VERSION_STRING }, 302 { Smb_311, ALT_SMB311_VERSION_STRING },
303#endif /* SMB311 */ 303#endif /* SMB311 */
304 { Smb_3any, SMB3ANY_VERSION_STRING },
305 { Smb_default, SMBDEFAULT_VERSION_STRING },
304 { Smb_version_err, NULL } 306 { Smb_version_err, NULL }
305}; 307};
306 308
@@ -1148,6 +1150,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
1148 vol->vals = &smb311_values; 1150 vol->vals = &smb311_values;
1149 break; 1151 break;
1150#endif /* SMB311 */ 1152#endif /* SMB311 */
1153 case Smb_3any:
1154 vol->ops = &smb30_operations; /* currently identical with 3.0 */
1155 vol->vals = &smb3any_values;
1156 break;
1157 case Smb_default:
1158 vol->ops = &smb30_operations; /* currently identical with 3.0 */
1159 vol->vals = &smbdefault_values;
1160 break;
1151 default: 1161 default:
1152 cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); 1162 cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
1153 return 1; 1163 return 1;
@@ -1274,9 +1284,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1274 1284
1275 vol->actimeo = CIFS_DEF_ACTIMEO; 1285 vol->actimeo = CIFS_DEF_ACTIMEO;
1276 1286
1277 /* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */ 1287 /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
1278 vol->ops = &smb30_operations; /* both secure and accepted widely */ 1288 vol->ops = &smb30_operations;
1279 vol->vals = &smb30_values; 1289 vol->vals = &smbdefault_values;
1280 1290
1281 vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT; 1291 vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
1282 1292
@@ -1988,11 +1998,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1988 1998
1989 if (got_version == false) 1999 if (got_version == false)
1990 pr_warn("No dialect specified on mount. Default has changed to " 2000 pr_warn("No dialect specified on mount. Default has changed to "
1991 "a more secure dialect, SMB3 (vers=3.0), from CIFS " 2001 "a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS "
1992 "(SMB1). To use the less secure SMB1 dialect to access " 2002 "(SMB1). To use the less secure SMB1 dialect to access "
1993 "old servers which do not support SMB3 specify vers=1.0" 2003 "old servers which do not support SMB3 (or SMB2.1) specify vers=1.0"
1994 " on mount. For somewhat newer servers such as Windows " 2004 " on mount.\n");
1995 "7 try vers=2.1.\n");
1996 2005
1997 kfree(mountdata_copy); 2006 kfree(mountdata_copy);
1998 return 0; 2007 return 0;
@@ -2133,6 +2142,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
2133 if (vol->nosharesock) 2142 if (vol->nosharesock)
2134 return 0; 2143 return 0;
2135 2144
2145 /* BB update this for smb3any and default case */
2136 if ((server->vals != vol->vals) || (server->ops != vol->ops)) 2146 if ((server->vals != vol->vals) || (server->ops != vol->ops))
2137 return 0; 2147 return 0;
2138 2148
@@ -4144,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
4144 cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n", 4154 cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
4145 server->sec_mode, server->capabilities, server->timeAdj); 4155 server->sec_mode, server->capabilities, server->timeAdj);
4146 4156
4157 if (ses->auth_key.response) {
4158 cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
4159 ses->auth_key.response);
4160 kfree(ses->auth_key.response);
4161 ses->auth_key.response = NULL;
4162 ses->auth_key.len = 0;
4163 }
4164
4147 if (server->ops->sess_setup) 4165 if (server->ops->sess_setup)
4148 rc = server->ops->sess_setup(xid, ses, nls_info); 4166 rc = server->ops->sess_setup(xid, ses, nls_info);
4149 4167
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0786f19d288f..92fdf9c35de2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
224 if (backup_cred(cifs_sb)) 224 if (backup_cred(cifs_sb))
225 create_options |= CREATE_OPEN_BACKUP_INTENT; 225 create_options |= CREATE_OPEN_BACKUP_INTENT;
226 226
227 /* O_SYNC also has bit for O_DSYNC so following check picks up either */
228 if (f_flags & O_SYNC)
229 create_options |= CREATE_WRITE_THROUGH;
230
231 if (f_flags & O_DIRECT)
232 create_options |= CREATE_NO_BUFFER;
233
227 oparms.tcon = tcon; 234 oparms.tcon = tcon;
228 oparms.cifs_sb = cifs_sb; 235 oparms.cifs_sb = cifs_sb;
229 oparms.desired_access = desired_access; 236 oparms.desired_access = desired_access;
@@ -1102,8 +1109,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1102 struct cifs_tcon *tcon; 1109 struct cifs_tcon *tcon;
1103 unsigned int num, max_num, max_buf; 1110 unsigned int num, max_num, max_buf;
1104 LOCKING_ANDX_RANGE *buf, *cur; 1111 LOCKING_ANDX_RANGE *buf, *cur;
1105 int types[] = {LOCKING_ANDX_LARGE_FILES, 1112 static const int types[] = {
1106 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; 1113 LOCKING_ANDX_LARGE_FILES,
1114 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1115 };
1107 int i; 1116 int i;
1108 1117
1109 xid = get_xid(); 1118 xid = get_xid();
@@ -1434,8 +1443,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1434 unsigned int xid) 1443 unsigned int xid)
1435{ 1444{
1436 int rc = 0, stored_rc; 1445 int rc = 0, stored_rc;
1437 int types[] = {LOCKING_ANDX_LARGE_FILES, 1446 static const int types[] = {
1438 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; 1447 LOCKING_ANDX_LARGE_FILES,
1448 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1449 };
1439 unsigned int i; 1450 unsigned int i;
1440 unsigned int max_num, num, max_buf; 1451 unsigned int max_num, num, max_buf;
1441 LOCKING_ANDX_RANGE *buf, *cur; 1452 LOCKING_ANDX_RANGE *buf, *cur;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8693632235f..7c732cb44164 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
234 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); 234 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
235 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime); 235 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
236 fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange); 236 fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
237 /* old POSIX extensions don't get create time */
238
237 fattr->cf_mode = le64_to_cpu(info->Permissions); 239 fattr->cf_mode = le64_to_cpu(info->Permissions);
238 240
239 /* 241 /*
@@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
2024 stat->blksize = CIFS_MAX_MSGSIZE; 2026 stat->blksize = CIFS_MAX_MSGSIZE;
2025 stat->ino = CIFS_I(inode)->uniqueid; 2027 stat->ino = CIFS_I(inode)->uniqueid;
2026 2028
2029 /* old CIFS Unix Extensions doesn't return create time */
2030 if (CIFS_I(inode)->createtime) {
2031 stat->result_mask |= STATX_BTIME;
2032 stat->btime =
2033 cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
2034 }
2035
2036 stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
2037 if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED)
2038 stat->attributes |= STATX_ATTR_COMPRESSED;
2039 if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED)
2040 stat->attributes |= STATX_ATTR_ENCRYPTED;
2041
2027 /* 2042 /*
2028 * If on a multiuser mount without unix extensions or cifsacl being 2043 * If on a multiuser mount without unix extensions or cifsacl being
2029 * enabled, and the admin hasn't overridden them, set the ownership 2044 * enabled, and the admin hasn't overridden them, set the ownership
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fb2934b9b97c..0dafdbae1f8c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -426,6 +426,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
426 return rc; 426 return rc;
427} 427}
428 428
429#ifdef CONFIG_CIFS_XATTR
429static ssize_t 430static ssize_t
430move_smb2_ea_to_cifs(char *dst, size_t dst_size, 431move_smb2_ea_to_cifs(char *dst, size_t dst_size,
431 struct smb2_file_full_ea_info *src, size_t src_size, 432 struct smb2_file_full_ea_info *src, size_t src_size,
@@ -613,6 +614,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
613 614
614 return rc; 615 return rc;
615} 616}
617#endif
616 618
617static bool 619static bool
618smb2_can_echo(struct TCP_Server_Info *server) 620smb2_can_echo(struct TCP_Server_Info *server)
@@ -3110,6 +3112,46 @@ struct smb_version_values smb21_values = {
3110 .create_lease_size = sizeof(struct create_lease), 3112 .create_lease_size = sizeof(struct create_lease),
3111}; 3113};
3112 3114
3115struct smb_version_values smb3any_values = {
3116 .version_string = SMB3ANY_VERSION_STRING,
3117 .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
3118 .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
3119 .large_lock_type = 0,
3120 .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
3121 .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
3122 .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
3123 .header_size = sizeof(struct smb2_hdr),
3124 .max_header_size = MAX_SMB2_HDR_SIZE,
3125 .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
3126 .lock_cmd = SMB2_LOCK,
3127 .cap_unix = 0,
3128 .cap_nt_find = SMB2_NT_FIND,
3129 .cap_large_files = SMB2_LARGE_FILES,
3130 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
3131 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
3132 .create_lease_size = sizeof(struct create_lease_v2),
3133};
3134
3135struct smb_version_values smbdefault_values = {
3136 .version_string = SMBDEFAULT_VERSION_STRING,
3137 .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
3138 .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
3139 .large_lock_type = 0,
3140 .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
3141 .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
3142 .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
3143 .header_size = sizeof(struct smb2_hdr),
3144 .max_header_size = MAX_SMB2_HDR_SIZE,
3145 .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
3146 .lock_cmd = SMB2_LOCK,
3147 .cap_unix = 0,
3148 .cap_nt_find = SMB2_NT_FIND,
3149 .cap_large_files = SMB2_LARGE_FILES,
3150 .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
3151 .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
3152 .create_lease_size = sizeof(struct create_lease_v2),
3153};
3154
3113struct smb_version_values smb30_values = { 3155struct smb_version_values smb30_values = {
3114 .version_string = SMB30_VERSION_STRING, 3156 .version_string = SMB30_VERSION_STRING,
3115 .protocol_id = SMB30_PROT_ID, 3157 .protocol_id = SMB30_PROT_ID,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5531e7ee1210..6f0e6343c15e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req)
439 build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); 439 build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
440 req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); 440 req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
441 req->NegotiateContextCount = cpu_to_le16(2); 441 req->NegotiateContextCount = cpu_to_le16(2);
442 inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2 442 inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
443 + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ 443 + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
444} 444}
445#else 445#else
@@ -491,10 +491,25 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
491 491
492 req->hdr.sync_hdr.SessionId = 0; 492 req->hdr.sync_hdr.SessionId = 0;
493 493
494 req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); 494 if (strcmp(ses->server->vals->version_string,
495 495 SMB3ANY_VERSION_STRING) == 0) {
496 req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */ 496 req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
497 inc_rfc1001_len(req, 2); 497 req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
498 req->DialectCount = cpu_to_le16(2);
499 inc_rfc1001_len(req, 4);
500 } else if (strcmp(ses->server->vals->version_string,
501 SMBDEFAULT_VERSION_STRING) == 0) {
502 req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
503 req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
504 req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
505 req->DialectCount = cpu_to_le16(3);
506 inc_rfc1001_len(req, 6);
507 } else {
508 /* otherwise send specific dialect */
509 req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
510 req->DialectCount = cpu_to_le16(1);
511 inc_rfc1001_len(req, 2);
512 }
498 513
499 /* only one of SMB2 signing flags may be set in SMB2 request */ 514 /* only one of SMB2 signing flags may be set in SMB2 request */
500 if (ses->sign) 515 if (ses->sign)
@@ -528,16 +543,43 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
528 */ 543 */
529 if (rc == -EOPNOTSUPP) { 544 if (rc == -EOPNOTSUPP) {
530 cifs_dbg(VFS, "Dialect not supported by server. Consider " 545 cifs_dbg(VFS, "Dialect not supported by server. Consider "
531 "specifying vers=1.0 or vers=2.1 on mount for accessing" 546 "specifying vers=1.0 or vers=2.0 on mount for accessing"
532 " older servers\n"); 547 " older servers\n");
533 goto neg_exit; 548 goto neg_exit;
534 } else if (rc != 0) 549 } else if (rc != 0)
535 goto neg_exit; 550 goto neg_exit;
536 551
552 if (strcmp(ses->server->vals->version_string,
553 SMB3ANY_VERSION_STRING) == 0) {
554 if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
555 cifs_dbg(VFS,
556 "SMB2 dialect returned but not requested\n");
557 return -EIO;
558 } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
559 cifs_dbg(VFS,
560 "SMB2.1 dialect returned but not requested\n");
561 return -EIO;
562 }
563 } else if (strcmp(ses->server->vals->version_string,
564 SMBDEFAULT_VERSION_STRING) == 0) {
565 if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
566 cifs_dbg(VFS,
567 "SMB2 dialect returned but not requested\n");
568 return -EIO;
569 } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
570 /* ops set to 3.0 by default for default so update */
571 ses->server->ops = &smb21_operations;
572 }
573 } else if (le16_to_cpu(rsp->DialectRevision) !=
574 ses->server->vals->protocol_id) {
575 /* if requested single dialect ensure returned dialect matched */
576 cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
577 le16_to_cpu(rsp->DialectRevision));
578 return -EIO;
579 }
580
537 cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); 581 cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
538 582
539 /* BB we may eventually want to match the negotiated vs. requested
540 dialect, even though we are only requesting one at a time */
541 if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) 583 if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
542 cifs_dbg(FYI, "negotiated smb2.0 dialect\n"); 584 cifs_dbg(FYI, "negotiated smb2.0 dialect\n");
543 else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) 585 else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
@@ -558,6 +600,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
558 } 600 }
559 server->dialect = le16_to_cpu(rsp->DialectRevision); 601 server->dialect = le16_to_cpu(rsp->DialectRevision);
560 602
603 /* BB: add check that dialect was valid given dialect(s) we asked for */
604
561 /* SMB2 only has an extended negflavor */ 605 /* SMB2 only has an extended negflavor */
562 server->negflavor = CIFS_NEGFLAVOR_EXTENDED; 606 server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
563 /* set it to the maximum buffer size value we can send with 1 credit */ 607 /* set it to the maximum buffer size value we can send with 1 credit */
@@ -606,20 +650,28 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
606 struct validate_negotiate_info_req vneg_inbuf; 650 struct validate_negotiate_info_req vneg_inbuf;
607 struct validate_negotiate_info_rsp *pneg_rsp; 651 struct validate_negotiate_info_rsp *pneg_rsp;
608 u32 rsplen; 652 u32 rsplen;
653 u32 inbuflen; /* max of 4 dialects */
609 654
610 cifs_dbg(FYI, "validate negotiate\n"); 655 cifs_dbg(FYI, "validate negotiate\n");
611 656
612 /* 657 /*
613 * validation ioctl must be signed, so no point sending this if we 658 * validation ioctl must be signed, so no point sending this if we
614 * can not sign it. We could eventually change this to selectively 659 * can not sign it (ie are not known user). Even if signing is not
660 * required (enabled but not negotiated), in those cases we selectively
615 * sign just this, the first and only signed request on a connection. 661 * sign just this, the first and only signed request on a connection.
616 * This is good enough for now since a user who wants better security 662 * Having validation of negotiate info helps reduce attack vectors.
617 * would also enable signing on the mount. Having validation of
618 * negotiate info for signed connections helps reduce attack vectors
619 */ 663 */
620 if (tcon->ses->server->sign == false) 664 if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
621 return 0; /* validation requires signing */ 665 return 0; /* validation requires signing */
622 666
667 if (tcon->ses->user_name == NULL) {
668 cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
669 return 0; /* validation requires signing */
670 }
671
672 if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
673 cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
674
623 vneg_inbuf.Capabilities = 675 vneg_inbuf.Capabilities =
624 cpu_to_le32(tcon->ses->server->vals->req_capabilities); 676 cpu_to_le32(tcon->ses->server->vals->req_capabilities);
625 memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, 677 memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
@@ -634,9 +686,30 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
634 else 686 else
635 vneg_inbuf.SecurityMode = 0; 687 vneg_inbuf.SecurityMode = 0;
636 688
637 vneg_inbuf.DialectCount = cpu_to_le16(1); 689
638 vneg_inbuf.Dialects[0] = 690 if (strcmp(tcon->ses->server->vals->version_string,
639 cpu_to_le16(tcon->ses->server->vals->protocol_id); 691 SMB3ANY_VERSION_STRING) == 0) {
692 vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
693 vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
694 vneg_inbuf.DialectCount = cpu_to_le16(2);
695 /* structure is big enough for 3 dialects, sending only 2 */
696 inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
697 } else if (strcmp(tcon->ses->server->vals->version_string,
698 SMBDEFAULT_VERSION_STRING) == 0) {
699 vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
700 vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
701 vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
702 vneg_inbuf.DialectCount = cpu_to_le16(3);
703 /* structure is big enough for 3 dialects */
704 inbuflen = sizeof(struct validate_negotiate_info_req);
705 } else {
706 /* otherwise specific dialect was requested */
707 vneg_inbuf.Dialects[0] =
708 cpu_to_le16(tcon->ses->server->vals->protocol_id);
709 vneg_inbuf.DialectCount = cpu_to_le16(1);
710 /* structure is big enough for 3 dialects, sending only 1 */
711 inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
712 }
640 713
641 rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, 714 rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
642 FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, 715 FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
@@ -1110,6 +1183,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
1110 while (sess_data->func) 1183 while (sess_data->func)
1111 sess_data->func(sess_data); 1184 sess_data->func(sess_data);
1112 1185
1186 if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
1187 cifs_dbg(VFS, "signing requested but authenticated as guest\n");
1113 rc = sess_data->result; 1188 rc = sess_data->result;
1114out: 1189out:
1115 kfree(sess_data); 1190 kfree(sess_data);
@@ -1634,7 +1709,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
1634 struct cifs_tcon *tcon = oparms->tcon; 1709 struct cifs_tcon *tcon = oparms->tcon;
1635 struct cifs_ses *ses = tcon->ses; 1710 struct cifs_ses *ses = tcon->ses;
1636 struct kvec iov[4]; 1711 struct kvec iov[4];
1637 struct kvec rsp_iov; 1712 struct kvec rsp_iov = {NULL, 0};
1638 int resp_buftype; 1713 int resp_buftype;
1639 int uni_path_len; 1714 int uni_path_len;
1640 __le16 *copy_path = NULL; 1715 __le16 *copy_path = NULL;
@@ -1763,7 +1838,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
1763 1838
1764 if (rc != 0) { 1839 if (rc != 0) {
1765 cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); 1840 cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
1766 if (err_buf) 1841 if (err_buf && rsp)
1767 *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4, 1842 *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
1768 GFP_KERNEL); 1843 GFP_KERNEL);
1769 goto creat_exit; 1844 goto creat_exit;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 393ed5f4e1b6..6c9653a130c8 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -716,7 +716,7 @@ struct validate_negotiate_info_req {
716 __u8 Guid[SMB2_CLIENT_GUID_SIZE]; 716 __u8 Guid[SMB2_CLIENT_GUID_SIZE];
717 __le16 SecurityMode; 717 __le16 SecurityMode;
718 __le16 DialectCount; 718 __le16 DialectCount;
719 __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ 719 __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
720} __packed; 720} __packed;
721 721
722struct validate_negotiate_info_rsp { 722struct validate_negotiate_info_rsp {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..62cf812ed0e5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
229{ 229{
230 loff_t offset = dio->iocb->ki_pos; 230 loff_t offset = dio->iocb->ki_pos;
231 ssize_t transferred = 0; 231 ssize_t transferred = 0;
232 int err;
232 233
233 /* 234 /*
234 * AIO submission can race with bio completion to get here while 235 * AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
258 if (ret == 0) 259 if (ret == 0)
259 ret = transferred; 260 ret = transferred;
260 261
262 /*
263 * Try again to invalidate clean pages which might have been cached by
264 * non-direct readahead, or faulted in by get_user_pages() if the source
265 * of the write was an mmap'ed region of the file we're writing. Either
266 * one is a pretty crazy thing to do, so we don't support it 100%. If
267 * this invalidation fails, tough, the write still worked...
268 */
269 if (ret > 0 && dio->op == REQ_OP_WRITE &&
270 dio->inode->i_mapping->nrpages) {
271 err = invalidate_inode_pages2_range(dio->inode->i_mapping,
272 offset >> PAGE_SHIFT,
273 (offset + ret - 1) >> PAGE_SHIFT);
274 WARN_ON_ONCE(err);
275 }
276
261 if (dio->end_io) { 277 if (dio->end_io) {
262 int err;
263 278
264 // XXX: ki_pos?? 279 // XXX: ki_pos??
265 err = dio->end_io(dio->iocb, offset, ret, dio->private); 280 err = dio->end_io(dio->iocb, offset, ret, dio->private);
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
304 struct dio *dio = bio->bi_private; 319 struct dio *dio = bio->bi_private;
305 unsigned long remaining; 320 unsigned long remaining;
306 unsigned long flags; 321 unsigned long flags;
322 bool defer_completion = false;
307 323
308 /* cleanup the bio */ 324 /* cleanup the bio */
309 dio_bio_complete(dio, bio); 325 dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
315 spin_unlock_irqrestore(&dio->bio_lock, flags); 331 spin_unlock_irqrestore(&dio->bio_lock, flags);
316 332
317 if (remaining == 0) { 333 if (remaining == 0) {
318 if (dio->result && dio->defer_completion) { 334 /*
335 * Defer completion when defer_completion is set or
336 * when the inode has pages mapped and this is AIO write.
337 * We need to invalidate those pages because there is a
338 * chance they contain stale data in the case buffered IO
339 * went in between AIO submission and completion into the
340 * same region.
341 */
342 if (dio->result)
343 defer_completion = dio->defer_completion ||
344 (dio->op == REQ_OP_WRITE &&
345 dio->inode->i_mapping->nrpages);
346 if (defer_completion) {
319 INIT_WORK(&dio->complete_work, dio_aio_complete_work); 347 INIT_WORK(&dio->complete_work, dio_aio_complete_work);
320 queue_work(dio->inode->i_sb->s_dio_done_wq, 348 queue_work(dio->inode->i_sb->s_dio_done_wq,
321 &dio->complete_work); 349 &dio->complete_work);
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1210 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue 1238 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
1211 * so that we can call ->fsync. 1239 * so that we can call ->fsync.
1212 */ 1240 */
1213 if (dio->is_async && iov_iter_rw(iter) == WRITE && 1241 if (dio->is_async && iov_iter_rw(iter) == WRITE) {
1214 ((iocb->ki_filp->f_flags & O_DSYNC) || 1242 retval = 0;
1215 IS_SYNC(iocb->ki_filp->f_mapping->host))) { 1243 if ((iocb->ki_filp->f_flags & O_DSYNC) ||
1216 retval = dio_set_defer_completion(dio); 1244 IS_SYNC(iocb->ki_filp->f_mapping->host))
1245 retval = dio_set_defer_completion(dio);
1246 else if (!dio->inode->i_sb->s_dio_done_wq) {
1247 /*
1248 * In case of AIO write racing with buffered read we
1249 * need to defer completion. We can't decide this now,
1250 * however the workqueue needs to be initialized here.
1251 */
1252 retval = sb_init_dio_done_wq(dio->inode->i_sb);
1253 }
1217 if (retval) { 1254 if (retval) {
1218 /* 1255 /*
1219 * We grab i_mutex only for reads so we don't have 1256 * We grab i_mutex only for reads so we don't have
diff --git a/fs/exec.c b/fs/exec.c
index ac34d9724684..5470d3c1892a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1410,7 +1410,7 @@ static void free_bprm(struct linux_binprm *bprm)
1410 kfree(bprm); 1410 kfree(bprm);
1411} 1411}
1412 1412
1413int bprm_change_interp(char *interp, struct linux_binprm *bprm) 1413int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
1414{ 1414{
1415 /* If a binfmt changed the interp, free it first. */ 1415 /* If a binfmt changed the interp, free it first. */
1416 if (bprm->interp != bprm->filename) 1416 if (bprm->interp != bprm->filename)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 0491da3b28c3..448a1119f0be 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -749,7 +749,7 @@ static void send_sigio_to_task(struct task_struct *p,
749 * specific si_codes. In that case use SI_SIGIO instead 749 * specific si_codes. In that case use SI_SIGIO instead
750 * to remove the ambiguity. 750 * to remove the ambiguity.
751 */ 751 */
752 if (sig_specific_sicodes(signum)) 752 if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
753 si.si_code = SI_SIGIO; 753 si.si_code = SI_SIGIO;
754 754
755 /* Make sure we are called with one of the POLL_* 755 /* Make sure we are called with one of the POLL_*
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 98e845b7841b..11066d8647d2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1945,13 +1945,9 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1945{ 1945{
1946 struct gfs2_glock_iter *gi = seq->private; 1946 struct gfs2_glock_iter *gi = seq->private;
1947 loff_t n = *pos; 1947 loff_t n = *pos;
1948 int ret;
1949
1950 if (gi->last_pos <= *pos)
1951 n = (*pos - gi->last_pos);
1952 1948
1953 ret = rhashtable_walk_start(&gi->hti); 1949 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
1954 if (ret) 1950 if (rhashtable_walk_start(&gi->hti) != 0)
1955 return NULL; 1951 return NULL;
1956 1952
1957 do { 1953 do {
@@ -1959,6 +1955,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1959 } while (gi->gl && n--); 1955 } while (gi->gl && n--);
1960 1956
1961 gi->last_pos = *pos; 1957 gi->last_pos = *pos;
1958
1962 return gi->gl; 1959 return gi->gl;
1963} 1960}
1964 1961
@@ -1970,6 +1967,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1970 (*pos)++; 1967 (*pos)++;
1971 gi->last_pos = *pos; 1968 gi->last_pos = *pos;
1972 gfs2_glock_iter_next(gi); 1969 gfs2_glock_iter_next(gi);
1970
1973 return gi->gl; 1971 return gi->gl;
1974} 1972}
1975 1973
@@ -1980,6 +1978,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1980 1978
1981 gi->gl = NULL; 1979 gi->gl = NULL;
1982 rhashtable_walk_stop(&gi->hti); 1980 rhashtable_walk_stop(&gi->hti);
1981 rhashtable_walk_exit(&gi->hti);
1983} 1982}
1984 1983
1985static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1984static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2042,12 +2041,10 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
2042 struct gfs2_glock_iter *gi = seq->private; 2041 struct gfs2_glock_iter *gi = seq->private;
2043 2042
2044 gi->sdp = inode->i_private; 2043 gi->sdp = inode->i_private;
2045 gi->last_pos = 0;
2046 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2044 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2047 if (seq->buf) 2045 if (seq->buf)
2048 seq->size = GFS2_SEQ_GOODSIZE; 2046 seq->size = GFS2_SEQ_GOODSIZE;
2049 gi->gl = NULL; 2047 gi->gl = NULL;
2050 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2051 } 2048 }
2052 return ret; 2049 return ret;
2053} 2050}
@@ -2063,7 +2060,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
2063 struct gfs2_glock_iter *gi = seq->private; 2060 struct gfs2_glock_iter *gi = seq->private;
2064 2061
2065 gi->gl = NULL; 2062 gi->gl = NULL;
2066 rhashtable_walk_exit(&gi->hti);
2067 return seq_release_private(inode, file); 2063 return seq_release_private(inode, file);
2068} 2064}
2069 2065
diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..be61cf742b5e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -713,8 +713,24 @@ struct iomap_dio {
713static ssize_t iomap_dio_complete(struct iomap_dio *dio) 713static ssize_t iomap_dio_complete(struct iomap_dio *dio)
714{ 714{
715 struct kiocb *iocb = dio->iocb; 715 struct kiocb *iocb = dio->iocb;
716 struct inode *inode = file_inode(iocb->ki_filp);
716 ssize_t ret; 717 ssize_t ret;
717 718
719 /*
720 * Try again to invalidate clean pages which might have been cached by
721 * non-direct readahead, or faulted in by get_user_pages() if the source
722 * of the write was an mmap'ed region of the file we're writing. Either
723 * one is a pretty crazy thing to do, so we don't support it 100%. If
724 * this invalidation fails, tough, the write still worked...
725 */
726 if (!dio->error &&
727 (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
728 ret = invalidate_inode_pages2_range(inode->i_mapping,
729 iocb->ki_pos >> PAGE_SHIFT,
730 (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
731 WARN_ON_ONCE(ret);
732 }
733
718 if (dio->end_io) { 734 if (dio->end_io) {
719 ret = dio->end_io(iocb, 735 ret = dio->end_io(iocb,
720 dio->error ? dio->error : dio->size, 736 dio->error ? dio->error : dio->size,
@@ -993,6 +1009,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
993 WARN_ON_ONCE(ret); 1009 WARN_ON_ONCE(ret);
994 ret = 0; 1010 ret = 0;
995 1011
1012 if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
1013 !inode->i_sb->s_dio_done_wq) {
1014 ret = sb_init_dio_done_wq(inode->i_sb);
1015 if (ret < 0)
1016 goto out_free_dio;
1017 }
1018
996 inode_dio_begin(inode); 1019 inode_dio_begin(inode);
997 1020
998 blk_start_plug(&plug); 1021 blk_start_plug(&plug);
@@ -1015,13 +1038,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1015 if (ret < 0) 1038 if (ret < 0)
1016 iomap_dio_set_error(dio, ret); 1039 iomap_dio_set_error(dio, ret);
1017 1040
1018 if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
1019 !inode->i_sb->s_dio_done_wq) {
1020 ret = sb_init_dio_done_wq(inode->i_sb);
1021 if (ret < 0)
1022 iomap_dio_set_error(dio, ret);
1023 }
1024
1025 if (!atomic_dec_and_test(&dio->ref)) { 1041 if (!atomic_dec_and_test(&dio->ref)) {
1026 if (!is_sync_kiocb(iocb)) 1042 if (!is_sync_kiocb(iocb))
1027 return -EIOCBQUEUED; 1043 return -EIOCBQUEUED;
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1042 1058
1043 ret = iomap_dio_complete(dio); 1059 ret = iomap_dio_complete(dio);
1044 1060
1045 /*
1046 * Try again to invalidate clean pages which might have been cached by
1047 * non-direct readahead, or faulted in by get_user_pages() if the source
1048 * of the write was an mmap'ed region of the file we're writing. Either
1049 * one is a pretty crazy thing to do, so we don't support it 100%. If
1050 * this invalidation fails, tough, the write still worked...
1051 */
1052 if (iov_iter_rw(iter) == WRITE) {
1053 int err = invalidate_inode_pages2_range(mapping,
1054 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
1055 WARN_ON_ONCE(err);
1056 }
1057
1058 return ret; 1061 return ret;
1059 1062
1060out_free_dio: 1063out_free_dio:
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index db692f554158..447a24d77b89 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
514 if (sbi->s_fmode != ISOFS_INVALID_MODE) 514 if (sbi->s_fmode != ISOFS_INVALID_MODE)
515 seq_printf(m, ",fmode=%o", sbi->s_fmode); 515 seq_printf(m, ",fmode=%o", sbi->s_fmode);
516 516
517#ifdef CONFIG_JOLIET
517 if (sbi->s_nls_iocharset && 518 if (sbi->s_nls_iocharset &&
518 strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0) 519 strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
519 seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); 520 seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
521#endif
520 return 0; 522 return 0;
521} 523}
522 524
diff --git a/fs/namespace.c b/fs/namespace.c
index 54059b142d6b..3b601f115b6c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -468,7 +468,9 @@ static inline int may_write_real(struct file *file)
468 468
469 /* File refers to upper, writable layer? */ 469 /* File refers to upper, writable layer? */
470 upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); 470 upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
471 if (upperdentry && file_inode(file) == d_inode(upperdentry)) 471 if (upperdentry &&
472 (file_inode(file) == d_inode(upperdentry) ||
473 file_inode(file) == d_inode(dentry)))
472 return 0; 474 return 0;
473 475
474 /* Lower layer: can't write to real file, sorry... */ 476 /* Lower layer: can't write to real file, sorry... */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index aad97b30d5e6..c441f9387a1b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -561,10 +561,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
561 c->tmpfile = true; 561 c->tmpfile = true;
562 err = ovl_copy_up_locked(c); 562 err = ovl_copy_up_locked(c);
563 } else { 563 } else {
564 err = -EIO; 564 err = ovl_lock_rename_workdir(c->workdir, c->destdir);
565 if (lock_rename(c->workdir, c->destdir) != NULL) { 565 if (!err) {
566 pr_err("overlayfs: failed to lock workdir+upperdir\n");
567 } else {
568 err = ovl_copy_up_locked(c); 566 err = ovl_copy_up_locked(c);
569 unlock_rename(c->workdir, c->destdir); 567 unlock_rename(c->workdir, c->destdir);
570 } 568 }
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 3309b1912241..cc961a3bd3bd 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -216,26 +216,6 @@ out_unlock:
216 return err; 216 return err;
217} 217}
218 218
219static int ovl_lock_rename_workdir(struct dentry *workdir,
220 struct dentry *upperdir)
221{
222 /* Workdir should not be the same as upperdir */
223 if (workdir == upperdir)
224 goto err;
225
226 /* Workdir should not be subdir of upperdir and vice versa */
227 if (lock_rename(workdir, upperdir) != NULL)
228 goto err_unlock;
229
230 return 0;
231
232err_unlock:
233 unlock_rename(workdir, upperdir);
234err:
235 pr_err("overlayfs: failed to lock workdir+upperdir\n");
236 return -EIO;
237}
238
239static struct dentry *ovl_clear_empty(struct dentry *dentry, 219static struct dentry *ovl_clear_empty(struct dentry *dentry,
240 struct list_head *list) 220 struct list_head *list)
241{ 221{
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c3addd1114f1..654bea1a5ac9 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -506,6 +506,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
506 506
507 index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); 507 index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
508 if (IS_ERR(index)) { 508 if (IS_ERR(index)) {
509 err = PTR_ERR(index);
509 pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" 510 pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
510 "overlayfs: mount with '-o index=off' to disable inodes index.\n", 511 "overlayfs: mount with '-o index=off' to disable inodes index.\n",
511 d_inode(origin)->i_ino, name.len, name.name, 512 d_inode(origin)->i_ino, name.len, name.name,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d4e8c1a08fb0..c706a6f99928 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -235,6 +235,7 @@ bool ovl_inuse_trylock(struct dentry *dentry);
235void ovl_inuse_unlock(struct dentry *dentry); 235void ovl_inuse_unlock(struct dentry *dentry);
236int ovl_nlink_start(struct dentry *dentry, bool *locked); 236int ovl_nlink_start(struct dentry *dentry, bool *locked);
237void ovl_nlink_end(struct dentry *dentry, bool locked); 237void ovl_nlink_end(struct dentry *dentry, bool locked);
238int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
238 239
239static inline bool ovl_is_impuredir(struct dentry *dentry) 240static inline bool ovl_is_impuredir(struct dentry *dentry)
240{ 241{
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 878a750986dd..25d9b5adcd42 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -37,6 +37,9 @@ struct ovl_fs {
37 bool noxattr; 37 bool noxattr;
38 /* sb common to all layers */ 38 /* sb common to all layers */
39 struct super_block *same_sb; 39 struct super_block *same_sb;
40 /* Did we take the inuse lock? */
41 bool upperdir_locked;
42 bool workdir_locked;
40}; 43};
41 44
42/* private information held for every overlayfs dentry */ 45/* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 62e9b22a2077..0f85ee9c3268 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -988,6 +988,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
988 struct path *lowerstack, unsigned int numlower) 988 struct path *lowerstack, unsigned int numlower)
989{ 989{
990 int err; 990 int err;
991 struct dentry *index = NULL;
991 struct inode *dir = dentry->d_inode; 992 struct inode *dir = dentry->d_inode;
992 struct path path = { .mnt = mnt, .dentry = dentry }; 993 struct path path = { .mnt = mnt, .dentry = dentry };
993 LIST_HEAD(list); 994 LIST_HEAD(list);
@@ -1007,8 +1008,6 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
1007 1008
1008 inode_lock_nested(dir, I_MUTEX_PARENT); 1009 inode_lock_nested(dir, I_MUTEX_PARENT);
1009 list_for_each_entry(p, &list, l_node) { 1010 list_for_each_entry(p, &list, l_node) {
1010 struct dentry *index;
1011
1012 if (p->name[0] == '.') { 1011 if (p->name[0] == '.') {
1013 if (p->len == 1) 1012 if (p->len == 1)
1014 continue; 1013 continue;
@@ -1018,6 +1017,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
1018 index = lookup_one_len(p->name, dentry, p->len); 1017 index = lookup_one_len(p->name, dentry, p->len);
1019 if (IS_ERR(index)) { 1018 if (IS_ERR(index)) {
1020 err = PTR_ERR(index); 1019 err = PTR_ERR(index);
1020 index = NULL;
1021 break; 1021 break;
1022 } 1022 }
1023 err = ovl_verify_index(index, lowerstack, numlower); 1023 err = ovl_verify_index(index, lowerstack, numlower);
@@ -1029,7 +1029,9 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
1029 break; 1029 break;
1030 } 1030 }
1031 dput(index); 1031 dput(index);
1032 index = NULL;
1032 } 1033 }
1034 dput(index);
1033 inode_unlock(dir); 1035 inode_unlock(dir);
1034out: 1036out:
1035 ovl_cache_free(&list); 1037 ovl_cache_free(&list);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fd5ea4facc62..092d150643c1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -211,9 +211,10 @@ static void ovl_put_super(struct super_block *sb)
211 211
212 dput(ufs->indexdir); 212 dput(ufs->indexdir);
213 dput(ufs->workdir); 213 dput(ufs->workdir);
214 ovl_inuse_unlock(ufs->workbasedir); 214 if (ufs->workdir_locked)
215 ovl_inuse_unlock(ufs->workbasedir);
215 dput(ufs->workbasedir); 216 dput(ufs->workbasedir);
216 if (ufs->upper_mnt) 217 if (ufs->upper_mnt && ufs->upperdir_locked)
217 ovl_inuse_unlock(ufs->upper_mnt->mnt_root); 218 ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
218 mntput(ufs->upper_mnt); 219 mntput(ufs->upper_mnt);
219 for (i = 0; i < ufs->numlower; i++) 220 for (i = 0; i < ufs->numlower; i++)
@@ -881,9 +882,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
881 goto out_put_upperpath; 882 goto out_put_upperpath;
882 883
883 err = -EBUSY; 884 err = -EBUSY;
884 if (!ovl_inuse_trylock(upperpath.dentry)) { 885 if (ovl_inuse_trylock(upperpath.dentry)) {
885 pr_err("overlayfs: upperdir is in-use by another mount\n"); 886 ufs->upperdir_locked = true;
887 } else if (ufs->config.index) {
888 pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
886 goto out_put_upperpath; 889 goto out_put_upperpath;
890 } else {
891 pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
887 } 892 }
888 893
889 err = ovl_mount_dir(ufs->config.workdir, &workpath); 894 err = ovl_mount_dir(ufs->config.workdir, &workpath);
@@ -901,9 +906,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
901 } 906 }
902 907
903 err = -EBUSY; 908 err = -EBUSY;
904 if (!ovl_inuse_trylock(workpath.dentry)) { 909 if (ovl_inuse_trylock(workpath.dentry)) {
905 pr_err("overlayfs: workdir is in-use by another mount\n"); 910 ufs->workdir_locked = true;
911 } else if (ufs->config.index) {
912 pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
906 goto out_put_workpath; 913 goto out_put_workpath;
914 } else {
915 pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
907 } 916 }
908 917
909 ufs->workbasedir = workpath.dentry; 918 ufs->workbasedir = workpath.dentry;
@@ -1156,11 +1165,13 @@ out_put_lowerpath:
1156out_free_lowertmp: 1165out_free_lowertmp:
1157 kfree(lowertmp); 1166 kfree(lowertmp);
1158out_unlock_workdentry: 1167out_unlock_workdentry:
1159 ovl_inuse_unlock(workpath.dentry); 1168 if (ufs->workdir_locked)
1169 ovl_inuse_unlock(workpath.dentry);
1160out_put_workpath: 1170out_put_workpath:
1161 path_put(&workpath); 1171 path_put(&workpath);
1162out_unlock_upperdentry: 1172out_unlock_upperdentry:
1163 ovl_inuse_unlock(upperpath.dentry); 1173 if (ufs->upperdir_locked)
1174 ovl_inuse_unlock(upperpath.dentry);
1164out_put_upperpath: 1175out_put_upperpath:
1165 path_put(&upperpath); 1176 path_put(&upperpath);
1166out_free_config: 1177out_free_config:
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 117794582f9f..b9b239fa5cfd 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -430,7 +430,7 @@ void ovl_inuse_unlock(struct dentry *dentry)
430 } 430 }
431} 431}
432 432
433/* Called must hold OVL_I(inode)->oi_lock */ 433/* Caller must hold OVL_I(inode)->lock */
434static void ovl_cleanup_index(struct dentry *dentry) 434static void ovl_cleanup_index(struct dentry *dentry)
435{ 435{
436 struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; 436 struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
@@ -469,6 +469,9 @@ static void ovl_cleanup_index(struct dentry *dentry)
469 err = PTR_ERR(index); 469 err = PTR_ERR(index);
470 if (!IS_ERR(index)) 470 if (!IS_ERR(index))
471 err = ovl_cleanup(dir, index); 471 err = ovl_cleanup(dir, index);
472 else
473 index = NULL;
474
472 inode_unlock(dir); 475 inode_unlock(dir);
473 if (err) 476 if (err)
474 goto fail; 477 goto fail;
@@ -557,3 +560,22 @@ void ovl_nlink_end(struct dentry *dentry, bool locked)
557 mutex_unlock(&OVL_I(d_inode(dentry))->lock); 560 mutex_unlock(&OVL_I(d_inode(dentry))->lock);
558 } 561 }
559} 562}
563
564int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
565{
566 /* Workdir should not be the same as upperdir */
567 if (workdir == upperdir)
568 goto err;
569
570 /* Workdir should not be subdir of upperdir and vice versa */
571 if (lock_rename(workdir, upperdir) != NULL)
572 goto err_unlock;
573
574 return 0;
575
576err_unlock:
577 unlock_rename(workdir, upperdir);
578err:
579 pr_err("overlayfs: failed to lock workdir+upperdir\n");
580 return -EIO;
581}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c355574aa0..77a8eacbe032 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -62,6 +62,7 @@
62#include <linux/mman.h> 62#include <linux/mman.h>
63#include <linux/sched/mm.h> 63#include <linux/sched/mm.h>
64#include <linux/sched/numa_balancing.h> 64#include <linux/sched/numa_balancing.h>
65#include <linux/sched/task_stack.h>
65#include <linux/sched/task.h> 66#include <linux/sched/task.h>
66#include <linux/sched/cputime.h> 67#include <linux/sched/cputime.h>
67#include <linux/proc_fs.h> 68#include <linux/proc_fs.h>
@@ -118,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
118 * simple bit tests. 119 * simple bit tests.
119 */ 120 */
120static const char * const task_state_array[] = { 121static const char * const task_state_array[] = {
121 "R (running)", /* 0 */ 122
122 "S (sleeping)", /* 1 */ 123 /* states in TASK_REPORT: */
123 "D (disk sleep)", /* 2 */ 124 "R (running)", /* 0x00 */
124 "T (stopped)", /* 4 */ 125 "S (sleeping)", /* 0x01 */
125 "t (tracing stop)", /* 8 */ 126 "D (disk sleep)", /* 0x02 */
126 "X (dead)", /* 16 */ 127 "T (stopped)", /* 0x04 */
127 "Z (zombie)", /* 32 */ 128 "t (tracing stop)", /* 0x08 */
129 "X (dead)", /* 0x10 */
130 "Z (zombie)", /* 0x20 */
131 "P (parked)", /* 0x40 */
132
133 /* states beyond TASK_REPORT: */
134 "I (idle)", /* 0x80 */
128}; 135};
129 136
130static inline const char *get_task_state(struct task_struct *tsk) 137static inline const char *get_task_state(struct task_struct *tsk)
131{ 138{
132 unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; 139 BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
133 140 return task_state_array[__get_task_state(tsk)];
134 /*
135 * Parked tasks do not run; they sit in __kthread_parkme().
136 * Without this check, we would report them as running, which is
137 * clearly wrong, so we report them as sleeping instead.
138 */
139 if (tsk->state == TASK_PARKED)
140 state = TASK_INTERRUPTIBLE;
141
142 BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
143
144 return task_state_array[fls(state)];
145} 141}
146 142
147static inline int get_task_umask(struct task_struct *tsk) 143static inline int get_task_umask(struct task_struct *tsk)
@@ -421,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
421 * esp and eip are intentionally zeroed out. There is no 417 * esp and eip are intentionally zeroed out. There is no
422 * non-racy way to read them without freezing the task. 418 * non-racy way to read them without freezing the task.
423 * Programs that need reliable values can use ptrace(2). 419 * Programs that need reliable values can use ptrace(2).
420 *
421 * The only exception is if the task is core dumping because
422 * a program is not able to use ptrace(2) in that case. It is
423 * safe because the task has stopped executing permanently.
424 */ 424 */
425 if (permitted && (task->flags & PF_DUMPCORE)) {
426 eip = KSTK_EIP(task);
427 esp = KSTK_ESP(task);
428 }
425 } 429 }
426 430
427 get_task_comm(tcomm, task); 431 get_task_comm(tcomm, task);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8381db9db6d9..50b0556a124f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1980 ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, 1980 ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
1981 &warn_to[cnt]); 1981 &warn_to[cnt]);
1982 if (ret) { 1982 if (ret) {
1983 spin_lock(&transfer_to[cnt]->dq_dqb_lock);
1983 dquot_decr_inodes(transfer_to[cnt], inode_usage); 1984 dquot_decr_inodes(transfer_to[cnt], inode_usage);
1985 spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
1984 goto over_quota; 1986 goto over_quota;
1985 } 1987 }
1986 } 1988 }
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c0187cda2c1e..a73e5b34db41 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
328 if (!dquot->dq_off) { 328 if (!dquot->dq_off) {
329 alloc = true; 329 alloc = true;
330 down_write(&dqopt->dqio_sem); 330 down_write(&dqopt->dqio_sem);
331 } else {
332 down_read(&dqopt->dqio_sem);
331 } 333 }
332 ret = qtree_write_dquot( 334 ret = qtree_write_dquot(
333 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, 335 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
334 dquot); 336 dquot);
335 if (alloc) 337 if (alloc)
336 up_write(&dqopt->dqio_sem); 338 up_write(&dqopt->dqio_sem);
339 else
340 up_read(&dqopt->dqio_sem);
337 return ret; 341 return ret;
338} 342}
339 343
diff --git a/fs/read_write.c b/fs/read_write.c
index a2b9a47235c5..f0d4b16873e8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
112 * In the generic case the entire file is data, so as long as 112 * In the generic case the entire file is data, so as long as
113 * offset isn't at the end of the file then the offset is data. 113 * offset isn't at the end of the file then the offset is data.
114 */ 114 */
115 if (offset >= eof) 115 if ((unsigned long long)offset >= eof)
116 return -ENXIO; 116 return -ENXIO;
117 break; 117 break;
118 case SEEK_HOLE: 118 case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
120 * There is a virtual hole at the end of the file, so as long as 120 * There is a virtual hole at the end of the file, so as long as
121 * offset isn't i_size or larger, return i_size. 121 * offset isn't i_size or larger, return i_size.
122 */ 122 */
123 if (offset >= eof) 123 if ((unsigned long long)offset >= eof)
124 return -ENXIO; 124 return -ENXIO;
125 offset = eof; 125 offset = eof;
126 break; 126 break;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ef4b48d1ea42..1c713fd5b3e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
588 break; 588 break;
589 if (ACCESS_ONCE(ctx->released) || 589 if (ACCESS_ONCE(ctx->released) ||
590 fatal_signal_pending(current)) { 590 fatal_signal_pending(current)) {
591 /*
592 * &ewq->wq may be queued in fork_event, but
593 * __remove_wait_queue ignores the head
594 * parameter. It would be a problem if it
595 * didn't.
596 */
591 __remove_wait_queue(&ctx->event_wqh, &ewq->wq); 597 __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
592 if (ewq->msg.event == UFFD_EVENT_FORK) { 598 if (ewq->msg.event == UFFD_EVENT_FORK) {
593 struct userfaultfd_ctx *new; 599 struct userfaultfd_ctx *new;
@@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
1061 (unsigned long) 1067 (unsigned long)
1062 uwq->msg.arg.reserved.reserved1; 1068 uwq->msg.arg.reserved.reserved1;
1063 list_move(&uwq->wq.entry, &fork_event); 1069 list_move(&uwq->wq.entry, &fork_event);
1070 /*
1071 * fork_nctx can be freed as soon as
1072 * we drop the lock, unless we take a
1073 * reference on it.
1074 */
1075 userfaultfd_ctx_get(fork_nctx);
1064 spin_unlock(&ctx->event_wqh.lock); 1076 spin_unlock(&ctx->event_wqh.lock);
1065 ret = 0; 1077 ret = 0;
1066 break; 1078 break;
@@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
1091 1103
1092 if (!ret && msg->event == UFFD_EVENT_FORK) { 1104 if (!ret && msg->event == UFFD_EVENT_FORK) {
1093 ret = resolve_userfault_fork(ctx, fork_nctx, msg); 1105 ret = resolve_userfault_fork(ctx, fork_nctx, msg);
1106 spin_lock(&ctx->event_wqh.lock);
1107 if (!list_empty(&fork_event)) {
1108 /*
1109 * The fork thread didn't abort, so we can
1110 * drop the temporary refcount.
1111 */
1112 userfaultfd_ctx_put(fork_nctx);
1113
1114 uwq = list_first_entry(&fork_event,
1115 typeof(*uwq),
1116 wq.entry);
1117 /*
1118 * If fork_event list wasn't empty and in turn
1119 * the event wasn't already released by fork
1120 * (the event is allocated on fork kernel
1121 * stack), put the event back to its place in
1122 * the event_wq. fork_event head will be freed
1123 * as soon as we return so the event cannot
1124 * stay queued there no matter the current
1125 * "ret" value.
1126 */
1127 list_del(&uwq->wq.entry);
1128 __add_wait_queue(&ctx->event_wqh, &uwq->wq);
1094 1129
1095 if (!ret) { 1130 /*
1096 spin_lock(&ctx->event_wqh.lock); 1131 * Leave the event in the waitqueue and report
1097 if (!list_empty(&fork_event)) { 1132 * error to userland if we failed to resolve
1098 uwq = list_first_entry(&fork_event, 1133 * the userfault fork.
1099 typeof(*uwq), 1134 */
1100 wq.entry); 1135 if (likely(!ret))
1101 list_del(&uwq->wq.entry);
1102 __add_wait_queue(&ctx->event_wqh, &uwq->wq);
1103 userfaultfd_event_complete(ctx, uwq); 1136 userfaultfd_event_complete(ctx, uwq);
1104 } 1137 } else {
1105 spin_unlock(&ctx->event_wqh.lock); 1138 /*
1139 * Here the fork thread aborted and the
1140 * refcount from the fork thread on fork_nctx
1141 * has already been released. We still hold
1142 * the reference we took before releasing the
1143 * lock above. If resolve_userfault_fork
1144 * failed we've to drop it because the
1145 * fork_nctx has to be freed in such case. If
1146 * it succeeded we'll hold it because the new
1147 * uffd references it.
1148 */
1149 if (ret)
1150 userfaultfd_ctx_put(fork_nctx);
1106 } 1151 }
1152 spin_unlock(&ctx->event_wqh.lock);
1107 } 1153 }
1108 1154
1109 return ret; 1155 return ret;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4424f7fecf14..61cd28ba25f3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -250,7 +250,7 @@ xattr_getsecurity(struct inode *inode, const char *name, void *value,
250 } 250 }
251 memcpy(value, buffer, len); 251 memcpy(value, buffer, len);
252out: 252out:
253 security_release_secctx(buffer, len); 253 kfree(buffer);
254out_noalloc: 254out_noalloc:
255 return len; 255 return len;
256} 256}
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index b008ff3250eb..df3e600835e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
156 trace_xfs_ag_resv_free(pag, type, 0); 156 trace_xfs_ag_resv_free(pag, type, 0);
157 157
158 resv = xfs_perag_resv(pag, type); 158 resv = xfs_perag_resv(pag, type);
159 pag->pag_mount->m_ag_max_usable += resv->ar_asked; 159 if (pag->pag_agno == 0)
160 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
160 /* 161 /*
161 * AGFL blocks are always considered "free", so whatever 162 * AGFL blocks are always considered "free", so whatever
162 * was reserved at mount time must be given back at umount. 163 * was reserved at mount time must be given back at umount.
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
216 return error; 217 return error;
217 } 218 }
218 219
219 mp->m_ag_max_usable -= ask; 220 /*
221 * Reduce the maximum per-AG allocation length by however much we're
222 * trying to reserve for an AG. Since this is a filesystem-wide
223 * counter, we only make the adjustment for AG 0. This assumes that
224 * there aren't any AGs hungrier for per-AG reservation than AG 0.
225 */
226 if (pag->pag_agno == 0)
227 mp->m_ag_max_usable -= ask;
220 228
221 resv = xfs_perag_resv(pag, type); 229 resv = xfs_perag_resv(pag, type);
222 resv->ar_asked = ask; 230 resv->ar_asked = ask;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 459f4b4f08fe..044a363119be 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,7 +49,6 @@
49#include "xfs_rmap.h" 49#include "xfs_rmap.h"
50#include "xfs_ag_resv.h" 50#include "xfs_ag_resv.h"
51#include "xfs_refcount.h" 51#include "xfs_refcount.h"
52#include "xfs_rmap_btree.h"
53#include "xfs_icache.h" 52#include "xfs_icache.h"
54 53
55 54
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
192 int maxrecs; /* maximum record count at this level */ 191 int maxrecs; /* maximum record count at this level */
193 xfs_mount_t *mp; /* mount structure */ 192 xfs_mount_t *mp; /* mount structure */
194 xfs_filblks_t rval; /* return value */ 193 xfs_filblks_t rval; /* return value */
195 xfs_filblks_t orig_len;
196 194
197 mp = ip->i_mount; 195 mp = ip->i_mount;
198
199 /* Calculate the worst-case size of the bmbt. */
200 orig_len = len;
201 maxrecs = mp->m_bmap_dmxr[0]; 196 maxrecs = mp->m_bmap_dmxr[0];
202 for (level = 0, rval = 0; 197 for (level = 0, rval = 0;
203 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); 198 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
205 len += maxrecs - 1; 200 len += maxrecs - 1;
206 do_div(len, maxrecs); 201 do_div(len, maxrecs);
207 rval += len; 202 rval += len;
208 if (len == 1) { 203 if (len == 1)
209 rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 204 return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
210 level - 1; 205 level - 1;
211 break;
212 }
213 if (level == 0) 206 if (level == 0)
214 maxrecs = mp->m_bmap_dmxr[1]; 207 maxrecs = mp->m_bmap_dmxr[1];
215 } 208 }
216
217 /* Calculate the worst-case size of the rmapbt. */
218 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
219 rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
220 mp->m_rmap_maxlevels;
221
222 return rval; 209 return rval;
223} 210}
224 211
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29172609f2a3..f18e5932aec4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -343,7 +343,8 @@ xfs_end_io(
343 error = xfs_reflink_end_cow(ip, offset, size); 343 error = xfs_reflink_end_cow(ip, offset, size);
344 break; 344 break;
345 case XFS_IO_UNWRITTEN: 345 case XFS_IO_UNWRITTEN:
346 error = xfs_iomap_write_unwritten(ip, offset, size); 346 /* writeback should never update isize */
347 error = xfs_iomap_write_unwritten(ip, offset, size, false);
347 break; 348 break;
348 default: 349 default:
349 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); 350 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..e9db7fc95b70 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1459,7 +1459,19 @@ xfs_shift_file_space(
1459 return error; 1459 return error;
1460 1460
1461 /* 1461 /*
1462 * The extent shiting code works on extent granularity. So, if 1462 * Clean out anything hanging around in the cow fork now that
1463 * we've flushed all the dirty data out to disk to avoid having
1464 * CoW extents at the wrong offsets.
1465 */
1466 if (xfs_is_reflink_inode(ip)) {
1467 error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
1468 true);
1469 if (error)
1470 return error;
1471 }
1472
1473 /*
1474 * The extent shifting code works on extent granularity. So, if
1463 * stop_fsb is not the starting block of extent, we need to split 1475 * stop_fsb is not the starting block of extent, we need to split
1464 * the extent at stop_fsb. 1476 * the extent at stop_fsb.
1465 */ 1477 */
@@ -2110,11 +2122,31 @@ xfs_swap_extents(
2110 ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; 2122 ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
2111 tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 2123 tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
2112 tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; 2124 tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
2125 }
2126
2127 /* Swap the cow forks. */
2128 if (xfs_sb_version_hasreflink(&mp->m_sb)) {
2129 xfs_extnum_t extnum;
2130
2131 ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
2132 ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
2133
2134 extnum = ip->i_cnextents;
2135 ip->i_cnextents = tip->i_cnextents;
2136 tip->i_cnextents = extnum;
2137
2113 cowfp = ip->i_cowfp; 2138 cowfp = ip->i_cowfp;
2114 ip->i_cowfp = tip->i_cowfp; 2139 ip->i_cowfp = tip->i_cowfp;
2115 tip->i_cowfp = cowfp; 2140 tip->i_cowfp = cowfp;
2116 xfs_inode_set_cowblocks_tag(ip); 2141
2117 xfs_inode_set_cowblocks_tag(tip); 2142 if (ip->i_cowfp && ip->i_cnextents)
2143 xfs_inode_set_cowblocks_tag(ip);
2144 else
2145 xfs_inode_clear_cowblocks_tag(ip);
2146 if (tip->i_cowfp && tip->i_cnextents)
2147 xfs_inode_set_cowblocks_tag(tip);
2148 else
2149 xfs_inode_clear_cowblocks_tag(tip);
2118 } 2150 }
2119 2151
2120 xfs_trans_log_inode(tp, ip, src_log_flags); 2152 xfs_trans_log_inode(tp, ip, src_log_flags);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index da14658da310..2f97c12ca75e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
1258 int size; 1258 int size;
1259 int offset; 1259 int offset;
1260 1260
1261 total_nr_pages = bp->b_page_count;
1262
1263 /* skip the pages in the buffer before the start offset */ 1261 /* skip the pages in the buffer before the start offset */
1264 page_index = 0; 1262 page_index = 0;
1265 offset = *buf_offset; 1263 offset = *buf_offset;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bd786a9ac2c3..eaf86f55b7f2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,7 +347,7 @@ xfs_verifier_error(
347{ 347{
348 struct xfs_mount *mp = bp->b_target->bt_mount; 348 struct xfs_mount *mp = bp->b_target->bt_mount;
349 349
350 xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx", 350 xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
351 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", 351 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
352 __return_address, bp->b_ops->name, bp->b_bn); 352 __return_address, bp->b_ops->name, bp->b_bn);
353 353
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebdd0bd2b261..309e26c9dddb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -58,7 +58,7 @@ xfs_zero_range(
58 xfs_off_t count, 58 xfs_off_t count,
59 bool *did_zero) 59 bool *did_zero)
60{ 60{
61 return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); 61 return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
62} 62}
63 63
64int 64int
@@ -377,8 +377,6 @@ restart:
377 */ 377 */
378 spin_lock(&ip->i_flags_lock); 378 spin_lock(&ip->i_flags_lock);
379 if (iocb->ki_pos > i_size_read(inode)) { 379 if (iocb->ki_pos > i_size_read(inode)) {
380 bool zero = false;
381
382 spin_unlock(&ip->i_flags_lock); 380 spin_unlock(&ip->i_flags_lock);
383 if (!drained_dio) { 381 if (!drained_dio) {
384 if (*iolock == XFS_IOLOCK_SHARED) { 382 if (*iolock == XFS_IOLOCK_SHARED) {
@@ -399,7 +397,7 @@ restart:
399 drained_dio = true; 397 drained_dio = true;
400 goto restart; 398 goto restart;
401 } 399 }
402 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); 400 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
403 if (error) 401 if (error)
404 return error; 402 return error;
405 } else 403 } else
@@ -436,7 +434,6 @@ xfs_dio_write_end_io(
436 struct inode *inode = file_inode(iocb->ki_filp); 434 struct inode *inode = file_inode(iocb->ki_filp);
437 struct xfs_inode *ip = XFS_I(inode); 435 struct xfs_inode *ip = XFS_I(inode);
438 loff_t offset = iocb->ki_pos; 436 loff_t offset = iocb->ki_pos;
439 bool update_size = false;
440 int error = 0; 437 int error = 0;
441 438
442 trace_xfs_end_io_direct_write(ip, offset, size); 439 trace_xfs_end_io_direct_write(ip, offset, size);
@@ -447,6 +444,21 @@ xfs_dio_write_end_io(
447 if (size <= 0) 444 if (size <= 0)
448 return size; 445 return size;
449 446
447 if (flags & IOMAP_DIO_COW) {
448 error = xfs_reflink_end_cow(ip, offset, size);
449 if (error)
450 return error;
451 }
452
453 /*
454 * Unwritten conversion updates the in-core isize after extent
455 * conversion but before updating the on-disk size. Updating isize any
456 * earlier allows a racing dio read to find unwritten extents before
457 * they are converted.
458 */
459 if (flags & IOMAP_DIO_UNWRITTEN)
460 return xfs_iomap_write_unwritten(ip, offset, size, true);
461
450 /* 462 /*
451 * We need to update the in-core inode size here so that we don't end up 463 * We need to update the in-core inode size here so that we don't end up
452 * with the on-disk inode size being outside the in-core inode size. We 464 * with the on-disk inode size being outside the in-core inode size. We
@@ -461,20 +473,11 @@ xfs_dio_write_end_io(
461 spin_lock(&ip->i_flags_lock); 473 spin_lock(&ip->i_flags_lock);
462 if (offset + size > i_size_read(inode)) { 474 if (offset + size > i_size_read(inode)) {
463 i_size_write(inode, offset + size); 475 i_size_write(inode, offset + size);
464 update_size = true; 476 spin_unlock(&ip->i_flags_lock);
465 }
466 spin_unlock(&ip->i_flags_lock);
467
468 if (flags & IOMAP_DIO_COW) {
469 error = xfs_reflink_end_cow(ip, offset, size);
470 if (error)
471 return error;
472 }
473
474 if (flags & IOMAP_DIO_UNWRITTEN)
475 error = xfs_iomap_write_unwritten(ip, offset, size);
476 else if (update_size)
477 error = xfs_setfilesize(ip, offset, size); 477 error = xfs_setfilesize(ip, offset, size);
478 } else {
479 spin_unlock(&ip->i_flags_lock);
480 }
478 481
479 return error; 482 return error;
480} 483}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5599dda4727a..4ec5b7f45401 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
1624 goto out; 1624 goto out;
1625 1625
1626 /* 1626 /*
1627 * Clear the reflink flag if we truncated everything. 1627 * Clear the reflink flag if there are no data fork blocks and
1628 * there are no extents staged in the cow fork.
1628 */ 1629 */
1629 if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { 1630 if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
1630 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1631 if (ip->i_d.di_nblocks == 0)
1632 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1631 xfs_inode_clear_cowblocks_tag(ip); 1633 xfs_inode_clear_cowblocks_tag(ip);
1632 } 1634 }
1633 1635
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6d0f74ec31e8..a705f34b58fa 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -745,7 +745,7 @@ xfs_iflush_done(
745 */ 745 */
746 iip = INODE_ITEM(blip); 746 iip = INODE_ITEM(blip);
747 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || 747 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
748 lip->li_flags & XFS_LI_FAILED) 748 (blip->li_flags & XFS_LI_FAILED))
749 need_ail++; 749 need_ail++;
750 750
751 blip = next; 751 blip = next;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5049e8ab6e30..aa75389be8cf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
1088 int *join_flags) 1088 int *join_flags)
1089{ 1089{
1090 struct inode *inode = VFS_I(ip); 1090 struct inode *inode = VFS_I(ip);
1091 struct super_block *sb = inode->i_sb;
1091 int error; 1092 int error;
1092 1093
1093 *join_flags = 0; 1094 *join_flags = 0;
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
1100 if (fa->fsx_xflags & FS_XFLAG_DAX) { 1101 if (fa->fsx_xflags & FS_XFLAG_DAX) {
1101 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) 1102 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
1102 return -EINVAL; 1103 return -EINVAL;
1103 if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) 1104 if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
1104 return -EINVAL; 1105 return -EINVAL;
1105 } 1106 }
1106 1107
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a1909bc064e9..f179bdf1644d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -829,7 +829,8 @@ int
829xfs_iomap_write_unwritten( 829xfs_iomap_write_unwritten(
830 xfs_inode_t *ip, 830 xfs_inode_t *ip,
831 xfs_off_t offset, 831 xfs_off_t offset,
832 xfs_off_t count) 832 xfs_off_t count,
833 bool update_isize)
833{ 834{
834 xfs_mount_t *mp = ip->i_mount; 835 xfs_mount_t *mp = ip->i_mount;
835 xfs_fileoff_t offset_fsb; 836 xfs_fileoff_t offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
840 xfs_trans_t *tp; 841 xfs_trans_t *tp;
841 xfs_bmbt_irec_t imap; 842 xfs_bmbt_irec_t imap;
842 struct xfs_defer_ops dfops; 843 struct xfs_defer_ops dfops;
844 struct inode *inode = VFS_I(ip);
843 xfs_fsize_t i_size; 845 xfs_fsize_t i_size;
844 uint resblks; 846 uint resblks;
845 int error; 847 int error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
899 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); 901 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
900 if (i_size > offset + count) 902 if (i_size > offset + count)
901 i_size = offset + count; 903 i_size = offset + count;
902 904 if (update_isize && i_size > i_size_read(inode))
905 i_size_write(inode, i_size);
903 i_size = xfs_new_eof(ip, i_size); 906 i_size = xfs_new_eof(ip, i_size);
904 if (i_size) { 907 if (i_size) {
905 ip->i_d.di_size = i_size; 908 ip->i_d.di_size = i_size;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 00db3ecea084..ee535065c5d0 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
27 struct xfs_bmbt_irec *, int); 27 struct xfs_bmbt_irec *, int);
28int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, 28int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
29 struct xfs_bmbt_irec *); 29 struct xfs_bmbt_irec *);
30int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); 30int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
31 31
32void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, 32void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
33 struct xfs_bmbt_irec *); 33 struct xfs_bmbt_irec *);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 2f2dc3c09ad0..4246876df7b7 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
274 (end - 1) >> PAGE_SHIFT); 274 (end - 1) >> PAGE_SHIFT);
275 WARN_ON_ONCE(error); 275 WARN_ON_ONCE(error);
276 276
277 error = xfs_iomap_write_unwritten(ip, start, length); 277 error = xfs_iomap_write_unwritten(ip, start, length, false);
278 if (error) 278 if (error)
279 goto out_drop_iolock; 279 goto out_drop_iolock;
280 } 280 }
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3246815c24d6..37e603bf1591 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -736,7 +736,13 @@ xfs_reflink_end_cow(
736 /* If there is a hole at end_fsb - 1 go to the previous extent */ 736 /* If there is a hole at end_fsb - 1 go to the previous extent */
737 if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || 737 if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
738 got.br_startoff > end_fsb) { 738 got.br_startoff > end_fsb) {
739 ASSERT(idx > 0); 739 /*
740 * In case of racing, overlapping AIO writes no COW extents
741 * might be left by the time I/O completes for the loser of
742 * the race. In that case we are done.
743 */
744 if (idx <= 0)
745 goto out_cancel;
740 xfs_iext_get_extent(ifp, --idx, &got); 746 xfs_iext_get_extent(ifp, --idx, &got);
741 } 747 }
742 748
@@ -809,6 +815,7 @@ next_extent:
809 815
810out_defer: 816out_defer:
811 xfs_defer_cancel(&dfops); 817 xfs_defer_cancel(&dfops);
818out_cancel:
812 xfs_trans_cancel(tp); 819 xfs_trans_cancel(tp);
813 xfs_iunlock(ip, XFS_ILOCK_EXCL); 820 xfs_iunlock(ip, XFS_ILOCK_EXCL);
814out: 821out:
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c996f4ae4a5f..584cf2d573ba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
1654 "DAX and reflink have not been tested together!"); 1654 "DAX and reflink have not been tested together!");
1655 } 1655 }
1656 1656
1657 if (mp->m_flags & XFS_MOUNT_DISCARD) {
1658 struct request_queue *q = bdev_get_queue(sb->s_bdev);
1659
1660 if (!blk_queue_discard(q)) {
1661 xfs_warn(mp, "mounting with \"discard\" option, but "
1662 "the device does not support discard");
1663 mp->m_flags &= ~XFS_MOUNT_DISCARD;
1664 }
1665 }
1666
1657 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 1667 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
1658 if (mp->m_sb.sb_rblocks) { 1668 if (mp->m_sb.sb_rblocks) {
1659 xfs_alert(mp, 1669 xfs_alert(mp,