aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/ceph/caps.c5
-rw-r--r--fs/crypto/keyinfo.c5
-rw-r--r--fs/direct-io.c39
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h24
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/exec.c1
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/fscache/object-list.c7
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/iomap.c41
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c11
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h1
-rw-r--r--fs/xfs/xfs_aops.c47
-rw-r--r--fs/xfs/xfs_file.c21
-rw-r--r--fs/xfs/xfs_fsmap.c48
-rw-r--r--fs/xfs/xfs_super.c2
19 files changed, 184 insertions, 91 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 35a128acfbd1..161694b66038 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1135,7 +1135,7 @@ static int btrfs_fill_super(struct super_block *sb,
1135#ifdef CONFIG_BTRFS_FS_POSIX_ACL 1135#ifdef CONFIG_BTRFS_FS_POSIX_ACL
1136 sb->s_flags |= MS_POSIXACL; 1136 sb->s_flags |= MS_POSIXACL;
1137#endif 1137#endif
1138 sb->s_flags |= MS_I_VERSION; 1138 sb->s_flags |= SB_I_VERSION;
1139 sb->s_iflags |= SB_I_CGROUPWB; 1139 sb->s_iflags |= SB_I_CGROUPWB;
1140 1140
1141 err = super_setup_bdi(sb); 1141 err = super_setup_bdi(sb);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 157fe59fbabe..1978a8cb1cb1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1991,6 +1991,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
1991retry: 1991retry:
1992 spin_lock(&ci->i_ceph_lock); 1992 spin_lock(&ci->i_ceph_lock);
1993 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { 1993 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1994 spin_unlock(&ci->i_ceph_lock);
1994 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); 1995 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
1995 goto out; 1996 goto out;
1996 } 1997 }
@@ -2008,8 +2009,10 @@ retry:
2008 mutex_lock(&session->s_mutex); 2009 mutex_lock(&session->s_mutex);
2009 goto retry; 2010 goto retry;
2010 } 2011 }
2011 if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) 2012 if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) {
2013 spin_unlock(&ci->i_ceph_lock);
2012 goto out; 2014 goto out;
2015 }
2013 2016
2014 flushing = __mark_caps_flushing(inode, session, true, 2017 flushing = __mark_caps_flushing(inode, session, true,
2015 &flush_tid, &oldest_flush_tid); 2018 &flush_tid, &oldest_flush_tid);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 018c588c7ac3..8e704d12a1cf 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -109,6 +109,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
109 goto out; 109 goto out;
110 } 110 }
111 ukp = user_key_payload_locked(keyring_key); 111 ukp = user_key_payload_locked(keyring_key);
112 if (!ukp) {
113 /* key was revoked before we acquired its semaphore */
114 res = -EKEYREVOKED;
115 goto out;
116 }
112 if (ukp->datalen != sizeof(struct fscrypt_key)) { 117 if (ukp->datalen != sizeof(struct fscrypt_key)) {
113 res = -EINVAL; 118 res = -EINVAL;
114 goto out; 119 goto out;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 96415c65bbdc..b53e66d9abd7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -45,6 +45,12 @@
45#define DIO_PAGES 64 45#define DIO_PAGES 64
46 46
47/* 47/*
48 * Flags for dio_complete()
49 */
50#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */
51#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */
52
53/*
48 * This code generally works in units of "dio_blocks". A dio_block is 54 * This code generally works in units of "dio_blocks". A dio_block is
49 * somewhere between the hard sector size and the filesystem block size. it 55 * somewhere between the hard sector size and the filesystem block size. it
50 * is determined on a per-invocation basis. When talking to the filesystem 56 * is determined on a per-invocation basis. When talking to the filesystem
@@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio,
225 * filesystems can use it to hold additional state between get_block calls and 231 * filesystems can use it to hold additional state between get_block calls and
226 * dio_complete. 232 * dio_complete.
227 */ 233 */
228static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) 234static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
229{ 235{
230 loff_t offset = dio->iocb->ki_pos; 236 loff_t offset = dio->iocb->ki_pos;
231 ssize_t transferred = 0; 237 ssize_t transferred = 0;
@@ -259,14 +265,27 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
259 if (ret == 0) 265 if (ret == 0)
260 ret = transferred; 266 ret = transferred;
261 267
268 if (dio->end_io) {
269 // XXX: ki_pos??
270 err = dio->end_io(dio->iocb, offset, ret, dio->private);
271 if (err)
272 ret = err;
273 }
274
262 /* 275 /*
263 * Try again to invalidate clean pages which might have been cached by 276 * Try again to invalidate clean pages which might have been cached by
264 * non-direct readahead, or faulted in by get_user_pages() if the source 277 * non-direct readahead, or faulted in by get_user_pages() if the source
265 * of the write was an mmap'ed region of the file we're writing. Either 278 * of the write was an mmap'ed region of the file we're writing. Either
266 * one is a pretty crazy thing to do, so we don't support it 100%. If 279 * one is a pretty crazy thing to do, so we don't support it 100%. If
267 * this invalidation fails, tough, the write still worked... 280 * this invalidation fails, tough, the write still worked...
281 *
282 * And this page cache invalidation has to be after dio->end_io(), as
283 * some filesystems convert unwritten extents to real allocations in
284 * end_io() when necessary, otherwise a racing buffer read would cache
285 * zeros from unwritten extents.
268 */ 286 */
269 if (ret > 0 && dio->op == REQ_OP_WRITE && 287 if (flags & DIO_COMPLETE_INVALIDATE &&
288 ret > 0 && dio->op == REQ_OP_WRITE &&
270 dio->inode->i_mapping->nrpages) { 289 dio->inode->i_mapping->nrpages) {
271 err = invalidate_inode_pages2_range(dio->inode->i_mapping, 290 err = invalidate_inode_pages2_range(dio->inode->i_mapping,
272 offset >> PAGE_SHIFT, 291 offset >> PAGE_SHIFT,
@@ -274,18 +293,10 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
274 WARN_ON_ONCE(err); 293 WARN_ON_ONCE(err);
275 } 294 }
276 295
277 if (dio->end_io) {
278
279 // XXX: ki_pos??
280 err = dio->end_io(dio->iocb, offset, ret, dio->private);
281 if (err)
282 ret = err;
283 }
284
285 if (!(dio->flags & DIO_SKIP_DIO_COUNT)) 296 if (!(dio->flags & DIO_SKIP_DIO_COUNT))
286 inode_dio_end(dio->inode); 297 inode_dio_end(dio->inode);
287 298
288 if (is_async) { 299 if (flags & DIO_COMPLETE_ASYNC) {
289 /* 300 /*
290 * generic_write_sync expects ki_pos to have been updated 301 * generic_write_sync expects ki_pos to have been updated
291 * already, but the submission path only does this for 302 * already, but the submission path only does this for
@@ -306,7 +317,7 @@ static void dio_aio_complete_work(struct work_struct *work)
306{ 317{
307 struct dio *dio = container_of(work, struct dio, complete_work); 318 struct dio *dio = container_of(work, struct dio, complete_work);
308 319
309 dio_complete(dio, 0, true); 320 dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE);
310} 321}
311 322
312static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); 323static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
@@ -348,7 +359,7 @@ static void dio_bio_end_aio(struct bio *bio)
348 queue_work(dio->inode->i_sb->s_dio_done_wq, 359 queue_work(dio->inode->i_sb->s_dio_done_wq,
349 &dio->complete_work); 360 &dio->complete_work);
350 } else { 361 } else {
351 dio_complete(dio, 0, true); 362 dio_complete(dio, 0, DIO_COMPLETE_ASYNC);
352 } 363 }
353 } 364 }
354} 365}
@@ -1360,7 +1371,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1360 dio_await_completion(dio); 1371 dio_await_completion(dio);
1361 1372
1362 if (drop_refcount(dio) == 0) { 1373 if (drop_refcount(dio) == 0) {
1363 retval = dio_complete(dio, retval, false); 1374 retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE);
1364 } else 1375 } else
1365 BUG_ON(retval != -EIOCBQUEUED); 1376 BUG_ON(retval != -EIOCBQUEUED);
1366 1377
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 9c351bf757b2..3fbc0ff79699 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -84,11 +84,16 @@ struct ecryptfs_page_crypt_context {
84static inline struct ecryptfs_auth_tok * 84static inline struct ecryptfs_auth_tok *
85ecryptfs_get_encrypted_key_payload_data(struct key *key) 85ecryptfs_get_encrypted_key_payload_data(struct key *key)
86{ 86{
87 if (key->type == &key_type_encrypted) 87 struct encrypted_key_payload *payload;
88 return (struct ecryptfs_auth_tok *) 88
89 (&((struct encrypted_key_payload *)key->payload.data[0])->payload_data); 89 if (key->type != &key_type_encrypted)
90 else
91 return NULL; 90 return NULL;
91
92 payload = key->payload.data[0];
93 if (!payload)
94 return ERR_PTR(-EKEYREVOKED);
95
96 return (struct ecryptfs_auth_tok *)payload->payload_data;
92} 97}
93 98
94static inline struct key *ecryptfs_get_encrypted_key(char *sig) 99static inline struct key *ecryptfs_get_encrypted_key(char *sig)
@@ -114,12 +119,17 @@ static inline struct ecryptfs_auth_tok *
114ecryptfs_get_key_payload_data(struct key *key) 119ecryptfs_get_key_payload_data(struct key *key)
115{ 120{
116 struct ecryptfs_auth_tok *auth_tok; 121 struct ecryptfs_auth_tok *auth_tok;
122 struct user_key_payload *ukp;
117 123
118 auth_tok = ecryptfs_get_encrypted_key_payload_data(key); 124 auth_tok = ecryptfs_get_encrypted_key_payload_data(key);
119 if (!auth_tok) 125 if (auth_tok)
120 return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data;
121 else
122 return auth_tok; 126 return auth_tok;
127
128 ukp = user_key_payload_locked(key);
129 if (!ukp)
130 return ERR_PTR(-EKEYREVOKED);
131
132 return (struct ecryptfs_auth_tok *)ukp->data;
123} 133}
124 134
125#define ECRYPTFS_MAX_KEYSET_SIZE 1024 135#define ECRYPTFS_MAX_KEYSET_SIZE 1024
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 3cf1546dca82..fa218cd64f74 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -459,7 +459,8 @@ out:
459 * @auth_tok_key: key containing the authentication token 459 * @auth_tok_key: key containing the authentication token
460 * @auth_tok: authentication token 460 * @auth_tok: authentication token
461 * 461 *
462 * Returns zero on valid auth tok; -EINVAL otherwise 462 * Returns zero on valid auth tok; -EINVAL if the payload is invalid; or
463 * -EKEYREVOKED if the key was revoked before we acquired its semaphore.
463 */ 464 */
464static int 465static int
465ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, 466ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key,
@@ -468,6 +469,12 @@ ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key,
468 int rc = 0; 469 int rc = 0;
469 470
470 (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key); 471 (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key);
472 if (IS_ERR(*auth_tok)) {
473 rc = PTR_ERR(*auth_tok);
474 *auth_tok = NULL;
475 goto out;
476 }
477
471 if (ecryptfs_verify_version((*auth_tok)->version)) { 478 if (ecryptfs_verify_version((*auth_tok)->version)) {
472 printk(KERN_ERR "Data structure version mismatch. Userspace " 479 printk(KERN_ERR "Data structure version mismatch. Userspace "
473 "tools must match eCryptfs kernel module with major " 480 "tools must match eCryptfs kernel module with major "
diff --git a/fs/exec.c b/fs/exec.c
index 5470d3c1892a..3e14ba25f678 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
1802 /* execve succeeded */ 1802 /* execve succeeded */
1803 current->fs->in_exec = 0; 1803 current->fs->in_exec = 0;
1804 current->in_execve = 0; 1804 current->in_execve = 0;
1805 membarrier_execve(current);
1805 acct_update_integrals(current); 1806 acct_update_integrals(current);
1806 task_numa_free(current); 1807 task_numa_free(current);
1807 free_bprm(bprm); 1808 free_bprm(bprm);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b104096fce9e..b0915b734a38 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1677,7 +1677,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1677 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; 1677 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1678 return 1; 1678 return 1;
1679 case Opt_i_version: 1679 case Opt_i_version:
1680 sb->s_flags |= MS_I_VERSION; 1680 sb->s_flags |= SB_I_VERSION;
1681 return 1; 1681 return 1;
1682 case Opt_lazytime: 1682 case Opt_lazytime:
1683 sb->s_flags |= MS_LAZYTIME; 1683 sb->s_flags |= MS_LAZYTIME;
@@ -2060,7 +2060,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2060 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); 2060 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2061 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) 2061 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2062 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); 2062 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2063 if (sb->s_flags & MS_I_VERSION) 2063 if (sb->s_flags & SB_I_VERSION)
2064 SEQ_OPTS_PUTS("i_version"); 2064 SEQ_OPTS_PUTS("i_version");
2065 if (nodefs || sbi->s_stripe) 2065 if (nodefs || sbi->s_stripe)
2066 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); 2066 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index b5ab06fabc60..0438d4cd91ef 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -331,6 +331,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data)
331 rcu_read_lock(); 331 rcu_read_lock();
332 332
333 confkey = user_key_payload_rcu(key); 333 confkey = user_key_payload_rcu(key);
334 if (!confkey) {
335 /* key was revoked */
336 rcu_read_unlock();
337 key_put(key);
338 goto no_config;
339 }
340
334 buf = confkey->data; 341 buf = confkey->data;
335 342
336 for (len = confkey->datalen - 1; len >= 0; len--) { 343 for (len = confkey->datalen - 1; len >= 0; len--) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 622081b97426..24967382a7b1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1308,7 +1308,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1308 */ 1308 */
1309 over = !dir_emit(ctx, dirent->name, dirent->namelen, 1309 over = !dir_emit(ctx, dirent->name, dirent->namelen,
1310 dirent->ino, dirent->type); 1310 dirent->ino, dirent->type);
1311 ctx->pos = dirent->off; 1311 if (!over)
1312 ctx->pos = dirent->off;
1312 } 1313 }
1313 1314
1314 buf += reclen; 1315 buf += reclen;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 65c88379a3a1..94a745acaef8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1059,7 +1059,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1059 if (sb->s_flags & MS_MANDLOCK) 1059 if (sb->s_flags & MS_MANDLOCK)
1060 goto err; 1060 goto err;
1061 1061
1062 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); 1062 sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION);
1063 1063
1064 if (!parse_fuse_opt(data, &d, is_bdev)) 1064 if (!parse_fuse_opt(data, &d, is_bdev))
1065 goto err; 1065 goto err;
diff --git a/fs/iomap.c b/fs/iomap.c
index be61cf742b5e..d4801f8dd4fd 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
714{ 714{
715 struct kiocb *iocb = dio->iocb; 715 struct kiocb *iocb = dio->iocb;
716 struct inode *inode = file_inode(iocb->ki_filp); 716 struct inode *inode = file_inode(iocb->ki_filp);
717 loff_t offset = iocb->ki_pos;
717 ssize_t ret; 718 ssize_t ret;
718 719
719 /*
720 * Try again to invalidate clean pages which might have been cached by
721 * non-direct readahead, or faulted in by get_user_pages() if the source
722 * of the write was an mmap'ed region of the file we're writing. Either
723 * one is a pretty crazy thing to do, so we don't support it 100%. If
724 * this invalidation fails, tough, the write still worked...
725 */
726 if (!dio->error &&
727 (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
728 ret = invalidate_inode_pages2_range(inode->i_mapping,
729 iocb->ki_pos >> PAGE_SHIFT,
730 (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
731 WARN_ON_ONCE(ret);
732 }
733
734 if (dio->end_io) { 720 if (dio->end_io) {
735 ret = dio->end_io(iocb, 721 ret = dio->end_io(iocb,
736 dio->error ? dio->error : dio->size, 722 dio->error ? dio->error : dio->size,
@@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
742 if (likely(!ret)) { 728 if (likely(!ret)) {
743 ret = dio->size; 729 ret = dio->size;
744 /* check for short read */ 730 /* check for short read */
745 if (iocb->ki_pos + ret > dio->i_size && 731 if (offset + ret > dio->i_size &&
746 !(dio->flags & IOMAP_DIO_WRITE)) 732 !(dio->flags & IOMAP_DIO_WRITE))
747 ret = dio->i_size - iocb->ki_pos; 733 ret = dio->i_size - offset;
748 iocb->ki_pos += ret; 734 iocb->ki_pos += ret;
749 } 735 }
750 736
737 /*
738 * Try again to invalidate clean pages which might have been cached by
739 * non-direct readahead, or faulted in by get_user_pages() if the source
740 * of the write was an mmap'ed region of the file we're writing. Either
741 * one is a pretty crazy thing to do, so we don't support it 100%. If
742 * this invalidation fails, tough, the write still worked...
743 *
744 * And this page cache invalidation has to be after dio->end_io(), as
745 * some filesystems convert unwritten extents to real allocations in
746 * end_io() when necessary, otherwise a racing buffer read would cache
747 * zeros from unwritten extents.
748 */
749 if (!dio->error &&
750 (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
751 int err;
752 err = invalidate_inode_pages2_range(inode->i_mapping,
753 offset >> PAGE_SHIFT,
754 (offset + dio->size - 1) >> PAGE_SHIFT);
755 WARN_ON_ONCE(err);
756 }
757
751 inode_dio_end(file_inode(iocb->ki_filp)); 758 inode_dio_end(file_inode(iocb->ki_filp));
752 kfree(dio); 759 kfree(dio);
753 760
diff --git a/fs/namespace.c b/fs/namespace.c
index 3b601f115b6c..d18deb4c410b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2825,7 +2825,8 @@ long do_mount(const char *dev_name, const char __user *dir_name,
2825 SB_MANDLOCK | 2825 SB_MANDLOCK |
2826 SB_DIRSYNC | 2826 SB_DIRSYNC |
2827 SB_SILENT | 2827 SB_SILENT |
2828 SB_POSIXACL); 2828 SB_POSIXACL |
2829 SB_I_VERSION);
2829 2830
2830 if (flags & MS_REMOUNT) 2831 if (flags & MS_REMOUNT)
2831 retval = do_remount(&path, flags, sb_flags, mnt_flags, 2832 retval = do_remount(&path, flags, sb_flags, mnt_flags,
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index def32fa1c225..89263797cf32 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3852,6 +3852,17 @@ xfs_trim_extent(
3852 } 3852 }
3853} 3853}
3854 3854
3855/* trim extent to within eof */
3856void
3857xfs_trim_extent_eof(
3858 struct xfs_bmbt_irec *irec,
3859 struct xfs_inode *ip)
3860
3861{
3862 xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
3863 i_size_read(VFS_I(ip))));
3864}
3865
3855/* 3866/*
3856 * Trim the returned map to the required bounds 3867 * Trim the returned map to the required bounds
3857 */ 3868 */
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 851982a5dfbc..502e0d8fb4ff 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
208 208
209void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, 209void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
210 xfs_filblks_t len); 210 xfs_filblks_t len);
211void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
211int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); 212int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
212void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); 213void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
213void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, 214void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f18e5932aec4..a3eeaba156c5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -446,6 +446,19 @@ xfs_imap_valid(
446{ 446{
447 offset >>= inode->i_blkbits; 447 offset >>= inode->i_blkbits;
448 448
449 /*
450 * We have to make sure the cached mapping is within EOF to protect
451 * against eofblocks trimming on file release leaving us with a stale
452 * mapping. Otherwise, a page for a subsequent file extending buffered
453 * write could get picked up by this writeback cycle and written to the
454 * wrong blocks.
455 *
456 * Note that what we really want here is a generic mapping invalidation
457 * mechanism to protect us from arbitrary extent modifying contexts, not
458 * just eofblocks.
459 */
460 xfs_trim_extent_eof(imap, XFS_I(inode));
461
449 return offset >= imap->br_startoff && 462 return offset >= imap->br_startoff &&
450 offset < imap->br_startoff + imap->br_blockcount; 463 offset < imap->br_startoff + imap->br_blockcount;
451} 464}
@@ -735,6 +748,14 @@ xfs_vm_invalidatepage(
735{ 748{
736 trace_xfs_invalidatepage(page->mapping->host, page, offset, 749 trace_xfs_invalidatepage(page->mapping->host, page, offset,
737 length); 750 length);
751
752 /*
753 * If we are invalidating the entire page, clear the dirty state from it
754 * so that we can check for attempts to release dirty cached pages in
755 * xfs_vm_releasepage().
756 */
757 if (offset == 0 && length >= PAGE_SIZE)
758 cancel_dirty_page(page);
738 block_invalidatepage(page, offset, length); 759 block_invalidatepage(page, offset, length);
739} 760}
740 761
@@ -1190,25 +1211,27 @@ xfs_vm_releasepage(
1190 * mm accommodates an old ext3 case where clean pages might not have had 1211 * mm accommodates an old ext3 case where clean pages might not have had
1191 * the dirty bit cleared. Thus, it can send actual dirty pages to 1212 * the dirty bit cleared. Thus, it can send actual dirty pages to
1192 * ->releasepage() via shrink_active_list(). Conversely, 1213 * ->releasepage() via shrink_active_list(). Conversely,
1193 * block_invalidatepage() can send pages that are still marked dirty 1214 * block_invalidatepage() can send pages that are still marked dirty but
1194 * but otherwise have invalidated buffers. 1215 * otherwise have invalidated buffers.
1195 * 1216 *
1196 * We want to release the latter to avoid unnecessary buildup of the 1217 * We want to release the latter to avoid unnecessary buildup of the
1197 * LRU, skip the former and warn if we've left any lingering 1218 * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
1198 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc 1219 * that are entirely invalidated and need to be released. Hence the
1199 * or unwritten buffers and warn if the page is not dirty. Otherwise 1220 * only time we should get dirty pages here is through
1200 * try to release the buffers. 1221 * shrink_active_list() and so we can simply skip those now.
1222 *
1223 * warn if we've left any lingering delalloc/unwritten buffers on clean
1224 * or invalidated pages we are about to release.
1201 */ 1225 */
1226 if (PageDirty(page))
1227 return 0;
1228
1202 xfs_count_page_state(page, &delalloc, &unwritten); 1229 xfs_count_page_state(page, &delalloc, &unwritten);
1203 1230
1204 if (delalloc) { 1231 if (WARN_ON_ONCE(delalloc))
1205 WARN_ON_ONCE(!PageDirty(page));
1206 return 0; 1232 return 0;
1207 } 1233 if (WARN_ON_ONCE(unwritten))
1208 if (unwritten) {
1209 WARN_ON_ONCE(!PageDirty(page));
1210 return 0; 1234 return 0;
1211 }
1212 1235
1213 return try_to_free_buffers(page); 1236 return try_to_free_buffers(page);
1214} 1237}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 56d0e526870c..6526ef0e2a23 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,11 +237,13 @@ xfs_file_dax_read(
237 if (!count) 237 if (!count)
238 return 0; /* skip atime */ 238 return 0; /* skip atime */
239 239
240 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { 240 if (iocb->ki_flags & IOCB_NOWAIT) {
241 if (iocb->ki_flags & IOCB_NOWAIT) 241 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
242 return -EAGAIN; 242 return -EAGAIN;
243 } else {
243 xfs_ilock(ip, XFS_IOLOCK_SHARED); 244 xfs_ilock(ip, XFS_IOLOCK_SHARED);
244 } 245 }
246
245 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); 247 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
246 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 248 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
247 249
@@ -259,9 +261,10 @@ xfs_file_buffered_aio_read(
259 261
260 trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); 262 trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
261 263
262 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { 264 if (iocb->ki_flags & IOCB_NOWAIT) {
263 if (iocb->ki_flags & IOCB_NOWAIT) 265 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
264 return -EAGAIN; 266 return -EAGAIN;
267 } else {
265 xfs_ilock(ip, XFS_IOLOCK_SHARED); 268 xfs_ilock(ip, XFS_IOLOCK_SHARED);
266 } 269 }
267 ret = generic_file_read_iter(iocb, to); 270 ret = generic_file_read_iter(iocb, to);
@@ -552,9 +555,10 @@ xfs_file_dio_aio_write(
552 iolock = XFS_IOLOCK_SHARED; 555 iolock = XFS_IOLOCK_SHARED;
553 } 556 }
554 557
555 if (!xfs_ilock_nowait(ip, iolock)) { 558 if (iocb->ki_flags & IOCB_NOWAIT) {
556 if (iocb->ki_flags & IOCB_NOWAIT) 559 if (!xfs_ilock_nowait(ip, iolock))
557 return -EAGAIN; 560 return -EAGAIN;
561 } else {
558 xfs_ilock(ip, iolock); 562 xfs_ilock(ip, iolock);
559 } 563 }
560 564
@@ -606,9 +610,10 @@ xfs_file_dax_write(
606 size_t count; 610 size_t count;
607 loff_t pos; 611 loff_t pos;
608 612
609 if (!xfs_ilock_nowait(ip, iolock)) { 613 if (iocb->ki_flags & IOCB_NOWAIT) {
610 if (iocb->ki_flags & IOCB_NOWAIT) 614 if (!xfs_ilock_nowait(ip, iolock))
611 return -EAGAIN; 615 return -EAGAIN;
616 } else {
612 xfs_ilock(ip, iolock); 617 xfs_ilock(ip, iolock);
613 } 618 }
614 619
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 560e0b40ac1b..43cfc07996a4 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper(
367 return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); 367 return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
368} 368}
369 369
370/* Transform a rtbitmap "record" into a fsmap */
371STATIC int
372xfs_getfsmap_rtdev_rtbitmap_helper(
373 struct xfs_trans *tp,
374 struct xfs_rtalloc_rec *rec,
375 void *priv)
376{
377 struct xfs_mount *mp = tp->t_mountp;
378 struct xfs_getfsmap_info *info = priv;
379 struct xfs_rmap_irec irec;
380 xfs_daddr_t rec_daddr;
381
382 rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
383
384 irec.rm_startblock = rec->ar_startblock;
385 irec.rm_blockcount = rec->ar_blockcount;
386 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
387 irec.rm_offset = 0;
388 irec.rm_flags = 0;
389
390 return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
391}
392
393/* Transform a bnobt irec into a fsmap */ 370/* Transform a bnobt irec into a fsmap */
394STATIC int 371STATIC int
395xfs_getfsmap_datadev_bnobt_helper( 372xfs_getfsmap_datadev_bnobt_helper(
@@ -475,6 +452,30 @@ xfs_getfsmap_logdev(
475 return xfs_getfsmap_helper(tp, info, &rmap, 0); 452 return xfs_getfsmap_helper(tp, info, &rmap, 0);
476} 453}
477 454
455#ifdef CONFIG_XFS_RT
456/* Transform a rtbitmap "record" into a fsmap */
457STATIC int
458xfs_getfsmap_rtdev_rtbitmap_helper(
459 struct xfs_trans *tp,
460 struct xfs_rtalloc_rec *rec,
461 void *priv)
462{
463 struct xfs_mount *mp = tp->t_mountp;
464 struct xfs_getfsmap_info *info = priv;
465 struct xfs_rmap_irec irec;
466 xfs_daddr_t rec_daddr;
467
468 rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
469
470 irec.rm_startblock = rec->ar_startblock;
471 irec.rm_blockcount = rec->ar_blockcount;
472 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
473 irec.rm_offset = 0;
474 irec.rm_flags = 0;
475
476 return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
477}
478
478/* Execute a getfsmap query against the realtime device. */ 479/* Execute a getfsmap query against the realtime device. */
479STATIC int 480STATIC int
480__xfs_getfsmap_rtdev( 481__xfs_getfsmap_rtdev(
@@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev(
521 return query_fn(tp, info); 522 return query_fn(tp, info);
522} 523}
523 524
524#ifdef CONFIG_XFS_RT
525/* Actually query the realtime bitmap. */ 525/* Actually query the realtime bitmap. */
526STATIC int 526STATIC int
527xfs_getfsmap_rtdev_rtbitmap_query( 527xfs_getfsmap_rtdev_rtbitmap_query(
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 584cf2d573ba..f663022353c0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1637,7 +1637,7 @@ xfs_fs_fill_super(
1637 1637
1638 /* version 5 superblocks support inode version counters. */ 1638 /* version 5 superblocks support inode version counters. */
1639 if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) 1639 if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1640 sb->s_flags |= MS_I_VERSION; 1640 sb->s_flags |= SB_I_VERSION;
1641 1641
1642 if (mp->m_flags & XFS_MOUNT_DAX) { 1642 if (mp->m_flags & XFS_MOUNT_DAX) {
1643 xfs_warn(mp, 1643 xfs_warn(mp,