aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2016-08-10 11:10:28 -0400
committerDan Williams <dan.j.williams@intel.com>2016-12-26 23:29:24 -0500
commite3fce68cdbed297d927e993b3ea7b8b1cee545da (patch)
tree52e8c6f6bea775cb623cfca46012b91657b7dc47
parentc6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 (diff)
dax: Avoid page invalidation races and unnecessary radix tree traversals
Currently dax_iomap_rw() takes care of invalidating page tables and evicting hole pages from the radix tree when write(2) to the file happens. This invalidation is only necessary when there is some block allocation resulting from write(2). Furthermore, in its current place the invalidation is racy with respect to a page fault instantiating a hole page just after we have invalidated it.

So perform the page invalidation inside dax_iomap_actor(), where we can do it only when really necessary and after blocks have been allocated, so nobody will be instantiating new hole pages anymore.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- fs/dax.c | 28
1 file changed, 11 insertions(+), 17 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index bcfedd184860..08e15db28b79 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -985,6 +985,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
985 if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED)) 985 if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
986 return -EIO; 986 return -EIO;
987 987
988 /*
989 * Write can allocate block for an area which has a hole page mapped
990 * into page tables. We have to tear down these mappings so that data
991 * written by write(2) is visible in mmap.
992 */
993 if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
994 invalidate_inode_pages2_range(inode->i_mapping,
995 pos >> PAGE_SHIFT,
996 (end - 1) >> PAGE_SHIFT);
997 }
998
988 while (pos < end) { 999 while (pos < end) {
989 unsigned offset = pos & (PAGE_SIZE - 1); 1000 unsigned offset = pos & (PAGE_SIZE - 1);
990 struct blk_dax_ctl dax = { 0 }; 1001 struct blk_dax_ctl dax = { 0 };
@@ -1043,23 +1054,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
1043 if (iov_iter_rw(iter) == WRITE) 1054 if (iov_iter_rw(iter) == WRITE)
1044 flags |= IOMAP_WRITE; 1055 flags |= IOMAP_WRITE;
1045 1056
1046 /*
1047 * Yes, even DAX files can have page cache attached to them: A zeroed
1048 * page is inserted into the pagecache when we have to serve a write
1049 * fault on a hole. It should never be dirtied and can simply be
1050 * dropped from the pagecache once we get real data for the page.
1051 *
1052 * XXX: This is racy against mmap, and there's nothing we can do about
1053 * it. We'll eventually need to shift this down even further so that
1054 * we can check if we allocated blocks over a hole first.
1055 */
1056 if (mapping->nrpages) {
1057 ret = invalidate_inode_pages2_range(mapping,
1058 pos >> PAGE_SHIFT,
1059 (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
1060 WARN_ON_ONCE(ret);
1061 }
1062
1063 while (iov_iter_count(iter)) { 1057 while (iov_iter_count(iter)) {
1064 ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, 1058 ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
1065 iter, dax_iomap_actor); 1059 iter, dax_iomap_actor);