aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2015-07-28 21:48:00 -0400
committerDave Chinner <david@fromorbit.com>2015-07-28 21:48:00 -0400
commitb2442c5a7fe92cca08437070c8a45a7aa0d1703e (patch)
treedd16bd4306ef3e81925f883314a88c4f2be11690
parentbc0195aad0daa2ad5b0d76cce22b167bc3435590 (diff)
xfs: call dax_fault on read page faults for DAX
When modifying the patch series to handle the XFS MMAP_LOCK nesting of page faults, I botched the conversion of the read page fault path, and so it is only ever calling through the page cache. Re-add the necessary __dax_fault() call for such files. Because the get_blocks callback on read faults may not set up the mapping buffer correctly to allow unwritten extent completion to be run, we need to allow callers of __dax_fault() to pass a null complete_unwritten() callback. The DAX code always zeros the unwritten page when it is read faulted so there are no stale data exposure issues with not doing the conversion. The only downside will be the potential for increased CPU overhead on repeated read faults of the same page. If this proves to be a problem, then the filesystem needs to fix its get_block callback and provide a complete_unwritten() callback to the read fault path. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Matthew Wilcox <willy@linux.intel.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--fs/dax.c14
-rw-r--r--fs/xfs/xfs_file.c21
2 files changed, 27 insertions, 8 deletions
diff --git a/fs/dax.c b/fs/dax.c
index c3e21ccfc358..a7f77e1fa18c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
319 * @vma: The virtual memory area where the fault occurred 319 * @vma: The virtual memory area where the fault occurred
320 * @vmf: The description of the fault 320 * @vmf: The description of the fault
321 * @get_block: The filesystem method used to translate file offsets to blocks 321 * @get_block: The filesystem method used to translate file offsets to blocks
322 * @complete_unwritten: The filesystem method used to convert unwritten blocks
323 * to written so the data written to them is exposed. This is
324 * required by write faults for filesystems that will return unwritten
325 * extent mappings from @get_block, but it is optional for reads as
326 * dax_insert_mapping() will always zero unwritten blocks. If the fs does
327 * not support unwritten extents, then it should pass NULL.
322 * 328 *
323 * When a page fault occurs, filesystems may call this helper in their 329 * When a page fault occurs, filesystems may call this helper in their
324 * fault handler for DAX files. __dax_fault() assumes the caller has done all 330 * fault handler for DAX files. __dax_fault() assumes the caller has done all
@@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
437 * as for normal BH based IO completions. 443 * as for normal BH based IO completions.
438 */ 444 */
439 error = dax_insert_mapping(inode, &bh, vma, vmf); 445 error = dax_insert_mapping(inode, &bh, vma, vmf);
440 if (buffer_unwritten(&bh)) 446 if (buffer_unwritten(&bh)) {
441 complete_unwritten(&bh, !error); 447 if (complete_unwritten)
448 complete_unwritten(&bh, !error);
449 else
450 WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
451 }
442 452
443 out: 453 out:
444 if (error == -ENOMEM) 454 if (error == -ENOMEM)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f0e8249722d4..db4acc1c3e73 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1514,18 +1514,27 @@ xfs_filemap_fault(
1514 struct vm_area_struct *vma, 1514 struct vm_area_struct *vma,
1515 struct vm_fault *vmf) 1515 struct vm_fault *vmf)
1516{ 1516{
1517 struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file)); 1517 struct inode *inode = file_inode(vma->vm_file);
1518 int ret; 1518 int ret;
1519 1519
1520 trace_xfs_filemap_fault(ip); 1520 trace_xfs_filemap_fault(XFS_I(inode));
1521 1521
1522 /* DAX can shortcut the normal fault path on write faults! */ 1522 /* DAX can shortcut the normal fault path on write faults! */
1523 if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip))) 1523 if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
1524 return xfs_filemap_page_mkwrite(vma, vmf); 1524 return xfs_filemap_page_mkwrite(vma, vmf);
1525 1525
1526 xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1526 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1527 ret = filemap_fault(vma, vmf); 1527 if (IS_DAX(inode)) {
1528 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1528 /*
1529 * we do not want to trigger unwritten extent conversion on read
1530 * faults - that is unnecessary overhead and would also require
1531 * changes to xfs_get_blocks_direct() to map unwritten extent
1532 * ioend for conversion on read-only mappings.
1533 */
1534 ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL);
1535 } else
1536 ret = filemap_fault(vma, vmf);
1537 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1529 1538
1530 return ret; 1539 return ret;
1531} 1540}