summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz>, 2017-11-01 11:36:45 -0400
committer: Dan Williams <dan.j.williams@intel.com>, 2017-11-03 09:26:26 -0400
commit: b8a6176c214cf9aa2679131ed7e4515cddaadc33 (patch)
tree: e9b6ff2b1ee0459854e64f3195a7af7571b62196 /fs/ext4
parent: 497f6926d880c57f65bf7c3f1086526fa774c55e (diff)
ext4: Support for synchronous DAX faults
We return the IOMAP_F_DIRTY flag from ext4_iomap_begin() when asked to prepare blocks for writing and the inode has some uncommitted metadata changes. In the fault handler ext4_dax_fault() we then detect this case (through the VM_FAULT_NEEDDSYNC return value) and call the helper dax_finish_sync_fault() to flush the metadata changes and insert the page table entry. Note that this will also dirty the corresponding radix tree entry, which is what we want: fsync(2) will still provide data integrity guarantees for applications not using userspace flushing, while applications using userspace flushing can avoid calling fsync(2) and thus avoid the performance overhead.

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/file.c 15
-rw-r--r-- fs/ext4/inode.c 15
2 files changed, 29 insertions, 1 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 208adfc3e673..08a1d1a33a90 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -26,6 +26,7 @@
26#include <linux/quotaops.h> 26#include <linux/quotaops.h>
27#include <linux/pagevec.h> 27#include <linux/pagevec.h>
28#include <linux/uio.h> 28#include <linux/uio.h>
29#include <linux/mman.h>
29#include "ext4.h" 30#include "ext4.h"
30#include "ext4_jbd2.h" 31#include "ext4_jbd2.h"
31#include "xattr.h" 32#include "xattr.h"
@@ -295,6 +296,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
295 */ 296 */
296 bool write = (vmf->flags & FAULT_FLAG_WRITE) && 297 bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
297 (vmf->vma->vm_flags & VM_SHARED); 298 (vmf->vma->vm_flags & VM_SHARED);
299 pfn_t pfn;
298 300
299 if (write) { 301 if (write) {
300 sb_start_pagefault(sb); 302 sb_start_pagefault(sb);
@@ -310,9 +312,12 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
310 } else { 312 } else {
311 down_read(&EXT4_I(inode)->i_mmap_sem); 313 down_read(&EXT4_I(inode)->i_mmap_sem);
312 } 314 }
313 result = dax_iomap_fault(vmf, pe_size, NULL, &ext4_iomap_ops); 315 result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
314 if (write) { 316 if (write) {
315 ext4_journal_stop(handle); 317 ext4_journal_stop(handle);
318 /* Handling synchronous page fault? */
319 if (result & VM_FAULT_NEEDDSYNC)
320 result = dax_finish_sync_fault(vmf, pe_size, pfn);
316 up_read(&EXT4_I(inode)->i_mmap_sem); 321 up_read(&EXT4_I(inode)->i_mmap_sem);
317 sb_end_pagefault(sb); 322 sb_end_pagefault(sb);
318 } else { 323 } else {
@@ -350,6 +355,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
350 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 355 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
351 return -EIO; 356 return -EIO;
352 357
358 /*
359 * We don't support synchronous mappings for non-DAX files. At least
360 * until someone comes with a sensible use case.
361 */
362 if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
363 return -EOPNOTSUPP;
364
353 file_accessed(file); 365 file_accessed(file);
354 if (IS_DAX(file_inode(file))) { 366 if (IS_DAX(file_inode(file))) {
355 vma->vm_ops = &ext4_dax_vm_ops; 367 vma->vm_ops = &ext4_dax_vm_ops;
@@ -719,6 +731,7 @@ const struct file_operations ext4_file_operations = {
719 .compat_ioctl = ext4_compat_ioctl, 731 .compat_ioctl = ext4_compat_ioctl,
720#endif 732#endif
721 .mmap = ext4_file_mmap, 733 .mmap = ext4_file_mmap,
734 .mmap_supported_flags = MAP_SYNC,
722 .open = ext4_file_open, 735 .open = ext4_file_open,
723 .release = ext4_release_file, 736 .release = ext4_release_file,
724 .fsync = ext4_sync_file, 737 .fsync = ext4_sync_file,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 31db875bc7a1..13a198924a0f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3394,6 +3394,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3394} 3394}
3395 3395
3396#ifdef CONFIG_FS_DAX 3396#ifdef CONFIG_FS_DAX
3397static bool ext4_inode_datasync_dirty(struct inode *inode)
3398{
3399 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
3400
3401 if (journal)
3402 return !jbd2_transaction_committed(journal,
3403 EXT4_I(inode)->i_datasync_tid);
3404 /* Any metadata buffers to write? */
3405 if (!list_empty(&inode->i_mapping->private_list))
3406 return true;
3407 return inode->i_state & I_DIRTY_DATASYNC;
3408}
3409
3397static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 3410static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
3398 unsigned flags, struct iomap *iomap) 3411 unsigned flags, struct iomap *iomap)
3399{ 3412{
@@ -3466,6 +3479,8 @@ retry:
3466 } 3479 }
3467 3480
3468 iomap->flags = 0; 3481 iomap->flags = 0;
3482 if ((flags & IOMAP_WRITE) && ext4_inode_datasync_dirty(inode))
3483 iomap->flags |= IOMAP_F_DIRTY;
3469 iomap->bdev = inode->i_sb->s_bdev; 3484 iomap->bdev = inode->i_sb->s_bdev;
3470 iomap->dax_dev = sbi->s_daxdev; 3485 iomap->dax_dev = sbi->s_daxdev;
3471 iomap->offset = first_block << blkbits; 3486 iomap->offset = first_block << blkbits;