diff options
author | Jan Kara <jack@suse.cz> | 2017-11-01 11:36:45 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2017-11-03 09:26:26 -0400 |
commit | b8a6176c214cf9aa2679131ed7e4515cddaadc33 (patch) | |
tree | e9b6ff2b1ee0459854e64f3195a7af7571b62196 /fs/ext4 | |
parent | 497f6926d880c57f65bf7c3f1086526fa774c55e (diff) |
ext4: Support for synchronous DAX faults
We return the IOMAP_F_DIRTY flag from ext4_iomap_begin() when asked to
prepare blocks for writing and the inode has some uncommitted metadata
changes. In the fault handler ext4_dax_huge_fault() we then detect this
case (through the VM_FAULT_NEEDDSYNC return value) and call the helper
dax_finish_sync_fault() to flush the metadata changes and insert the page
table entry. Note that this will also dirty the corresponding radix tree
entry, which is what we want - fsync(2) will still provide data integrity
guarantees for applications not using userspace flushing. And
applications using userspace flushing can avoid calling fsync(2) and
thus avoid the performance overhead.
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/file.c | 15 | ||||
-rw-r--r-- | fs/ext4/inode.c | 15 |
2 files changed, 29 insertions, 1 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 208adfc3e673..08a1d1a33a90 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/quotaops.h> | 26 | #include <linux/quotaops.h> |
27 | #include <linux/pagevec.h> | 27 | #include <linux/pagevec.h> |
28 | #include <linux/uio.h> | 28 | #include <linux/uio.h> |
29 | #include <linux/mman.h> | ||
29 | #include "ext4.h" | 30 | #include "ext4.h" |
30 | #include "ext4_jbd2.h" | 31 | #include "ext4_jbd2.h" |
31 | #include "xattr.h" | 32 | #include "xattr.h" |
@@ -295,6 +296,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, | |||
295 | */ | 296 | */ |
296 | bool write = (vmf->flags & FAULT_FLAG_WRITE) && | 297 | bool write = (vmf->flags & FAULT_FLAG_WRITE) && |
297 | (vmf->vma->vm_flags & VM_SHARED); | 298 | (vmf->vma->vm_flags & VM_SHARED); |
299 | pfn_t pfn; | ||
298 | 300 | ||
299 | if (write) { | 301 | if (write) { |
300 | sb_start_pagefault(sb); | 302 | sb_start_pagefault(sb); |
@@ -310,9 +312,12 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, | |||
310 | } else { | 312 | } else { |
311 | down_read(&EXT4_I(inode)->i_mmap_sem); | 313 | down_read(&EXT4_I(inode)->i_mmap_sem); |
312 | } | 314 | } |
313 | result = dax_iomap_fault(vmf, pe_size, NULL, &ext4_iomap_ops); | 315 | result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops); |
314 | if (write) { | 316 | if (write) { |
315 | ext4_journal_stop(handle); | 317 | ext4_journal_stop(handle); |
318 | /* Handling synchronous page fault? */ | ||
319 | if (result & VM_FAULT_NEEDDSYNC) | ||
320 | result = dax_finish_sync_fault(vmf, pe_size, pfn); | ||
316 | up_read(&EXT4_I(inode)->i_mmap_sem); | 321 | up_read(&EXT4_I(inode)->i_mmap_sem); |
317 | sb_end_pagefault(sb); | 322 | sb_end_pagefault(sb); |
318 | } else { | 323 | } else { |
@@ -350,6 +355,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
350 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) | 355 | if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) |
351 | return -EIO; | 356 | return -EIO; |
352 | 357 | ||
358 | /* | ||
359 | * We don't support synchronous mappings for non-DAX files. At least | ||
360 | * until someone comes with a sensible use case. | ||
361 | */ | ||
362 | if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC)) | ||
363 | return -EOPNOTSUPP; | ||
364 | |||
353 | file_accessed(file); | 365 | file_accessed(file); |
354 | if (IS_DAX(file_inode(file))) { | 366 | if (IS_DAX(file_inode(file))) { |
355 | vma->vm_ops = &ext4_dax_vm_ops; | 367 | vma->vm_ops = &ext4_dax_vm_ops; |
@@ -719,6 +731,7 @@ const struct file_operations ext4_file_operations = { | |||
719 | .compat_ioctl = ext4_compat_ioctl, | 731 | .compat_ioctl = ext4_compat_ioctl, |
720 | #endif | 732 | #endif |
721 | .mmap = ext4_file_mmap, | 733 | .mmap = ext4_file_mmap, |
734 | .mmap_supported_flags = MAP_SYNC, | ||
722 | .open = ext4_file_open, | 735 | .open = ext4_file_open, |
723 | .release = ext4_release_file, | 736 | .release = ext4_release_file, |
724 | .fsync = ext4_sync_file, | 737 | .fsync = ext4_sync_file, |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 31db875bc7a1..13a198924a0f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3394,6 +3394,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3394 | } | 3394 | } |
3395 | 3395 | ||
3396 | #ifdef CONFIG_FS_DAX | 3396 | #ifdef CONFIG_FS_DAX |
3397 | static bool ext4_inode_datasync_dirty(struct inode *inode) | ||
3398 | { | ||
3399 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
3400 | |||
3401 | if (journal) | ||
3402 | return !jbd2_transaction_committed(journal, | ||
3403 | EXT4_I(inode)->i_datasync_tid); | ||
3404 | /* Any metadata buffers to write? */ | ||
3405 | if (!list_empty(&inode->i_mapping->private_list)) | ||
3406 | return true; | ||
3407 | return inode->i_state & I_DIRTY_DATASYNC; | ||
3408 | } | ||
3409 | |||
3397 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, | 3410 | static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, |
3398 | unsigned flags, struct iomap *iomap) | 3411 | unsigned flags, struct iomap *iomap) |
3399 | { | 3412 | { |
@@ -3466,6 +3479,8 @@ retry: | |||
3466 | } | 3479 | } |
3467 | 3480 | ||
3468 | iomap->flags = 0; | 3481 | iomap->flags = 0; |
3482 | if ((flags & IOMAP_WRITE) && ext4_inode_datasync_dirty(inode)) | ||
3483 | iomap->flags |= IOMAP_F_DIRTY; | ||
3469 | iomap->bdev = inode->i_sb->s_bdev; | 3484 | iomap->bdev = inode->i_sb->s_bdev; |
3470 | iomap->dax_dev = sbi->s_daxdev; | 3485 | iomap->dax_dev = sbi->s_daxdev; |
3471 | iomap->offset = first_block << blkbits; | 3486 | iomap->offset = first_block << blkbits; |