aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChao Yu <yuchao0@huawei.com>2018-09-27 06:34:52 -0400
committerJaegeuk Kim <jaegeuk@kernel.org>2018-09-30 21:42:50 -0400
commitf847c699cff3f050286ee0a08632046468e7a511 (patch)
tree030956a695c5806406fab3571511d93bed2f6381
parent39a8695824510a951ded696d69b8dea3c720b109 (diff)
f2fs: allow out-place-update for direct IO in LFS mode
Normally, DIO uses in-place-update, but in LFS mode f2fs doesn't allow triggering any in-place-update writes, so we fall back from direct write to buffered write, resulting in bad performance for large writes. This patch adds support for triggering out-place-update for direct IO to enhance its performance. Note that it needs to exclude direct read IO during direct write, since new data written to a new block address will not be valid until the write has finished. storage: zram time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" Before: real 0m13.061s user 0m0.327s sys 0m12.486s After: real 0m6.448s user 0m0.228s sys 0m6.212s Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r--fs/f2fs/data.c44
-rw-r--r--fs/f2fs/f2fs.h45
-rw-r--r--fs/f2fs/file.c3
3 files changed, 78 insertions, 14 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3f01bc2d73eb..02d5ce888a4a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -893,7 +893,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
893 893
894 dn->data_blkaddr = datablock_addr(dn->inode, 894 dn->data_blkaddr = datablock_addr(dn->inode,
895 dn->node_page, dn->ofs_in_node); 895 dn->node_page, dn->ofs_in_node);
896 if (dn->data_blkaddr == NEW_ADDR) 896 if (dn->data_blkaddr != NULL_ADDR)
897 goto alloc; 897 goto alloc;
898 898
899 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) 899 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
@@ -947,7 +947,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
947 947
948 if (direct_io) { 948 if (direct_io) {
949 map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); 949 map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
950 flag = f2fs_force_buffered_io(inode, WRITE) ? 950 flag = f2fs_force_buffered_io(inode, iocb, from) ?
951 F2FS_GET_BLOCK_PRE_AIO : 951 F2FS_GET_BLOCK_PRE_AIO :
952 F2FS_GET_BLOCK_PRE_DIO; 952 F2FS_GET_BLOCK_PRE_DIO;
953 goto map_blocks; 953 goto map_blocks;
@@ -1066,7 +1066,15 @@ next_block:
1066 goto sync_out; 1066 goto sync_out;
1067 } 1067 }
1068 1068
1069 if (!is_valid_data_blkaddr(sbi, blkaddr)) { 1069 if (is_valid_data_blkaddr(sbi, blkaddr)) {
1070 /* use out-place-update for direct IO under LFS mode */
1071 if (test_opt(sbi, LFS) && create &&
1072 flag == F2FS_GET_BLOCK_DIO) {
1073 err = __allocate_data_block(&dn, map->m_seg_type);
1074 if (!err)
1075 set_inode_flag(inode, FI_APPEND_WRITE);
1076 }
1077 } else {
1070 if (create) { 1078 if (create) {
1071 if (unlikely(f2fs_cp_error(sbi))) { 1079 if (unlikely(f2fs_cp_error(sbi))) {
1072 err = -EIO; 1080 err = -EIO;
@@ -2486,36 +2494,53 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2486 struct address_space *mapping = iocb->ki_filp->f_mapping; 2494 struct address_space *mapping = iocb->ki_filp->f_mapping;
2487 struct inode *inode = mapping->host; 2495 struct inode *inode = mapping->host;
2488 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2496 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2497 struct f2fs_inode_info *fi = F2FS_I(inode);
2489 size_t count = iov_iter_count(iter); 2498 size_t count = iov_iter_count(iter);
2490 loff_t offset = iocb->ki_pos; 2499 loff_t offset = iocb->ki_pos;
2491 int rw = iov_iter_rw(iter); 2500 int rw = iov_iter_rw(iter);
2492 int err; 2501 int err;
2493 enum rw_hint hint = iocb->ki_hint; 2502 enum rw_hint hint = iocb->ki_hint;
2494 int whint_mode = F2FS_OPTION(sbi).whint_mode; 2503 int whint_mode = F2FS_OPTION(sbi).whint_mode;
2504 bool do_opu;
2495 2505
2496 err = check_direct_IO(inode, iter, offset); 2506 err = check_direct_IO(inode, iter, offset);
2497 if (err) 2507 if (err)
2498 return err < 0 ? err : 0; 2508 return err < 0 ? err : 0;
2499 2509
2500 if (f2fs_force_buffered_io(inode, rw)) 2510 if (f2fs_force_buffered_io(inode, iocb, iter))
2501 return 0; 2511 return 0;
2502 2512
2513 do_opu = allow_outplace_dio(inode, iocb, iter);
2514
2503 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 2515 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
2504 2516
2505 if (rw == WRITE && whint_mode == WHINT_MODE_OFF) 2517 if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
2506 iocb->ki_hint = WRITE_LIFE_NOT_SET; 2518 iocb->ki_hint = WRITE_LIFE_NOT_SET;
2507 2519
2508 if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { 2520 if (iocb->ki_flags & IOCB_NOWAIT) {
2509 if (iocb->ki_flags & IOCB_NOWAIT) { 2521 if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
2522 iocb->ki_hint = hint;
2523 err = -EAGAIN;
2524 goto out;
2525 }
2526 if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
2527 up_read(&fi->i_gc_rwsem[rw]);
2510 iocb->ki_hint = hint; 2528 iocb->ki_hint = hint;
2511 err = -EAGAIN; 2529 err = -EAGAIN;
2512 goto out; 2530 goto out;
2513 } 2531 }
2514 down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); 2532 } else {
2533 down_read(&fi->i_gc_rwsem[rw]);
2534 if (do_opu)
2535 down_read(&fi->i_gc_rwsem[READ]);
2515 } 2536 }
2516 2537
2517 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); 2538 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
2518 up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); 2539
2540 if (do_opu)
2541 up_read(&fi->i_gc_rwsem[READ]);
2542
2543 up_read(&fi->i_gc_rwsem[rw]);
2519 2544
2520 if (rw == WRITE) { 2545 if (rw == WRITE) {
2521 if (whint_mode == WHINT_MODE_OFF) 2546 if (whint_mode == WHINT_MODE_OFF)
@@ -2523,7 +2548,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2523 if (err > 0) { 2548 if (err > 0) {
2524 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, 2549 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
2525 err); 2550 err);
2526 set_inode_flag(inode, FI_UPDATE_WRITE); 2551 if (!do_opu)
2552 set_inode_flag(inode, FI_UPDATE_WRITE);
2527 } else if (err < 0) { 2553 } else if (err < 0) {
2528 f2fs_write_failed(mapping, offset + count); 2554 f2fs_write_failed(mapping, offset + count);
2529 } 2555 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 05217aad4c0b..95d9edd8ff6e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -8,6 +8,7 @@
8#ifndef _LINUX_F2FS_H 8#ifndef _LINUX_F2FS_H
9#define _LINUX_F2FS_H 9#define _LINUX_F2FS_H
10 10
11#include <linux/uio.h>
11#include <linux/types.h> 12#include <linux/types.h>
12#include <linux/page-flags.h> 13#include <linux/page-flags.h>
13#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
@@ -3491,11 +3492,47 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
3491#endif 3492#endif
3492} 3493}
3493 3494
3494static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) 3495static inline int block_unaligned_IO(struct inode *inode,
3496 struct kiocb *iocb, struct iov_iter *iter)
3495{ 3497{
3496 return (f2fs_post_read_required(inode) || 3498 unsigned int i_blkbits = READ_ONCE(inode->i_blkbits);
3497 (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || 3499 unsigned int blocksize_mask = (1 << i_blkbits) - 1;
3498 F2FS_I_SB(inode)->s_ndevs); 3500 loff_t offset = iocb->ki_pos;
3501 unsigned long align = offset | iov_iter_alignment(iter);
3502
3503 return align & blocksize_mask;
3504}
3505
3506static inline int allow_outplace_dio(struct inode *inode,
3507 struct kiocb *iocb, struct iov_iter *iter)
3508{
3509 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3510 int rw = iov_iter_rw(iter);
3511
3512 return (test_opt(sbi, LFS) && (rw == WRITE) &&
3513 !block_unaligned_IO(inode, iocb, iter));
3514}
3515
3516static inline bool f2fs_force_buffered_io(struct inode *inode,
3517 struct kiocb *iocb, struct iov_iter *iter)
3518{
3519 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3520 int rw = iov_iter_rw(iter);
3521
3522 if (f2fs_post_read_required(inode))
3523 return true;
3524 if (sbi->s_ndevs)
3525 return true;
3526 /*
3527 * for blkzoned device, fallback direct IO to buffered IO, so
3528 * all IOs can be serialized by log-structured write.
3529 */
3530 if (f2fs_sb_has_blkzoned(sbi->sb))
3531 return true;
3532 if (test_opt(sbi, LFS) && (rw == WRITE) &&
3533 block_unaligned_IO(inode, iocb, iter))
3534 return true;
3535 return false;
3499} 3536}
3500 3537
3501#ifdef CONFIG_F2FS_FAULT_INJECTION 3538#ifdef CONFIG_F2FS_FAULT_INJECTION
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6db808c80813..e29715ea736f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3001,7 +3001,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3001 if (!f2fs_overwrite_io(inode, iocb->ki_pos, 3001 if (!f2fs_overwrite_io(inode, iocb->ki_pos,
3002 iov_iter_count(from)) || 3002 iov_iter_count(from)) ||
3003 f2fs_has_inline_data(inode) || 3003 f2fs_has_inline_data(inode) ||
3004 f2fs_force_buffered_io(inode, WRITE)) { 3004 f2fs_force_buffered_io(inode,
3005 iocb, from)) {
3005 clear_inode_flag(inode, 3006 clear_inode_flag(inode,
3006 FI_NO_PREALLOC); 3007 FI_NO_PREALLOC);
3007 inode_unlock(inode); 3008 inode_unlock(inode);