diff options
author | Jaegeuk Kim <jaegeuk.kim@samsung.com> | 2013-12-16 05:04:05 -0500 |
---|---|---|
committer | Jaegeuk Kim <jaegeuk.kim@samsung.com> | 2013-12-22 20:18:07 -0500 |
commit | bfad7c2d40332be6a1d7a89660bceb0f6ea1d73a (patch) | |
tree | 893b2d6ab84cdc2a7ae3b1381cf7c394711a0cce /fs/f2fs | |
parent | 216fbd64437452d23db54ae845916facd7215caa (diff) |
f2fs: introduce a new direct_IO write path
Previously, f2fs did not support high-performance direct IO: it sent every
write request through the buffered write path, resulting in severe
performance degradation due to memory operations like copy_from_user.
This patch introduces a new direct IO path in which every write request is
processed by the generic blockdev_direct_IO() with an enhanced get_block function.
The get_data_block() in f2fs handles:
1. if the original data blocks are already allocated, then give them to blockdev.
2. otherwise,
a. preallocate requested block addresses
b. do not use extent cache for better performance
c. give the block addresses to blockdev
This policy implies that:
- newly allocated data are written sequentially to the disk.
- updated data are written randomly to the disk.
- f2fs guarantees consistency of its file metadata, not of file data.
Reviewed-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/data.c | 152 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 2 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 23 |
3 files changed, 129 insertions, 48 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 15956fa584de..a0950bcbf568 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -570,74 +570,151 @@ repeat: | |||
570 | return page; | 570 | return page; |
571 | } | 571 | } |
572 | 572 | ||
573 | static int __allocate_data_block(struct dnode_of_data *dn) | ||
574 | { | ||
575 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | ||
576 | struct f2fs_summary sum; | ||
577 | block_t new_blkaddr; | ||
578 | struct node_info ni; | ||
579 | int type; | ||
580 | |||
581 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) | ||
582 | return -EPERM; | ||
583 | if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) | ||
584 | return -ENOSPC; | ||
585 | |||
586 | __set_data_blkaddr(dn, NEW_ADDR); | ||
587 | dn->data_blkaddr = NEW_ADDR; | ||
588 | |||
589 | get_node_info(sbi, dn->nid, &ni); | ||
590 | set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); | ||
591 | |||
592 | type = CURSEG_WARM_DATA; | ||
593 | |||
594 | allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); | ||
595 | |||
596 | /* direct IO doesn't use extent cache to maximize the performance */ | ||
597 | set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); | ||
598 | update_extent_cache(new_blkaddr, dn); | ||
599 | clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); | ||
600 | |||
601 | dn->data_blkaddr = new_blkaddr; | ||
602 | return 0; | ||
603 | } | ||
604 | |||
573 | /* | 605 | /* |
574 | * This function should be used by the data read flow only where it | 606 | * This function should be used by the data read flow only where it |
575 | * does not check the "create" flag that indicates block allocation. | 607 | * does not check the "create" flag that indicates block allocation. |
576 | * The reason for this special functionality is to exploit VFS readahead | 608 | * The reason for this special functionality is to exploit VFS readahead |
577 | * mechanism. | 609 | * mechanism. |
578 | */ | 610 | */ |
579 | static int get_data_block_ro(struct inode *inode, sector_t iblock, | 611 | static int get_data_block(struct inode *inode, sector_t iblock, |
580 | struct buffer_head *bh_result, int create) | 612 | struct buffer_head *bh_result, int create) |
581 | { | 613 | { |
614 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
582 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; | 615 | unsigned int blkbits = inode->i_sb->s_blocksize_bits; |
583 | unsigned maxblocks = bh_result->b_size >> blkbits; | 616 | unsigned maxblocks = bh_result->b_size >> blkbits; |
584 | struct dnode_of_data dn; | 617 | struct dnode_of_data dn; |
585 | pgoff_t pgofs; | 618 | int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; |
586 | int err; | 619 | pgoff_t pgofs, end_offset; |
620 | int err = 0, ofs = 1; | ||
621 | bool allocated = false; | ||
587 | 622 | ||
588 | /* Get the page offset from the block offset(iblock) */ | 623 | /* Get the page offset from the block offset(iblock) */ |
589 | pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); | 624 | pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); |
590 | 625 | ||
591 | if (check_extent_cache(inode, pgofs, bh_result)) { | 626 | if (check_extent_cache(inode, pgofs, bh_result)) |
592 | trace_f2fs_get_data_block(inode, iblock, bh_result, 0); | 627 | goto out; |
593 | return 0; | 628 | |
594 | } | 629 | if (create) |
630 | f2fs_lock_op(sbi); | ||
595 | 631 | ||
596 | /* When reading holes, we need its node page */ | 632 | /* When reading holes, we need its node page */ |
597 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 633 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
598 | err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); | 634 | err = get_dnode_of_data(&dn, pgofs, mode); |
599 | if (err) { | 635 | if (err || dn.data_blkaddr == NEW_ADDR) { |
600 | trace_f2fs_get_data_block(inode, iblock, bh_result, err); | 636 | if (err == -ENOENT) |
601 | return (err == -ENOENT) ? 0 : err; | 637 | err = 0; |
638 | goto unlock_out; | ||
602 | } | 639 | } |
603 | 640 | ||
604 | /* It does not support data allocation */ | 641 | if (dn.data_blkaddr != NULL_ADDR) { |
605 | f2fs_bug_on(create); | 642 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); |
606 | 643 | } else if (create) { | |
607 | if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { | 644 | err = __allocate_data_block(&dn); |
608 | int i; | 645 | if (err) |
609 | unsigned int end_offset; | 646 | goto put_out; |
610 | 647 | allocated = true; | |
648 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | ||
649 | } else { | ||
650 | goto put_out; | ||
651 | } | ||
652 | |||
653 | end_offset = IS_INODE(dn.node_page) ? | ||
654 | ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; | ||
655 | bh_result->b_size = (((size_t)1) << blkbits); | ||
656 | dn.ofs_in_node++; | ||
657 | pgofs++; | ||
658 | |||
659 | get_next: | ||
660 | if (dn.ofs_in_node >= end_offset) { | ||
661 | if (allocated) | ||
662 | sync_inode_page(&dn); | ||
663 | allocated = false; | ||
664 | f2fs_put_dnode(&dn); | ||
665 | |||
666 | set_new_dnode(&dn, inode, NULL, NULL, 0); | ||
667 | err = get_dnode_of_data(&dn, pgofs, mode); | ||
668 | if (err || dn.data_blkaddr == NEW_ADDR) { | ||
669 | if (err == -ENOENT) | ||
670 | err = 0; | ||
671 | goto unlock_out; | ||
672 | } | ||
611 | end_offset = IS_INODE(dn.node_page) ? | 673 | end_offset = IS_INODE(dn.node_page) ? |
612 | ADDRS_PER_INODE(F2FS_I(inode)) : | 674 | ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; |
613 | ADDRS_PER_BLOCK; | 675 | } |
614 | |||
615 | clear_buffer_new(bh_result); | ||
616 | 676 | ||
677 | if (maxblocks > (bh_result->b_size >> blkbits)) { | ||
678 | block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); | ||
679 | if (blkaddr == NULL_ADDR && create) { | ||
680 | err = __allocate_data_block(&dn); | ||
681 | if (err) | ||
682 | goto sync_out; | ||
683 | allocated = true; | ||
684 | blkaddr = dn.data_blkaddr; | ||
685 | } | ||
617 | /* Give more consecutive addresses for the read ahead */ | 686 | /* Give more consecutive addresses for the read ahead */ |
618 | for (i = 0; i < end_offset - dn.ofs_in_node; i++) | 687 | if (blkaddr == (bh_result->b_blocknr + ofs)) { |
619 | if (((datablock_addr(dn.node_page, | 688 | ofs++; |
620 | dn.ofs_in_node + i)) | 689 | dn.ofs_in_node++; |
621 | != (dn.data_blkaddr + i)) || maxblocks == i) | 690 | pgofs++; |
622 | break; | 691 | bh_result->b_size += (((size_t)1) << blkbits); |
623 | map_bh(bh_result, inode->i_sb, dn.data_blkaddr); | 692 | goto get_next; |
624 | bh_result->b_size = (((size_t)i) << blkbits); | 693 | } |
625 | } | 694 | } |
695 | sync_out: | ||
696 | if (allocated) | ||
697 | sync_inode_page(&dn); | ||
698 | put_out: | ||
626 | f2fs_put_dnode(&dn); | 699 | f2fs_put_dnode(&dn); |
627 | trace_f2fs_get_data_block(inode, iblock, bh_result, 0); | 700 | unlock_out: |
628 | return 0; | 701 | if (create) |
702 | f2fs_unlock_op(sbi); | ||
703 | out: | ||
704 | trace_f2fs_get_data_block(inode, iblock, bh_result, err); | ||
705 | return err; | ||
629 | } | 706 | } |
630 | 707 | ||
631 | static int f2fs_read_data_page(struct file *file, struct page *page) | 708 | static int f2fs_read_data_page(struct file *file, struct page *page) |
632 | { | 709 | { |
633 | return mpage_readpage(page, get_data_block_ro); | 710 | return mpage_readpage(page, get_data_block); |
634 | } | 711 | } |
635 | 712 | ||
636 | static int f2fs_read_data_pages(struct file *file, | 713 | static int f2fs_read_data_pages(struct file *file, |
637 | struct address_space *mapping, | 714 | struct address_space *mapping, |
638 | struct list_head *pages, unsigned nr_pages) | 715 | struct list_head *pages, unsigned nr_pages) |
639 | { | 716 | { |
640 | return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); | 717 | return mpage_readpages(mapping, pages, nr_pages, get_data_block); |
641 | } | 718 | } |
642 | 719 | ||
643 | int do_write_data_page(struct page *page, struct f2fs_io_info *fio) | 720 | int do_write_data_page(struct page *page, struct f2fs_io_info *fio) |
@@ -883,13 +960,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
883 | { | 960 | { |
884 | struct file *file = iocb->ki_filp; | 961 | struct file *file = iocb->ki_filp; |
885 | struct inode *inode = file->f_mapping->host; | 962 | struct inode *inode = file->f_mapping->host; |
886 | |||
887 | if (rw == WRITE) | ||
888 | return 0; | ||
889 | |||
890 | /* Needs synchronization with the cleaner */ | ||
891 | return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, | 963 | return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, |
892 | get_data_block_ro); | 964 | get_data_block); |
893 | } | 965 | } |
894 | 966 | ||
895 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, | 967 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, |
@@ -928,7 +1000,7 @@ static int f2fs_set_data_page_dirty(struct page *page) | |||
928 | 1000 | ||
929 | static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) | 1001 | static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) |
930 | { | 1002 | { |
931 | return generic_block_bmap(mapping, block, get_data_block_ro); | 1003 | return generic_block_bmap(mapping, block, get_data_block); |
932 | } | 1004 | } |
933 | 1005 | ||
934 | const struct address_space_operations f2fs_dblock_aops = { | 1006 | const struct address_space_operations f2fs_dblock_aops = { |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b05a628670a..8cbc5a6bf484 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -1115,6 +1115,8 @@ void recover_data_page(struct f2fs_sb_info *, struct page *, | |||
1115 | struct f2fs_summary *, block_t, block_t); | 1115 | struct f2fs_summary *, block_t, block_t); |
1116 | void rewrite_node_page(struct f2fs_sb_info *, struct page *, | 1116 | void rewrite_node_page(struct f2fs_sb_info *, struct page *, |
1117 | struct f2fs_summary *, block_t, block_t); | 1117 | struct f2fs_summary *, block_t, block_t); |
1118 | void allocate_data_block(struct f2fs_sb_info *, struct page *, | ||
1119 | block_t, block_t *, struct f2fs_summary *, int); | ||
1118 | void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); | 1120 | void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); |
1119 | void write_data_summaries(struct f2fs_sb_info *, block_t); | 1121 | void write_data_summaries(struct f2fs_sb_info *, block_t); |
1120 | void write_node_summaries(struct f2fs_sb_info *, block_t); | 1122 | void write_node_summaries(struct f2fs_sb_info *, block_t); |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5b890ce74b15..9f8bdd02e3a8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -854,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type) | |||
854 | return __get_segment_type_6(page, p_type); | 854 | return __get_segment_type_6(page, p_type); |
855 | } | 855 | } |
856 | 856 | ||
857 | static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | 857 | void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, |
858 | block_t old_blkaddr, block_t *new_blkaddr, | 858 | block_t old_blkaddr, block_t *new_blkaddr, |
859 | struct f2fs_summary *sum, struct f2fs_io_info *fio) | 859 | struct f2fs_summary *sum, int type) |
860 | { | 860 | { |
861 | struct sit_info *sit_i = SIT_I(sbi); | 861 | struct sit_info *sit_i = SIT_I(sbi); |
862 | struct curseg_info *curseg; | 862 | struct curseg_info *curseg; |
863 | unsigned int old_cursegno; | 863 | unsigned int old_cursegno; |
864 | int type; | ||
865 | 864 | ||
866 | type = __get_segment_type(page, fio->type); | ||
867 | curseg = CURSEG_I(sbi, type); | 865 | curseg = CURSEG_I(sbi, type); |
868 | 866 | ||
869 | mutex_lock(&curseg->curseg_mutex); | 867 | mutex_lock(&curseg->curseg_mutex); |
@@ -896,13 +894,22 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | |||
896 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | 894 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); |
897 | mutex_unlock(&sit_i->sentry_lock); | 895 | mutex_unlock(&sit_i->sentry_lock); |
898 | 896 | ||
899 | if (fio->type == NODE) | 897 | if (page && IS_NODESEG(type)) |
900 | fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); | 898 | fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); |
901 | 899 | ||
900 | mutex_unlock(&curseg->curseg_mutex); | ||
901 | } | ||
902 | |||
903 | static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | ||
904 | block_t old_blkaddr, block_t *new_blkaddr, | ||
905 | struct f2fs_summary *sum, struct f2fs_io_info *fio) | ||
906 | { | ||
907 | int type = __get_segment_type(page, fio->type); | ||
908 | |||
909 | allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); | ||
910 | |||
902 | /* writeout dirty page into bdev */ | 911 | /* writeout dirty page into bdev */ |
903 | f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); | 912 | f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); |
904 | |||
905 | mutex_unlock(&curseg->curseg_mutex); | ||
906 | } | 913 | } |
907 | 914 | ||
908 | void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) | 915 | void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) |