about summary refs log tree commit diff stats
path: root/fs/f2fs
diff options
context:
space:
mode:
author Jaegeuk Kim <jaegeuk.kim@samsung.com> 2013-12-16 05:04:05 -0500
committer Jaegeuk Kim <jaegeuk.kim@samsung.com> 2013-12-22 20:18:07 -0500
commit bfad7c2d40332be6a1d7a89660bceb0f6ea1d73a (patch)
tree 893b2d6ab84cdc2a7ae3b1381cf7c394711a0cce /fs/f2fs
parent 216fbd64437452d23db54ae845916facd7215caa (diff)
f2fs: introduce a new direct_IO write path
Previously, f2fs did not support high-performance direct IO: every write request was sent through the buffered write path, resulting in severe performance degradation due to memory operations like copy_from_user. This patch introduces a new direct IO path in which every write request is processed by the generic blockdev_direct_IO() with an enhanced get_block function. The get_data_block() in f2fs handles: 1. if the original data blocks are already allocated, then give them to blockdev. 2. otherwise, a. preallocate the requested block addresses, b. do not use the extent cache, for better performance, c. give the block addresses to blockdev. This policy implies that: - newly allocated data are sequentially written to the disk. - updated data are randomly written to the disk. - f2fs gives consistency on its file meta, not file data. Reviewed-by: Chao Yu <chao2.yu@samsung.com> Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- fs/f2fs/data.c 152
-rw-r--r-- fs/f2fs/f2fs.h 2
-rw-r--r-- fs/f2fs/segment.c 23
3 files changed, 129 insertions, 48 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 15956fa584de..a0950bcbf568 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -570,74 +570,151 @@ repeat:
570 return page; 570 return page;
571} 571}
572 572
573static int __allocate_data_block(struct dnode_of_data *dn)
574{
575 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
576 struct f2fs_summary sum;
577 block_t new_blkaddr;
578 struct node_info ni;
579 int type;
580
581 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
582 return -EPERM;
583 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
584 return -ENOSPC;
585
586 __set_data_blkaddr(dn, NEW_ADDR);
587 dn->data_blkaddr = NEW_ADDR;
588
589 get_node_info(sbi, dn->nid, &ni);
590 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
591
592 type = CURSEG_WARM_DATA;
593
594 allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
595
596 /* direct IO doesn't use extent cache to maximize the performance */
597 set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
598 update_extent_cache(new_blkaddr, dn);
599 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
600
601 dn->data_blkaddr = new_blkaddr;
602 return 0;
603}
604
573/* 605/*
574 * This function should be used by the data read flow only where it 606 * This function should be used by the data read flow only where it
575 * does not check the "create" flag that indicates block allocation. 607 * does not check the "create" flag that indicates block allocation.
576 * The reason for this special functionality is to exploit VFS readahead 608 * The reason for this special functionality is to exploit VFS readahead
577 * mechanism. 609 * mechanism.
578 */ 610 */
579static int get_data_block_ro(struct inode *inode, sector_t iblock, 611static int get_data_block(struct inode *inode, sector_t iblock,
580 struct buffer_head *bh_result, int create) 612 struct buffer_head *bh_result, int create)
581{ 613{
614 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
582 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 615 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
583 unsigned maxblocks = bh_result->b_size >> blkbits; 616 unsigned maxblocks = bh_result->b_size >> blkbits;
584 struct dnode_of_data dn; 617 struct dnode_of_data dn;
585 pgoff_t pgofs; 618 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
586 int err; 619 pgoff_t pgofs, end_offset;
620 int err = 0, ofs = 1;
621 bool allocated = false;
587 622
588 /* Get the page offset from the block offset(iblock) */ 623 /* Get the page offset from the block offset(iblock) */
589 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 624 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
590 625
591 if (check_extent_cache(inode, pgofs, bh_result)) { 626 if (check_extent_cache(inode, pgofs, bh_result))
592 trace_f2fs_get_data_block(inode, iblock, bh_result, 0); 627 goto out;
593 return 0; 628
594 } 629 if (create)
630 f2fs_lock_op(sbi);
595 631
596 /* When reading holes, we need its node page */ 632 /* When reading holes, we need its node page */
597 set_new_dnode(&dn, inode, NULL, NULL, 0); 633 set_new_dnode(&dn, inode, NULL, NULL, 0);
598 err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); 634 err = get_dnode_of_data(&dn, pgofs, mode);
599 if (err) { 635 if (err || dn.data_blkaddr == NEW_ADDR) {
600 trace_f2fs_get_data_block(inode, iblock, bh_result, err); 636 if (err == -ENOENT)
601 return (err == -ENOENT) ? 0 : err; 637 err = 0;
638 goto unlock_out;
602 } 639 }
603 640
604 /* It does not support data allocation */ 641 if (dn.data_blkaddr != NULL_ADDR) {
605 f2fs_bug_on(create); 642 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
606 643 } else if (create) {
607 if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { 644 err = __allocate_data_block(&dn);
608 int i; 645 if (err)
609 unsigned int end_offset; 646 goto put_out;
610 647 allocated = true;
648 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
649 } else {
650 goto put_out;
651 }
652
653 end_offset = IS_INODE(dn.node_page) ?
654 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
655 bh_result->b_size = (((size_t)1) << blkbits);
656 dn.ofs_in_node++;
657 pgofs++;
658
659get_next:
660 if (dn.ofs_in_node >= end_offset) {
661 if (allocated)
662 sync_inode_page(&dn);
663 allocated = false;
664 f2fs_put_dnode(&dn);
665
666 set_new_dnode(&dn, inode, NULL, NULL, 0);
667 err = get_dnode_of_data(&dn, pgofs, mode);
668 if (err || dn.data_blkaddr == NEW_ADDR) {
669 if (err == -ENOENT)
670 err = 0;
671 goto unlock_out;
672 }
611 end_offset = IS_INODE(dn.node_page) ? 673 end_offset = IS_INODE(dn.node_page) ?
612 ADDRS_PER_INODE(F2FS_I(inode)) : 674 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
613 ADDRS_PER_BLOCK; 675 }
614
615 clear_buffer_new(bh_result);
616 676
677 if (maxblocks > (bh_result->b_size >> blkbits)) {
678 block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
679 if (blkaddr == NULL_ADDR && create) {
680 err = __allocate_data_block(&dn);
681 if (err)
682 goto sync_out;
683 allocated = true;
684 blkaddr = dn.data_blkaddr;
685 }
617 /* Give more consecutive addresses for the read ahead */ 686 /* Give more consecutive addresses for the read ahead */
618 for (i = 0; i < end_offset - dn.ofs_in_node; i++) 687 if (blkaddr == (bh_result->b_blocknr + ofs)) {
619 if (((datablock_addr(dn.node_page, 688 ofs++;
620 dn.ofs_in_node + i)) 689 dn.ofs_in_node++;
621 != (dn.data_blkaddr + i)) || maxblocks == i) 690 pgofs++;
622 break; 691 bh_result->b_size += (((size_t)1) << blkbits);
623 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 692 goto get_next;
624 bh_result->b_size = (((size_t)i) << blkbits); 693 }
625 } 694 }
695sync_out:
696 if (allocated)
697 sync_inode_page(&dn);
698put_out:
626 f2fs_put_dnode(&dn); 699 f2fs_put_dnode(&dn);
627 trace_f2fs_get_data_block(inode, iblock, bh_result, 0); 700unlock_out:
628 return 0; 701 if (create)
702 f2fs_unlock_op(sbi);
703out:
704 trace_f2fs_get_data_block(inode, iblock, bh_result, err);
705 return err;
629} 706}
630 707
631static int f2fs_read_data_page(struct file *file, struct page *page) 708static int f2fs_read_data_page(struct file *file, struct page *page)
632{ 709{
633 return mpage_readpage(page, get_data_block_ro); 710 return mpage_readpage(page, get_data_block);
634} 711}
635 712
636static int f2fs_read_data_pages(struct file *file, 713static int f2fs_read_data_pages(struct file *file,
637 struct address_space *mapping, 714 struct address_space *mapping,
638 struct list_head *pages, unsigned nr_pages) 715 struct list_head *pages, unsigned nr_pages)
639{ 716{
640 return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro); 717 return mpage_readpages(mapping, pages, nr_pages, get_data_block);
641} 718}
642 719
643int do_write_data_page(struct page *page, struct f2fs_io_info *fio) 720int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
@@ -883,13 +960,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
883{ 960{
884 struct file *file = iocb->ki_filp; 961 struct file *file = iocb->ki_filp;
885 struct inode *inode = file->f_mapping->host; 962 struct inode *inode = file->f_mapping->host;
886
887 if (rw == WRITE)
888 return 0;
889
890 /* Needs synchronization with the cleaner */
891 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 963 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
892 get_data_block_ro); 964 get_data_block);
893} 965}
894 966
895static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 967static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
@@ -928,7 +1000,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
928 1000
929static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) 1001static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
930{ 1002{
931 return generic_block_bmap(mapping, block, get_data_block_ro); 1003 return generic_block_bmap(mapping, block, get_data_block);
932} 1004}
933 1005
934const struct address_space_operations f2fs_dblock_aops = { 1006const struct address_space_operations f2fs_dblock_aops = {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1b05a628670a..8cbc5a6bf484 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1115,6 +1115,8 @@ void recover_data_page(struct f2fs_sb_info *, struct page *,
1115 struct f2fs_summary *, block_t, block_t); 1115 struct f2fs_summary *, block_t, block_t);
1116void rewrite_node_page(struct f2fs_sb_info *, struct page *, 1116void rewrite_node_page(struct f2fs_sb_info *, struct page *,
1117 struct f2fs_summary *, block_t, block_t); 1117 struct f2fs_summary *, block_t, block_t);
1118void allocate_data_block(struct f2fs_sb_info *, struct page *,
1119 block_t, block_t *, struct f2fs_summary *, int);
1118void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); 1120void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
1119void write_data_summaries(struct f2fs_sb_info *, block_t); 1121void write_data_summaries(struct f2fs_sb_info *, block_t);
1120void write_node_summaries(struct f2fs_sb_info *, block_t); 1122void write_node_summaries(struct f2fs_sb_info *, block_t);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5b890ce74b15..9f8bdd02e3a8 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -854,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
854 return __get_segment_type_6(page, p_type); 854 return __get_segment_type_6(page, p_type);
855} 855}
856 856
857static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, 857void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
858 block_t old_blkaddr, block_t *new_blkaddr, 858 block_t old_blkaddr, block_t *new_blkaddr,
859 struct f2fs_summary *sum, struct f2fs_io_info *fio) 859 struct f2fs_summary *sum, int type)
860{ 860{
861 struct sit_info *sit_i = SIT_I(sbi); 861 struct sit_info *sit_i = SIT_I(sbi);
862 struct curseg_info *curseg; 862 struct curseg_info *curseg;
863 unsigned int old_cursegno; 863 unsigned int old_cursegno;
864 int type;
865 864
866 type = __get_segment_type(page, fio->type);
867 curseg = CURSEG_I(sbi, type); 865 curseg = CURSEG_I(sbi, type);
868 866
869 mutex_lock(&curseg->curseg_mutex); 867 mutex_lock(&curseg->curseg_mutex);
@@ -896,13 +894,22 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
896 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 894 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
897 mutex_unlock(&sit_i->sentry_lock); 895 mutex_unlock(&sit_i->sentry_lock);
898 896
899 if (fio->type == NODE) 897 if (page && IS_NODESEG(type))
900 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 898 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
901 899
900 mutex_unlock(&curseg->curseg_mutex);
901}
902
903static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
904 block_t old_blkaddr, block_t *new_blkaddr,
905 struct f2fs_summary *sum, struct f2fs_io_info *fio)
906{
907 int type = __get_segment_type(page, fio->type);
908
909 allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
910
902 /* writeout dirty page into bdev */ 911 /* writeout dirty page into bdev */
903 f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); 912 f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
904
905 mutex_unlock(&curseg->curseg_mutex);
906} 913}
907 914
908void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) 915void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)