author      Benjamin Marzinski <bmarzins@redhat.com>    2011-09-12 19:15:24 -0400
committer   Steven Whitehouse <swhiteho@redhat.com>     2011-10-21 07:39:49 -0400
commit      64dd153c83743af81f20924c6343652d731eeecb (patch)
tree        e296566ece355c34cb10ad35ce35f43ca58fd036 /fs
parent      bd5437a7d4307a35f2c7cc19cad706ec0e5d61f0 (diff)
GFS2: rewrite fallocate code to write blocks directly
GFS2's fallocate code currently goes through the page cache. Since it only
writes to the end of the file or to holes in it, it doesn't need to, and going
through the page cache was causing problems in low-memory environments. This
patch pulls in some of Steve's block allocation work and uses it to simply
allocate the blocks for the file and zero them out at allocation time. This
provides a slight performance increase and dramatically simplifies the code.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
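
For orientation before the diff: a minimal, hedged sketch of the new allocation
path described above, modeled on the fallocate_chunk() loop in the patch. The
helper name zero_fill_sketch is invented for illustration; gfs2_block_map(),
set_buffer_zeronew(), buffer_new()/buffer_zeronew() and the underlying
sb_issue_zeroout() call are the interfaces the patch itself uses.

/*
 * Sketch only, not the patch: walk the byte range extent by extent,
 * asking the block mapper to allocate, and rely on the mapper to zero
 * freshly allocated blocks on disk (via sb_issue_zeroout()) instead of
 * dirtying zero-filled pages in the page cache.
 */
static int zero_fill_sketch(struct inode *inode, loff_t offset, loff_t len)
{
	sector_t lblock = offset >> inode->i_blkbits;

	while (len) {
		/* BH_Zeronew asks the allocator to zero any new blocks */
		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
		int error;

		bh_map.b_size = len;
		set_buffer_zeronew(&bh_map);

		error = gfs2_block_map(inode, lblock, &bh_map, 1);
		if (unlikely(error))
			return error;
		/* if new blocks were allocated but not zeroed, give up */
		if (buffer_new(&bh_map) && !buffer_zeronew(&bh_map))
			return -EIO;

		len -= bh_map.b_size;
		lblock += bh_map.b_size >> inode->i_blkbits;
	}
	return 0;
}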
Diffstat (limited to 'fs')
-rw-r--r--  fs/gfs2/bmap.c    |  12
-rw-r--r--  fs/gfs2/file.c    | 171
-rw-r--r--  fs/gfs2/incore.h  |   3
3 files changed, 39 insertions, 147 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 834cd9442a1d..97b61955850a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -10,6 +10,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 
@@ -427,12 +428,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct super_block *sb = sdp->sd_vfs;
 	struct buffer_head *dibh = mp->mp_bh[0];
 	u64 bn, dblock = 0;
 	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = height - 1;
+	int ret;
 	int eob = 0;
 	enum alloc_state state;
 	__be64 *ptr;
@@ -535,6 +538,15 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 			dblock = bn;
 			while (n-- > 0)
 				*ptr++ = cpu_to_be64(bn++);
+			if (buffer_zeronew(bh_map)) {
+				ret = sb_issue_zeroout(sb, dblock, dblks,
+						       GFP_NOFS);
+				if (ret) {
+					fs_err(sdp,
+					       "Failed to zero data buffers\n");
+					clear_buffer_zeronew(bh_map);
+				}
+			}
 			break;
 		}
 	} while ((state != ALLOC_DATA) || !dblock);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index aa3a4ddb834e..5002408dabea 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -669,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
-static int empty_write_end(struct page *page, unsigned from,
-			   unsigned to, int mode)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct buffer_head *bh;
-	unsigned offset, blksize = 1 << inode->i_blkbits;
-	pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-
-	zero_user(page, from, to-from);
-	mark_page_accessed(page);
-
-	if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
-		if (!gfs2_is_writeback(ip))
-			gfs2_page_add_databufs(ip, page, from, to);
-
-		block_commit_write(page, from, to);
-		return 0;
-	}
-
-	offset = 0;
-	bh = page_buffers(page);
-	while (offset < to) {
-		if (offset >= from) {
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-			clear_buffer_new(bh);
-			write_dirty_buffer(bh, WRITE);
-		}
-		offset += blksize;
-		bh = bh->b_this_page;
-	}
-
-	offset = 0;
-	bh = page_buffers(page);
-	while (offset < to) {
-		if (offset >= from) {
-			wait_on_buffer(bh);
-			if (!buffer_uptodate(bh))
-				return -EIO;
-		}
-		offset += blksize;
-		bh = bh->b_this_page;
-	}
-	return 0;
-}
-
-static int needs_empty_write(sector_t block, struct inode *inode)
-{
-	int error;
-	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
-
-	bh_map.b_size = 1 << inode->i_blkbits;
-	error = gfs2_block_map(inode, block, &bh_map, 0);
-	if (unlikely(error))
-		return error;
-	return !buffer_mapped(&bh_map);
-}
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
-			      int mode)
-{
-	struct inode *inode = page->mapping->host;
-	unsigned start, end, next, blksize;
-	sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	int ret;
-
-	blksize = 1 << inode->i_blkbits;
-	next = end = 0;
-	while (next < from) {
-		next += blksize;
-		block++;
-	}
-	start = next;
-	do {
-		next += blksize;
-		ret = needs_empty_write(block, inode);
-		if (unlikely(ret < 0))
-			return ret;
-		if (ret == 0) {
-			if (end) {
-				ret = __block_write_begin(page, start, end - start,
-							  gfs2_block_map);
-				if (unlikely(ret))
-					return ret;
-				ret = empty_write_end(page, start, end, mode);
-				if (unlikely(ret))
-					return ret;
-				end = 0;
-			}
-			start = next;
-		}
-		else
-			end = next;
-		block++;
-	} while (next < to);
-
-	if (end) {
-		ret = __block_write_begin(page, start, end - start, gfs2_block_map);
-		if (unlikely(ret))
-			return ret;
-		ret = empty_write_end(page, start, end, mode);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	return 0;
-}
-
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			   int mode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *dibh;
 	int error;
-	u64 start = offset >> PAGE_CACHE_SHIFT;
-	unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
-	u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
-	pgoff_t curr;
-	struct page *page;
-	unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
-	unsigned int from, to;
-
-	if (!end_offset)
-		end_offset = PAGE_CACHE_SIZE;
+	unsigned int nr_blks;
+	sector_t lblock = offset >> inode->i_blkbits;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (unlikely(error))
-		goto out;
+		return error;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 
@@ -807,39 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			goto out;
 	}
 
-	curr = start;
-	offset = start << PAGE_CACHE_SHIFT;
-	from = start_offset;
-	to = PAGE_CACHE_SIZE;
-	while (curr <= end) {
-		page = grab_cache_page_write_begin(inode->i_mapping, curr,
-						   AOP_FLAG_NOFS);
-		if (unlikely(!page)) {
-			error = -ENOMEM;
-			goto out;
-		}
+	while (len) {
+		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
+		bh_map.b_size = len;
+		set_buffer_zeronew(&bh_map);
 
-		if (curr == end)
-			to = end_offset;
-		error = write_empty_blocks(page, from, to, mode);
-		if (!error && offset + to > inode->i_size &&
-		    !(mode & FALLOC_FL_KEEP_SIZE)) {
-			i_size_write(inode, offset + to);
-		}
-		unlock_page(page);
-		page_cache_release(page);
-		if (error)
+		error = gfs2_block_map(inode, lblock, &bh_map, 1);
+		if (unlikely(error))
 			goto out;
-		curr++;
-		offset += PAGE_CACHE_SIZE;
-		from = 0;
+		len -= bh_map.b_size;
+		nr_blks = bh_map.b_size >> inode->i_blkbits;
+		lblock += nr_blks;
+		if (!buffer_new(&bh_map))
+			continue;
+		if (unlikely(!buffer_zeronew(&bh_map))) {
+			error = -EIO;
+			goto out;
+		}
 	}
+	if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
+		i_size_write(inode, offset + len);
 
 	mark_inode_dirty(inode);
 
-	brelse(dibh);
-
 out:
+	brelse(dibh);
 	return error;
 }
 
@@ -879,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	int error;
 	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
 	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+	loff_t max_chunk_size = UINT_MAX & bsize_mask;
 	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
 
 	/* We only support the FALLOC_FL_KEEP_SIZE mode */
@@ -932,7 +808,8 @@ retry:
 			goto out_qunlock;
 		}
 		max_bytes = bytes;
-		calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+		calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
+				&max_bytes, &data_blocks, &ind_blocks);
 		al->al_requested = data_blocks + ind_blocks;
 
 		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 55e335b52839..6429aa4339ff 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -103,12 +103,15 @@ struct gfs2_rgrpd {
 enum gfs2_state_bits {
 	BH_Pinned = BH_PrivateStart,
 	BH_Escaped = BH_PrivateStart + 1,
+	BH_Zeronew = BH_PrivateStart + 2,
 };
 
 BUFFER_FNS(Pinned, pinned)
 TAS_BUFFER_FNS(Pinned, pinned)
 BUFFER_FNS(Escaped, escaped)
 TAS_BUFFER_FNS(Escaped, escaped)
+BUFFER_FNS(Zeronew, zeronew)
+TAS_BUFFER_FNS(Zeronew, zeronew)
 
 struct gfs2_bufdata {
 	struct buffer_head *bd_bh;