Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--  fs/ext4/inode.c | 1198
1 file changed, 532 insertions, 666 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4b8debeb3965..e3126c051006 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,7 +39,9 @@ | |||
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/kernel.h> | 41 | #include <linux/kernel.h> |
42 | #include <linux/printk.h> | ||
42 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/ratelimit.h> | ||
43 | 45 | ||
44 | #include "ext4_jbd2.h" | 46 | #include "ext4_jbd2.h" |
45 | #include "xattr.h" | 47 | #include "xattr.h" |
@@ -53,13 +55,27 @@ | |||
53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 55 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
54 | loff_t new_size) | 56 | loff_t new_size) |
55 | { | 57 | { |
56 | return jbd2_journal_begin_ordered_truncate( | 58 | trace_ext4_begin_ordered_truncate(inode, new_size); |
57 | EXT4_SB(inode->i_sb)->s_journal, | 59 | /* |
58 | &EXT4_I(inode)->jinode, | 60 | * If jinode is zero, then we never opened the file for |
59 | new_size); | 61 | * writing, so there's no need to call |
62 | * jbd2_journal_begin_ordered_truncate() since there's no | ||
63 | * outstanding writes we need to flush. | ||
64 | */ | ||
65 | if (!EXT4_I(inode)->jinode) | ||
66 | return 0; | ||
67 | return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), | ||
68 | EXT4_I(inode)->jinode, | ||
69 | new_size); | ||
60 | } | 70 | } |
61 | 71 | ||
62 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 72 | static void ext4_invalidatepage(struct page *page, unsigned long offset); |
73 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | ||
74 | struct buffer_head *bh_result, int create); | ||
75 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
76 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
77 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | ||
78 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | ||
63 | 79 | ||
64 | /* | 80 | /* |
65 | * Test whether an inode is a fast symlink. | 81 | * Test whether an inode is a fast symlink. |
@@ -157,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
157 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 173 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
158 | jbd_debug(2, "restarting handle %p\n", handle); | 174 | jbd_debug(2, "restarting handle %p\n", handle); |
159 | up_write(&EXT4_I(inode)->i_data_sem); | 175 | up_write(&EXT4_I(inode)->i_data_sem); |
160 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | 176 | ret = ext4_journal_restart(handle, nblocks); |
161 | down_write(&EXT4_I(inode)->i_data_sem); | 177 | down_write(&EXT4_I(inode)->i_data_sem); |
162 | ext4_discard_preallocations(inode); | 178 | ext4_discard_preallocations(inode); |
163 | 179 | ||
@@ -172,6 +188,7 @@ void ext4_evict_inode(struct inode *inode) | |||
172 | handle_t *handle; | 188 | handle_t *handle; |
173 | int err; | 189 | int err; |
174 | 190 | ||
191 | trace_ext4_evict_inode(inode); | ||
175 | if (inode->i_nlink) { | 192 | if (inode->i_nlink) { |
176 | truncate_inode_pages(&inode->i_data, 0); | 193 | truncate_inode_pages(&inode->i_data, 0); |
177 | goto no_delete; | 194 | goto no_delete; |
@@ -544,7 +561,7 @@ static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | |||
544 | } | 561 | } |
545 | 562 | ||
546 | /** | 563 | /** |
547 | * ext4_blks_to_allocate: Look up the block map and count the number | 564 | * ext4_blks_to_allocate - Look up the block map and count the number |
548 | * of direct blocks need to be allocated for the given branch. | 565 | * of direct blocks need to be allocated for the given branch. |
549 | * | 566 | * |
550 | * @branch: chain of indirect blocks | 567 | * @branch: chain of indirect blocks |
@@ -583,13 +600,19 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, | |||
583 | 600 | ||
584 | /** | 601 | /** |
585 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch | 602 | * ext4_alloc_blocks: multiple allocate blocks needed for a branch |
603 | * @handle: handle for this transaction | ||
604 | * @inode: inode which needs allocated blocks | ||
605 | * @iblock: the logical block to start allocated at | ||
606 | * @goal: preferred physical block of allocation | ||
586 | * @indirect_blks: the number of blocks need to allocate for indirect | 607 | * @indirect_blks: the number of blocks need to allocate for indirect |
587 | * blocks | 608 | * blocks |
588 | * | 609 | * @blks: number of desired blocks |
589 | * @new_blocks: on return it will store the new block numbers for | 610 | * @new_blocks: on return it will store the new block numbers for |
590 | * the indirect blocks(if needed) and the first direct block, | 611 | * the indirect blocks(if needed) and the first direct block, |
591 | * @blks: on return it will store the total number of allocated | 612 | * @err: on return it will store the error code |
592 | * direct blocks | 613 | * |
614 | * This function will return the number of blocks allocated as | ||
615 | * requested by the passed-in parameters. | ||
593 | */ | 616 | */ |
594 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | 617 | static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, |
595 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 618 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
@@ -616,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
616 | while (target > 0) { | 639 | while (target > 0) { |
617 | count = target; | 640 | count = target; |
618 | /* allocating blocks for indirect blocks and direct blocks */ | 641 | /* allocating blocks for indirect blocks and direct blocks */ |
619 | current_block = ext4_new_meta_blocks(handle, inode, | 642 | current_block = ext4_new_meta_blocks(handle, inode, goal, |
620 | goal, &count, err); | 643 | 0, &count, err); |
621 | if (*err) | 644 | if (*err) |
622 | goto failed_out; | 645 | goto failed_out; |
623 | 646 | ||
@@ -697,15 +720,17 @@ allocated: | |||
697 | return ret; | 720 | return ret; |
698 | failed_out: | 721 | failed_out: |
699 | for (i = 0; i < index; i++) | 722 | for (i = 0; i < index; i++) |
700 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 723 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
701 | return ret; | 724 | return ret; |
702 | } | 725 | } |
703 | 726 | ||
704 | /** | 727 | /** |
705 | * ext4_alloc_branch - allocate and set up a chain of blocks. | 728 | * ext4_alloc_branch - allocate and set up a chain of blocks. |
729 | * @handle: handle for this transaction | ||
706 | * @inode: owner | 730 | * @inode: owner |
707 | * @indirect_blks: number of allocated indirect blocks | 731 | * @indirect_blks: number of allocated indirect blocks |
708 | * @blks: number of allocated direct blocks | 732 | * @blks: number of allocated direct blocks |
733 | * @goal: preferred place for allocation | ||
709 | * @offsets: offsets (in the blocks) to store the pointers to next. | 734 | * @offsets: offsets (in the blocks) to store the pointers to next. |
710 | * @branch: place to store the chain in. | 735 | * @branch: place to store the chain in. |
711 | * | 736 | * |
@@ -755,6 +780,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
755 | * parent to disk. | 780 | * parent to disk. |
756 | */ | 781 | */ |
757 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); | 782 | bh = sb_getblk(inode->i_sb, new_blocks[n-1]); |
783 | if (unlikely(!bh)) { | ||
784 | err = -EIO; | ||
785 | goto failed; | ||
786 | } | ||
787 | |||
758 | branch[n].bh = bh; | 788 | branch[n].bh = bh; |
759 | lock_buffer(bh); | 789 | lock_buffer(bh); |
760 | BUFFER_TRACE(bh, "call get_create_access"); | 790 | BUFFER_TRACE(bh, "call get_create_access"); |
@@ -793,26 +823,27 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
793 | return err; | 823 | return err; |
794 | failed: | 824 | failed: |
795 | /* Allocation failed, free what we already allocated */ | 825 | /* Allocation failed, free what we already allocated */ |
796 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 826 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); |
797 | for (i = 1; i <= n ; i++) { | 827 | for (i = 1; i <= n ; i++) { |
798 | /* | 828 | /* |
799 | * branch[i].bh is newly allocated, so there is no | 829 | * branch[i].bh is newly allocated, so there is no |
800 | * need to revoke the block, which is why we don't | 830 | * need to revoke the block, which is why we don't |
801 | * need to set EXT4_FREE_BLOCKS_METADATA. | 831 | * need to set EXT4_FREE_BLOCKS_METADATA. |
802 | */ | 832 | */ |
803 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, | 833 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, |
804 | EXT4_FREE_BLOCKS_FORGET); | 834 | EXT4_FREE_BLOCKS_FORGET); |
805 | } | 835 | } |
806 | for (i = n+1; i < indirect_blks; i++) | 836 | for (i = n+1; i < indirect_blks; i++) |
807 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 837 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
808 | 838 | ||
809 | ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); | 839 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); |
810 | 840 | ||
811 | return err; | 841 | return err; |
812 | } | 842 | } |
813 | 843 | ||
814 | /** | 844 | /** |
815 | * ext4_splice_branch - splice the allocated branch onto inode. | 845 | * ext4_splice_branch - splice the allocated branch onto inode. |
846 | * @handle: handle for this transaction | ||
816 | * @inode: owner | 847 | * @inode: owner |
817 | * @block: (logical) number of block we are adding | 848 | * @block: (logical) number of block we are adding |
818 | * @chain: chain of indirect blocks (with a missing link - see | 849 | * @chain: chain of indirect blocks (with a missing link - see |
@@ -893,7 +924,7 @@ err_out: | |||
893 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, |
894 | EXT4_FREE_BLOCKS_FORGET); | 925 | EXT4_FREE_BLOCKS_FORGET); |
895 | } | 926 | } |
896 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), | 927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), |
897 | blks, 0); | 928 | blks, 0); |
898 | 929 | ||
899 | return err; | 930 | return err; |
@@ -942,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
942 | int count = 0; | 973 | int count = 0; |
943 | ext4_fsblk_t first_block = 0; | 974 | ext4_fsblk_t first_block = 0; |
944 | 975 | ||
976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
945 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | 977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
946 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
947 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | 979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
@@ -1027,6 +1059,8 @@ cleanup: | |||
1027 | partial--; | 1059 | partial--; |
1028 | } | 1060 | } |
1029 | out: | 1061 | out: |
1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
1063 | map->m_pblk, map->m_len, err); | ||
1030 | return err; | 1064 | return err; |
1031 | } | 1065 | } |
1032 | 1066 | ||
@@ -1068,7 +1102,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
1068 | * Calculate the number of metadata blocks need to reserve | 1102 | * Calculate the number of metadata blocks need to reserve |
1069 | * to allocate a block located at @lblock | 1103 | * to allocate a block located at @lblock |
1070 | */ | 1104 | */ |
1071 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) | 1105 | static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) |
1072 | { | 1106 | { |
1073 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1107 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1074 | return ext4_ext_calc_metadata_amount(inode, lblock); | 1108 | return ext4_ext_calc_metadata_amount(inode, lblock); |
@@ -1207,8 +1241,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1207 | break; | 1241 | break; |
1208 | idx++; | 1242 | idx++; |
1209 | num++; | 1243 | num++; |
1210 | if (num >= max_pages) | 1244 | if (num >= max_pages) { |
1245 | done = 1; | ||
1211 | break; | 1246 | break; |
1247 | } | ||
1212 | } | 1248 | } |
1213 | pagevec_release(&pvec); | 1249 | pagevec_release(&pvec); |
1214 | } | 1250 | } |
@@ -1305,7 +1341,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1305 | * avoid double accounting | 1341 | * avoid double accounting |
1306 | */ | 1342 | */ |
1307 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1343 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1308 | EXT4_I(inode)->i_delalloc_reserved_flag = 1; | 1344 | ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
1309 | /* | 1345 | /* |
1310 | * We need to check for EXT4 here because migrate | 1346 | * We need to check for EXT4 here because migrate |
1311 | * could have changed the inode type in between | 1347 | * could have changed the inode type in between |
@@ -1335,7 +1371,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1335 | ext4_da_update_reserve_space(inode, retval, 1); | 1371 | ext4_da_update_reserve_space(inode, retval, 1); |
1336 | } | 1372 | } |
1337 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 1373 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) |
1338 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1374 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
1339 | 1375 | ||
1340 | up_write((&EXT4_I(inode)->i_data_sem)); | 1376 | up_write((&EXT4_I(inode)->i_data_sem)); |
1341 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1377 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
@@ -1538,10 +1574,10 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1574 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1539 | return 0; | 1575 | return 0; |
1540 | /* | 1576 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | 1577 | * __block_write_begin() could have dirtied some buffers. Clean |
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | 1578 | * the dirty bit as jbd2_journal_get_write_access() could complain |
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | 1579 | * otherwise about fs integrity issues. Setting of the dirty bit |
1544 | * by __block_prepare_write() isn't a real problem here as we clear | 1580 | * by __block_write_begin() isn't a real problem here as we clear |
1545 | * the bit before releasing a page lock and thus writeback cannot | 1581 | * the bit before releasing a page lock and thus writeback cannot |
1546 | * ever write the buffer. | 1582 | * ever write the buffer. |
1547 | */ | 1583 | */ |
@@ -1863,7 +1899,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1863 | /* | 1899 | /* |
1864 | * Reserve a single block located at lblock | 1900 | * Reserve a single block located at lblock |
1865 | */ | 1901 | */ |
1866 | static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | 1902 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1867 | { | 1903 | { |
1868 | int retries = 0; | 1904 | int retries = 0; |
1869 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1905 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
@@ -1894,7 +1930,7 @@ repeat: | |||
1894 | * We do still charge estimated metadata to the sb though; | 1930 | * We do still charge estimated metadata to the sb though; |
1895 | * we cannot afford to run out of free blocks. | 1931 | * we cannot afford to run out of free blocks. |
1896 | */ | 1932 | */ |
1897 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1933 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { |
1898 | dquot_release_reservation_block(inode, 1); | 1934 | dquot_release_reservation_block(inode, 1); |
1899 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1935 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1900 | yield(); | 1936 | yield(); |
@@ -1995,16 +2031,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1995 | * | 2031 | * |
1996 | * As pages are already locked by write_cache_pages(), we can't use it | 2032 | * As pages are already locked by write_cache_pages(), we can't use it |
1997 | */ | 2033 | */ |
1998 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 2034 | static int mpage_da_submit_io(struct mpage_da_data *mpd, |
2035 | struct ext4_map_blocks *map) | ||
1999 | { | 2036 | { |
2000 | long pages_skipped; | ||
2001 | struct pagevec pvec; | 2037 | struct pagevec pvec; |
2002 | unsigned long index, end; | 2038 | unsigned long index, end; |
2003 | int ret = 0, err, nr_pages, i; | 2039 | int ret = 0, err, nr_pages, i; |
2004 | struct inode *inode = mpd->inode; | 2040 | struct inode *inode = mpd->inode; |
2005 | struct address_space *mapping = inode->i_mapping; | 2041 | struct address_space *mapping = inode->i_mapping; |
2042 | loff_t size = i_size_read(inode); | ||
2043 | unsigned int len, block_start; | ||
2044 | struct buffer_head *bh, *page_bufs = NULL; | ||
2045 | int journal_data = ext4_should_journal_data(inode); | ||
2046 | sector_t pblock = 0, cur_logical = 0; | ||
2047 | struct ext4_io_submit io_submit; | ||
2006 | 2048 | ||
2007 | BUG_ON(mpd->next_page <= mpd->first_page); | 2049 | BUG_ON(mpd->next_page <= mpd->first_page); |
2050 | memset(&io_submit, 0, sizeof(io_submit)); | ||
2008 | /* | 2051 | /* |
2009 | * We need to start from the first_page to the next_page - 1 | 2052 | * We need to start from the first_page to the next_page - 1 |
2010 | * to make sure we also write the mapped dirty buffer_heads. | 2053 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -2020,124 +2063,111 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2020 | if (nr_pages == 0) | 2063 | if (nr_pages == 0) |
2021 | break; | 2064 | break; |
2022 | for (i = 0; i < nr_pages; i++) { | 2065 | for (i = 0; i < nr_pages; i++) { |
2066 | int commit_write = 0, skip_page = 0; | ||
2023 | struct page *page = pvec.pages[i]; | 2067 | struct page *page = pvec.pages[i]; |
2024 | 2068 | ||
2025 | index = page->index; | 2069 | index = page->index; |
2026 | if (index > end) | 2070 | if (index > end) |
2027 | break; | 2071 | break; |
2072 | |||
2073 | if (index == size >> PAGE_CACHE_SHIFT) | ||
2074 | len = size & ~PAGE_CACHE_MASK; | ||
2075 | else | ||
2076 | len = PAGE_CACHE_SIZE; | ||
2077 | if (map) { | ||
2078 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
2079 | inode->i_blkbits); | ||
2080 | pblock = map->m_pblk + (cur_logical - | ||
2081 | map->m_lblk); | ||
2082 | } | ||
2028 | index++; | 2083 | index++; |
2029 | 2084 | ||
2030 | BUG_ON(!PageLocked(page)); | 2085 | BUG_ON(!PageLocked(page)); |
2031 | BUG_ON(PageWriteback(page)); | 2086 | BUG_ON(PageWriteback(page)); |
2032 | 2087 | ||
2033 | pages_skipped = mpd->wbc->pages_skipped; | ||
2034 | err = mapping->a_ops->writepage(page, mpd->wbc); | ||
2035 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) | ||
2036 | /* | ||
2037 | * have successfully written the page | ||
2038 | * without skipping the same | ||
2039 | */ | ||
2040 | mpd->pages_written++; | ||
2041 | /* | 2088 | /* |
2042 | * In error case, we have to continue because | 2089 | * If the page does not have buffers (for |
2043 | * remaining pages are still locked | 2090 | * whatever reason), try to create them using |
2044 | * XXX: unlock and re-dirty them? | 2091 | * __block_write_begin. If this fails, |
2092 | * skip the page and move on. | ||
2045 | */ | 2093 | */ |
2046 | if (ret == 0) | 2094 | if (!page_has_buffers(page)) { |
2047 | ret = err; | 2095 | if (__block_write_begin(page, 0, len, |
2048 | } | 2096 | noalloc_get_block_write)) { |
2049 | pagevec_release(&pvec); | 2097 | skip_page: |
2050 | } | 2098 | unlock_page(page); |
2051 | return ret; | 2099 | continue; |
2052 | } | 2100 | } |
2053 | 2101 | commit_write = 1; | |
2054 | /* | 2102 | } |
2055 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | ||
2056 | * | ||
2057 | * the function goes through all passed space and put actual disk | ||
2058 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | ||
2059 | */ | ||
2060 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, | ||
2061 | struct ext4_map_blocks *map) | ||
2062 | { | ||
2063 | struct inode *inode = mpd->inode; | ||
2064 | struct address_space *mapping = inode->i_mapping; | ||
2065 | int blocks = map->m_len; | ||
2066 | sector_t pblock = map->m_pblk, cur_logical; | ||
2067 | struct buffer_head *head, *bh; | ||
2068 | pgoff_t index, end; | ||
2069 | struct pagevec pvec; | ||
2070 | int nr_pages, i; | ||
2071 | |||
2072 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2073 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2074 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2075 | |||
2076 | pagevec_init(&pvec, 0); | ||
2077 | |||
2078 | while (index <= end) { | ||
2079 | /* XXX: optimize tail */ | ||
2080 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
2081 | if (nr_pages == 0) | ||
2082 | break; | ||
2083 | for (i = 0; i < nr_pages; i++) { | ||
2084 | struct page *page = pvec.pages[i]; | ||
2085 | |||
2086 | index = page->index; | ||
2087 | if (index > end) | ||
2088 | break; | ||
2089 | index++; | ||
2090 | |||
2091 | BUG_ON(!PageLocked(page)); | ||
2092 | BUG_ON(PageWriteback(page)); | ||
2093 | BUG_ON(!page_has_buffers(page)); | ||
2094 | |||
2095 | bh = page_buffers(page); | ||
2096 | head = bh; | ||
2097 | |||
2098 | /* skip blocks out of the range */ | ||
2099 | do { | ||
2100 | if (cur_logical >= map->m_lblk) | ||
2101 | break; | ||
2102 | cur_logical++; | ||
2103 | } while ((bh = bh->b_this_page) != head); | ||
2104 | 2103 | ||
2104 | bh = page_bufs = page_buffers(page); | ||
2105 | block_start = 0; | ||
2105 | do { | 2106 | do { |
2106 | if (cur_logical >= map->m_lblk + blocks) | 2107 | if (!bh) |
2107 | break; | 2108 | goto skip_page; |
2108 | 2109 | if (map && (cur_logical >= map->m_lblk) && | |
2109 | if (buffer_delay(bh) || buffer_unwritten(bh)) { | 2110 | (cur_logical <= (map->m_lblk + |
2110 | 2111 | (map->m_len - 1)))) { | |
2111 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | ||
2112 | |||
2113 | if (buffer_delay(bh)) { | 2112 | if (buffer_delay(bh)) { |
2114 | clear_buffer_delay(bh); | 2113 | clear_buffer_delay(bh); |
2115 | bh->b_blocknr = pblock; | 2114 | bh->b_blocknr = pblock; |
2116 | } else { | ||
2117 | /* | ||
2118 | * unwritten already should have | ||
2119 | * blocknr assigned. Verify that | ||
2120 | */ | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | BUG_ON(bh->b_blocknr != pblock); | ||
2123 | } | 2115 | } |
2116 | if (buffer_unwritten(bh) || | ||
2117 | buffer_mapped(bh)) | ||
2118 | BUG_ON(bh->b_blocknr != pblock); | ||
2119 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
2120 | set_buffer_uninit(bh); | ||
2121 | clear_buffer_unwritten(bh); | ||
2122 | } | ||
2124 | 2123 | ||
2125 | } else if (buffer_mapped(bh)) | 2124 | /* skip page if block allocation undone */ |
2126 | BUG_ON(bh->b_blocknr != pblock); | 2125 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2127 | 2126 | skip_page = 1; | |
2128 | if (map->m_flags & EXT4_MAP_UNINIT) | 2127 | bh = bh->b_this_page; |
2129 | set_buffer_uninit(bh); | 2128 | block_start += bh->b_size; |
2130 | cur_logical++; | 2129 | cur_logical++; |
2131 | pblock++; | 2130 | pblock++; |
2132 | } while ((bh = bh->b_this_page) != head); | 2131 | } while (bh != page_bufs); |
2132 | |||
2133 | if (skip_page) | ||
2134 | goto skip_page; | ||
2135 | |||
2136 | if (commit_write) | ||
2137 | /* mark the buffer_heads as dirty & uptodate */ | ||
2138 | block_commit_write(page, 0, len); | ||
2139 | |||
2140 | clear_page_dirty_for_io(page); | ||
2141 | /* | ||
2142 | * Delalloc doesn't support data journalling, | ||
2143 | * but eventually maybe we'll lift this | ||
2144 | * restriction. | ||
2145 | */ | ||
2146 | if (unlikely(journal_data && PageChecked(page))) | ||
2147 | err = __ext4_journalled_writepage(page, len); | ||
2148 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) | ||
2149 | err = ext4_bio_write_page(&io_submit, page, | ||
2150 | len, mpd->wbc); | ||
2151 | else | ||
2152 | err = block_write_full_page(page, | ||
2153 | noalloc_get_block_write, mpd->wbc); | ||
2154 | |||
2155 | if (!err) | ||
2156 | mpd->pages_written++; | ||
2157 | /* | ||
2158 | * In error case, we have to continue because | ||
2159 | * remaining pages are still locked | ||
2160 | */ | ||
2161 | if (ret == 0) | ||
2162 | ret = err; | ||
2133 | } | 2163 | } |
2134 | pagevec_release(&pvec); | 2164 | pagevec_release(&pvec); |
2135 | } | 2165 | } |
2166 | ext4_io_submit(&io_submit); | ||
2167 | return ret; | ||
2136 | } | 2168 | } |
2137 | 2169 | ||
2138 | 2170 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |
2139 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
2140 | sector_t logical, long blk_cnt) | ||
2141 | { | 2171 | { |
2142 | int nr_pages, i; | 2172 | int nr_pages, i; |
2143 | pgoff_t index, end; | 2173 | pgoff_t index, end; |
@@ -2145,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2145 | struct inode *inode = mpd->inode; | 2175 | struct inode *inode = mpd->inode; |
2146 | struct address_space *mapping = inode->i_mapping; | 2176 | struct address_space *mapping = inode->i_mapping; |
2147 | 2177 | ||
2148 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2178 | index = mpd->first_page; |
2149 | end = (logical + blk_cnt - 1) >> | 2179 | end = mpd->next_page - 1; |
2150 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2151 | while (index <= end) { | 2180 | while (index <= end) { |
2152 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
2153 | if (nr_pages == 0) | 2182 | if (nr_pages == 0) |
@@ -2187,35 +2216,32 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2187 | } | 2216 | } |
2188 | 2217 | ||
2189 | /* | 2218 | /* |
2190 | * mpage_da_map_blocks - go through given space | 2219 | * mpage_da_map_and_submit - go through given space, map them |
2220 | * if necessary, and then submit them for I/O | ||
2191 | * | 2221 | * |
2192 | * @mpd - bh describing space | 2222 | * @mpd - bh describing space |
2193 | * | 2223 | * |
2194 | * The function skips space we know is already mapped to disk blocks. | 2224 | * The function skips space we know is already mapped to disk blocks. |
2195 | * | 2225 | * |
2196 | */ | 2226 | */ |
2197 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2227 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) |
2198 | { | 2228 | { |
2199 | int err, blks, get_blocks_flags; | 2229 | int err, blks, get_blocks_flags; |
2200 | struct ext4_map_blocks map; | 2230 | struct ext4_map_blocks map, *mapp = NULL; |
2201 | sector_t next = mpd->b_blocknr; | 2231 | sector_t next = mpd->b_blocknr; |
2202 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2232 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2203 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2233 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
2204 | handle_t *handle = NULL; | 2234 | handle_t *handle = NULL; |
2205 | 2235 | ||
2206 | /* | 2236 | /* |
2207 | * We consider only non-mapped and non-allocated blocks | 2237 | * If the blocks are mapped already, or we couldn't accumulate |
2238 | * any blocks, then proceed immediately to the submission stage. | ||
2208 | */ | 2239 | */ |
2209 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2240 | if ((mpd->b_size == 0) || |
2210 | !(mpd->b_state & (1 << BH_Delay)) && | 2241 | ((mpd->b_state & (1 << BH_Mapped)) && |
2211 | !(mpd->b_state & (1 << BH_Unwritten))) | 2242 | !(mpd->b_state & (1 << BH_Delay)) && |
2212 | return 0; | 2243 | !(mpd->b_state & (1 << BH_Unwritten)))) |
2213 | 2244 | goto submit_io; | |
2214 | /* | ||
2215 | * If we didn't accumulate anything to write simply return | ||
2216 | */ | ||
2217 | if (!mpd->b_size) | ||
2218 | return 0; | ||
2219 | 2245 | ||
2220 | handle = ext4_journal_current_handle(); | 2246 | handle = ext4_journal_current_handle(); |
2221 | BUG_ON(!handle); | 2247 | BUG_ON(!handle); |
@@ -2231,7 +2257,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2231 | * affects functions in many different parts of the allocation | 2257 | * affects functions in many different parts of the allocation |
2232 | * call path. This flag exists primarily because we don't | 2258 | * call path. This flag exists primarily because we don't |
2233 | * want to change *many* call functions, so ext4_map_blocks() | 2259 | * want to change *many* call functions, so ext4_map_blocks() |
2234 | * will set the magic i_delalloc_reserved_flag once the | 2260 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the |
2235 | * inode's allocation semaphore is taken. | 2261 | * inode's allocation semaphore is taken. |
2236 | * | 2262 | * |
2237 | * If the blocks in questions were delalloc blocks, set | 2263 | * If the blocks in questions were delalloc blocks, set |
@@ -2252,17 +2278,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2252 | 2278 | ||
2253 | err = blks; | 2279 | err = blks; |
2254 | /* | 2280 | /* |
2255 | * If get block returns with error we simply | 2281 | * If get block returns EAGAIN or ENOSPC and there |
2256 | * return. Later writepage will redirty the page and | 2282 | * appears to be free blocks we will just let |
2257 | * writepages will find the dirty page again | 2283 | * mpage_da_submit_io() unlock all of the pages. |
2258 | */ | 2284 | */ |
2259 | if (err == -EAGAIN) | 2285 | if (err == -EAGAIN) |
2260 | return 0; | 2286 | goto submit_io; |
2261 | 2287 | ||
2262 | if (err == -ENOSPC && | 2288 | if (err == -ENOSPC && |
2263 | ext4_count_free_blocks(sb)) { | 2289 | ext4_count_free_blocks(sb)) { |
2264 | mpd->retval = err; | 2290 | mpd->retval = err; |
2265 | return 0; | 2291 | goto submit_io; |
2266 | } | 2292 | } |
2267 | 2293 | ||
2268 | /* | 2294 | /* |
@@ -2285,12 +2311,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2285 | ext4_print_free_blocks(mpd->inode); | 2311 | ext4_print_free_blocks(mpd->inode); |
2286 | } | 2312 | } |
2287 | /* invalidate all the pages */ | 2313 | /* invalidate all the pages */ |
2288 | ext4_da_block_invalidatepages(mpd, next, | 2314 | ext4_da_block_invalidatepages(mpd); |
2289 | mpd->b_size >> mpd->inode->i_blkbits); | 2315 | |
2290 | return err; | 2316 | /* Mark this page range as having been completed */ |
2317 | mpd->io_done = 1; | ||
2318 | return; | ||
2291 | } | 2319 | } |
2292 | BUG_ON(blks == 0); | 2320 | BUG_ON(blks == 0); |
2293 | 2321 | ||
2322 | mapp = &map; ||
2294 | if (map.m_flags & EXT4_MAP_NEW) { | 2323 | if (map.m_flags & EXT4_MAP_NEW) { |
2295 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | 2324 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; |
2296 | int i; | 2325 | int i; |
@@ -2299,18 +2328,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2299 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 2328 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2300 | } | 2329 | } |
2301 | 2330 | ||
2302 | /* | ||
2303 | * If blocks are delayed marked, we need to | ||
2304 | * put actual blocknr and drop delayed bit | ||
2305 | */ | ||
2306 | if ((mpd->b_state & (1 << BH_Delay)) || | ||
2307 | (mpd->b_state & (1 << BH_Unwritten))) | ||
2308 | mpage_put_bnr_to_bhs(mpd, &map); | ||
2309 | |||
2310 | if (ext4_should_order_data(mpd->inode)) { | 2331 | if (ext4_should_order_data(mpd->inode)) { |
2311 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2332 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
2312 | if (err) | 2333 | if (err) |
2313 | return err; | 2334 | /* This only happens if the journal is aborted */ |
2335 | return; | ||
2314 | } | 2336 | } |
2315 | 2337 | ||
2316 | /* | 2338 | /* |
@@ -2321,10 +2343,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2321 | disksize = i_size_read(mpd->inode); | 2343 | disksize = i_size_read(mpd->inode); |
2322 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | 2344 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { |
2323 | ext4_update_i_disksize(mpd->inode, disksize); | 2345 | ext4_update_i_disksize(mpd->inode, disksize); |
2324 | return ext4_mark_inode_dirty(handle, mpd->inode); | 2346 | err = ext4_mark_inode_dirty(handle, mpd->inode); |
2347 | if (err) | ||
2348 | ext4_error(mpd->inode->i_sb, | ||
2349 | "Failed to mark inode %lu dirty", | ||
2350 | mpd->inode->i_ino); | ||
2325 | } | 2351 | } |
2326 | 2352 | ||
2327 | return 0; | 2353 | submit_io: |
2354 | mpage_da_submit_io(mpd, mapp); | ||
2355 | mpd->io_done = 1; | ||
2328 | } | 2356 | } |
2329 | 2357 | ||
2330 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 2358 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
@@ -2401,9 +2429,7 @@ flush_it: | |||
2401 | * We couldn't merge the block to our extent, so we | 2429 | * We couldn't merge the block to our extent, so we |
2402 | * need to flush current extent and start new one | 2430 | * need to flush current extent and start new one |
2403 | */ | 2431 | */ |
2404 | if (mpage_da_map_blocks(mpd) == 0) | 2432 | mpage_da_map_and_submit(mpd); |
2405 | mpage_da_submit_io(mpd); | ||
2406 | mpd->io_done = 1; | ||
2407 | return; | 2433 | return; |
2408 | } | 2434 | } |
2409 | 2435 | ||
@@ -2413,104 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2413 | } | 2439 | } |
2414 | 2440 | ||
2415 | /* | 2441 | /* |
2416 | * __mpage_da_writepage - finds extent of pages and blocks | ||
2417 | * | ||
2418 | * @page: page to consider | ||
2419 | * @wbc: not used, we just follow rules | ||
2420 | * @data: context | ||
2421 | * | ||
2422 | * The function finds extents of pages and scan them for all blocks. | ||
2423 | */ | ||
2424 | static int __mpage_da_writepage(struct page *page, | ||
2425 | struct writeback_control *wbc, void *data) | ||
2426 | { | ||
2427 | struct mpage_da_data *mpd = data; | ||
2428 | struct inode *inode = mpd->inode; | ||
2429 | struct buffer_head *bh, *head; | ||
2430 | sector_t logical; | ||
2431 | |||
2432 | /* | ||
2433 | * Can we merge this page to current extent? | ||
2434 | */ | ||
2435 | if (mpd->next_page != page->index) { | ||
2436 | /* | ||
2437 | * Nope, we can't. So, we map non-allocated blocks | ||
2438 | * and start IO on them using writepage() | ||
2439 | */ | ||
2440 | if (mpd->next_page != mpd->first_page) { | ||
2441 | if (mpage_da_map_blocks(mpd) == 0) | ||
2442 | mpage_da_submit_io(mpd); | ||
2443 | /* | ||
2444 | * skip rest of the page in the page_vec | ||
2445 | */ | ||
2446 | mpd->io_done = 1; | ||
2447 | redirty_page_for_writepage(wbc, page); | ||
2448 | unlock_page(page); | ||
2449 | return MPAGE_DA_EXTENT_TAIL; | ||
2450 | } | ||
2451 | |||
2452 | /* | ||
2453 | * Start next extent of pages ... | ||
2454 | */ | ||
2455 | mpd->first_page = page->index; | ||
2456 | |||
2457 | /* | ||
2458 | * ... and blocks | ||
2459 | */ | ||
2460 | mpd->b_size = 0; | ||
2461 | mpd->b_state = 0; | ||
2462 | mpd->b_blocknr = 0; | ||
2463 | } | ||
2464 | |||
2465 | mpd->next_page = page->index + 1; | ||
2466 | logical = (sector_t) page->index << | ||
2467 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2468 | |||
2469 | if (!page_has_buffers(page)) { | ||
2470 | mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE, | ||
2471 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2472 | if (mpd->io_done) | ||
2473 | return MPAGE_DA_EXTENT_TAIL; | ||
2474 | } else { | ||
2475 | /* | ||
2476 | * Page with regular buffer heads, just add all dirty ones | ||
2477 | */ | ||
2478 | head = page_buffers(page); | ||
2479 | bh = head; | ||
2480 | do { | ||
2481 | BUG_ON(buffer_locked(bh)); | ||
2482 | /* | ||
2483 | * We need to try to allocate | ||
2484 | * unmapped blocks in the same page. | ||
2485 | * Otherwise we won't make progress | ||
2486 | * with the page in ext4_writepage | ||
2487 | */ | ||
2488 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2489 | mpage_add_bh_to_extent(mpd, logical, | ||
2490 | bh->b_size, | ||
2491 | bh->b_state); | ||
2492 | if (mpd->io_done) | ||
2493 | return MPAGE_DA_EXTENT_TAIL; | ||
2494 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2495 | /* | ||
2496 | * mapped dirty buffer. We need to update | ||
2497 | * the b_state because we look at | ||
2498 | * b_state in mpage_da_map_blocks. We don't | ||
2499 | * update b_size because if we find an | ||
2500 | * unmapped buffer_head later we need to | ||
2501 | * use the b_state flag of that buffer_head. | ||
2502 | */ | ||
2503 | if (mpd->b_size == 0) | ||
2504 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2505 | } | ||
2506 | logical++; | ||
2507 | } while ((bh = bh->b_this_page) != head); | ||
2508 | } | ||
2509 | |||
2510 | return 0; | ||
2511 | } | ||
2512 | |||
2513 | /* | ||
2514 | * This is a special get_blocks_t callback which is used by | 2442 | * This is a special get_blocks_t callback which is used by |
2515 | * ext4_da_write_begin(). It will either return mapped block or | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2516 | * reserve space for a single block. | 2444 | * reserve space for a single block. |
@@ -2550,8 +2478,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2550 | if (buffer_delay(bh)) | 2478 | if (buffer_delay(bh)) |
2551 | return 0; /* Not sure this could or should happen */ | 2479 | return 0; /* Not sure this could or should happen */ |
2552 | /* | 2480 | /* |
2553 | * XXX: __block_prepare_write() unmaps passed block, | 2481 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2554 | * is it OK? | ||
2555 | */ | 2482 | */ |
2556 | ret = ext4_da_reserve_space(inode, iblock); | 2483 | ret = ext4_da_reserve_space(inode, iblock); |
2557 | if (ret) | 2484 | if (ret) |
@@ -2583,7 +2510,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2583 | /* | 2510 | /* |
2584 | * This function is used as a standard get_block_t calback function | 2511 | * This function is used as a standard get_block_t calback function |
2585 | * when there is no desire to allocate any blocks. It is used as a | 2512 | * when there is no desire to allocate any blocks. It is used as a |
2586 | * callback function for block_prepare_write() and block_write_full_page(). | 2513 | * callback function for block_write_begin() and block_write_full_page(). |
2587 | * These functions should only try to map a single block at a time. | 2514 | * These functions should only try to map a single block at a time. |
2588 | * | 2515 | * |
2589 | * Since this function doesn't do block allocations even if the caller | 2516 | * Since this function doesn't do block allocations even if the caller |
@@ -2623,6 +2550,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2623 | int ret = 0; | 2550 | int ret = 0; |
2624 | int err; | 2551 | int err; |
2625 | 2552 | ||
2553 | ClearPageChecked(page); | ||
2626 | page_bufs = page_buffers(page); | 2554 | page_bufs = page_buffers(page); |
2627 | BUG_ON(!page_bufs); | 2555 | BUG_ON(!page_bufs); |
2628 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 2556 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
@@ -2661,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2661 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2589 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
2662 | * need to file the inode to the transaction's list in ordered mode because if | 2590 | * need to file the inode to the transaction's list in ordered mode because if |
2663 | * we are writing back data added by write(), the inode is already there and if | 2591 | * we are writing back data added by write(), the inode is already there and if |
2664 | * we are writing back data modified via mmap(), noone guarantees in which | 2592 | * we are writing back data modified via mmap(), no one guarantees in which |
2665 | * transaction the data will hit the disk. In case we are journaling data, we | 2593 | * transaction the data will hit the disk. In case we are journaling data, we |
2666 | * cannot start transaction directly because transaction start ranks above page | 2594 | * cannot start transaction directly because transaction start ranks above page |
2667 | * lock so we have to do some magic. | 2595 | * lock so we have to do some magic. |
@@ -2700,84 +2628,57 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | |||
2700 | static int ext4_writepage(struct page *page, | 2628 | static int ext4_writepage(struct page *page, |
2701 | struct writeback_control *wbc) | 2629 | struct writeback_control *wbc) |
2702 | { | 2630 | { |
2703 | int ret = 0; | 2631 | int ret = 0, commit_write = 0; |
2704 | loff_t size; | 2632 | loff_t size; |
2705 | unsigned int len; | 2633 | unsigned int len; |
2706 | struct buffer_head *page_bufs = NULL; | 2634 | struct buffer_head *page_bufs = NULL; |
2707 | struct inode *inode = page->mapping->host; | 2635 | struct inode *inode = page->mapping->host; |
2708 | 2636 | ||
2709 | trace_ext4_writepage(inode, page); | 2637 | trace_ext4_writepage(page); |
2710 | size = i_size_read(inode); | 2638 | size = i_size_read(inode); |
2711 | if (page->index == size >> PAGE_CACHE_SHIFT) | 2639 | if (page->index == size >> PAGE_CACHE_SHIFT) |
2712 | len = size & ~PAGE_CACHE_MASK; | 2640 | len = size & ~PAGE_CACHE_MASK; |
2713 | else | 2641 | else |
2714 | len = PAGE_CACHE_SIZE; | 2642 | len = PAGE_CACHE_SIZE; |
2715 | 2643 | ||
2716 | if (page_has_buffers(page)) { | 2644 | /* |
2717 | page_bufs = page_buffers(page); | 2645 | * If the page does not have buffers (for whatever reason), |
2718 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2646 | * try to create them using __block_write_begin. If this |
2719 | ext4_bh_delay_or_unwritten)) { | 2647 | * fails, redirty the page and move on. |
2720 | /* | 2648 | */ |
2721 | * We don't want to do block allocation | 2649 | if (!page_has_buffers(page)) { |
2722 | * So redirty the page and return | 2650 | if (__block_write_begin(page, 0, len, |
2723 | * We may reach here when we do a journal commit | 2651 | noalloc_get_block_write)) { |
2724 | * via journal_submit_inode_data_buffers. | 2652 | redirty_page: |
2725 | * If we don't have mapping block we just ignore | ||
2726 | * them. We can also reach here via shrink_page_list | ||
2727 | */ | ||
2728 | redirty_page_for_writepage(wbc, page); | 2653 | redirty_page_for_writepage(wbc, page); |
2729 | unlock_page(page); | 2654 | unlock_page(page); |
2730 | return 0; | 2655 | return 0; |
2731 | } | 2656 | } |
2732 | } else { | 2657 | commit_write = 1; |
2658 | } | ||
2659 | page_bufs = page_buffers(page); | ||
2660 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2661 | ext4_bh_delay_or_unwritten)) { | ||
2733 | /* | 2662 | /* |
2734 | * The test for page_has_buffers() is subtle: | 2663 | * We don't want to do block allocation, so redirty |
2735 | * We know the page is dirty but it lost buffers. That means | 2664 | * the page and return. We may reach here when we do |
2736 | * that at some moment in time after write_begin()/write_end() | 2665 | * a journal commit via journal_submit_inode_data_buffers. |
2737 | * has been called all buffers have been clean and thus they | 2666 | * We can also reach here via shrink_page_list |
2738 | * must have been written at least once. So they are all | ||
2739 | * mapped and we can happily proceed with mapping them | ||
2740 | * and writing the page. | ||
2741 | * | ||
2742 | * Try to initialize the buffer_heads and check whether | ||
2743 | * all are mapped and non delay. We don't want to | ||
2744 | * do block allocation here. | ||
2745 | */ | 2667 | */ |
2746 | ret = block_prepare_write(page, 0, len, | 2668 | goto redirty_page; |
2747 | noalloc_get_block_write); | 2669 | } |
2748 | if (!ret) { | 2670 | if (commit_write) |
2749 | page_bufs = page_buffers(page); | ||
2750 | /* check whether all are mapped and non delay */ | ||
2751 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | ||
2752 | ext4_bh_delay_or_unwritten)) { | ||
2753 | redirty_page_for_writepage(wbc, page); | ||
2754 | unlock_page(page); | ||
2755 | return 0; | ||
2756 | } | ||
2757 | } else { | ||
2758 | /* | ||
2759 | * We can't do block allocation here | ||
2760 | * so just redity the page and unlock | ||
2761 | * and return | ||
2762 | */ | ||
2763 | redirty_page_for_writepage(wbc, page); | ||
2764 | unlock_page(page); | ||
2765 | return 0; | ||
2766 | } | ||
2767 | /* now mark the buffer_heads as dirty and uptodate */ | 2671 | /* now mark the buffer_heads as dirty and uptodate */ |
2768 | block_commit_write(page, 0, len); | 2672 | block_commit_write(page, 0, len); |
2769 | } | ||
2770 | 2673 | ||
2771 | if (PageChecked(page) && ext4_should_journal_data(inode)) { | 2674 | if (PageChecked(page) && ext4_should_journal_data(inode)) |
2772 | /* | 2675 | /* |
2773 | * It's mmapped pagecache. Add buffers and journal it. There | 2676 | * It's mmapped pagecache. Add buffers and journal it. There |
2774 | * doesn't seem much point in redirtying the page here. | 2677 | * doesn't seem much point in redirtying the page here. |
2775 | */ | 2678 | */ |
2776 | ClearPageChecked(page); | ||
2777 | return __ext4_journalled_writepage(page, len); | 2679 | return __ext4_journalled_writepage(page, len); |
2778 | } | ||
2779 | 2680 | ||
2780 | if (page_bufs && buffer_uninit(page_bufs)) { | 2681 | if (buffer_uninit(page_bufs)) { |
2781 | ext4_set_bh_endio(page_bufs, inode); | 2682 | ext4_set_bh_endio(page_bufs, inode); |
2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2683 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2783 | wbc, ext4_end_io_buffer_write); | 2684 | wbc, ext4_end_io_buffer_write); |
@@ -2790,7 +2691,7 @@ static int ext4_writepage(struct page *page, | |||
2790 | 2691 | ||
2791 | /* | 2692 | /* |
2792 | * This is called via ext4_da_writepages() to | 2693 | * This is called via ext4_da_writepages() to |
2793 | * calulate the total number of credits to reserve to fit | 2694 | * calculate the total number of credits to reserve to fit |
2794 | * a single extent allocation into a single transaction, | 2695 | * a single extent allocation into a single transaction, |
2795 | * ext4_da_writpeages() will loop calling this before | 2696 | * ext4_da_writpeages() will loop calling this before |
2796 | * the block allocation. | 2697 | * the block allocation. |
@@ -2815,37 +2716,42 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2815 | 2716 | ||
2816 | /* | 2717 | /* |
2817 | * write_cache_pages_da - walk the list of dirty pages of the given | 2718 | * write_cache_pages_da - walk the list of dirty pages of the given |
2818 | * address space and call the callback function (which usually writes | 2719 | * address space and accumulate pages that need writing, and call |
2819 | * the pages). | 2720 | * mpage_da_map_and_submit to map a single contiguous memory region |
2820 | * | 2721 | * and then write them. |
2821 | * This is a forked version of write_cache_pages(). Differences: | ||
2822 | * Range cyclic is ignored. | ||
2823 | * no_nrwrite_index_update is always presumed true | ||
2824 | */ | 2722 | */ |
2825 | static int write_cache_pages_da(struct address_space *mapping, | 2723 | static int write_cache_pages_da(struct address_space *mapping, |
2826 | struct writeback_control *wbc, | 2724 | struct writeback_control *wbc, |
2827 | struct mpage_da_data *mpd) | 2725 | struct mpage_da_data *mpd, |
2726 | pgoff_t *done_index) | ||
2828 | { | 2727 | { |
2829 | int ret = 0; | 2728 | struct buffer_head *bh, *head; |
2830 | int done = 0; | 2729 | struct inode *inode = mapping->host; |
2831 | struct pagevec pvec; | 2730 | struct pagevec pvec; |
2832 | int nr_pages; | 2731 | unsigned int nr_pages; |
2833 | pgoff_t index; | 2732 | sector_t logical; |
2834 | pgoff_t end; /* Inclusive */ | 2733 | pgoff_t index, end; |
2835 | long nr_to_write = wbc->nr_to_write; | 2734 | long nr_to_write = wbc->nr_to_write; |
2836 | 2735 | int i, tag, ret = 0; | |
2736 | |||
2737 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2738 | mpd->wbc = wbc; | ||
2739 | mpd->inode = inode; | ||
2837 | pagevec_init(&pvec, 0); | 2740 | pagevec_init(&pvec, 0); |
2838 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2741 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2839 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2742 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2840 | 2743 | ||
2841 | while (!done && (index <= end)) { | 2744 | if (wbc->sync_mode == WB_SYNC_ALL) |
2842 | int i; | 2745 | tag = PAGECACHE_TAG_TOWRITE; |
2746 | else | ||
2747 | tag = PAGECACHE_TAG_DIRTY; | ||
2843 | 2748 | ||
2844 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2749 | *done_index = index; |
2845 | PAGECACHE_TAG_DIRTY, | 2750 | while (index <= end) { |
2751 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | ||
2846 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2752 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2847 | if (nr_pages == 0) | 2753 | if (nr_pages == 0) |
2848 | break; | 2754 | return 0; |
2849 | 2755 | ||
2850 | for (i = 0; i < nr_pages; i++) { | 2756 | for (i = 0; i < nr_pages; i++) { |
2851 | struct page *page = pvec.pages[i]; | 2757 | struct page *page = pvec.pages[i]; |
@@ -2857,58 +2763,98 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2857 | * mapping. However, page->index will not change | 2763 | * mapping. However, page->index will not change |
2858 | * because we have a reference on the page. | 2764 | * because we have a reference on the page. |
2859 | */ | 2765 | */ |
2860 | if (page->index > end) { | 2766 | if (page->index > end) |
2861 | done = 1; | 2767 | goto out; |
2862 | break; | 2768 | |
2769 | *done_index = page->index + 1; | ||
2770 | |||
2771 | /* | ||
2772 | * If we can't merge this page, and we have | ||
2773 | * accumulated an contiguous region, write it | ||
2774 | */ | ||
2775 | if ((mpd->next_page != page->index) && | ||
2776 | (mpd->next_page != mpd->first_page)) { | ||
2777 | mpage_da_map_and_submit(mpd); | ||
2778 | goto ret_extent_tail; | ||
2863 | } | 2779 | } |
2864 | 2780 | ||
2865 | lock_page(page); | 2781 | lock_page(page); |
2866 | 2782 | ||
2867 | /* | 2783 | /* |
2868 | * Page truncated or invalidated. We can freely skip it | 2784 | * If the page is no longer dirty, or its |
2869 | * then, even for data integrity operations: the page | 2785 | * mapping no longer corresponds to inode we |
2870 | * has disappeared concurrently, so there could be no | 2786 | * are writing (which means it has been |
2871 | * real expectation of this data interity operation | 2787 | * truncated or invalidated), or the page is |
2872 | * even if there is now a new, dirty page at the same | 2788 | * already under writeback and we are not |
2873 | * pagecache address. | 2789 | * doing a data integrity writeback, skip the page |
2874 | */ | 2790 | */ |
2875 | if (unlikely(page->mapping != mapping)) { | 2791 | if (!PageDirty(page) || |
2876 | continue_unlock: | 2792 | (PageWriteback(page) && |
2793 | (wbc->sync_mode == WB_SYNC_NONE)) || | ||
2794 | unlikely(page->mapping != mapping)) { | ||
2877 | unlock_page(page); | 2795 | unlock_page(page); |
2878 | continue; | 2796 | continue; |
2879 | } | 2797 | } |
2880 | 2798 | ||
2881 | if (!PageDirty(page)) { | 2799 | wait_on_page_writeback(page); |
2882 | /* someone wrote it for us */ | ||
2883 | goto continue_unlock; | ||
2884 | } | ||
2885 | |||
2886 | if (PageWriteback(page)) { | ||
2887 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2888 | wait_on_page_writeback(page); | ||
2889 | else | ||
2890 | goto continue_unlock; | ||
2891 | } | ||
2892 | |||
2893 | BUG_ON(PageWriteback(page)); | 2800 | BUG_ON(PageWriteback(page)); |
2894 | if (!clear_page_dirty_for_io(page)) | ||
2895 | goto continue_unlock; | ||
2896 | 2801 | ||
2897 | ret = __mpage_da_writepage(page, wbc, mpd); | 2802 | if (mpd->next_page != page->index) |
2898 | if (unlikely(ret)) { | 2803 | mpd->first_page = page->index; |
2899 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | 2804 | mpd->next_page = page->index + 1; |
2900 | unlock_page(page); | 2805 | logical = (sector_t) page->index << |
2901 | ret = 0; | 2806 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2902 | } else { | 2807 | |
2903 | done = 1; | 2808 | if (!page_has_buffers(page)) { |
2904 | break; | 2809 | mpage_add_bh_to_extent(mpd, logical, |
2905 | } | 2810 | PAGE_CACHE_SIZE, |
2811 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2812 | if (mpd->io_done) | ||
2813 | goto ret_extent_tail; | ||
2814 | } else { | ||
2815 | /* | ||
2816 | * Page with regular buffer heads, | ||
2817 | * just add all dirty ones | ||
2818 | */ | ||
2819 | head = page_buffers(page); | ||
2820 | bh = head; | ||
2821 | do { | ||
2822 | BUG_ON(buffer_locked(bh)); | ||
2823 | /* | ||
2824 | * We need to try to allocate | ||
2825 | * unmapped blocks in the same page. | ||
2826 | * Otherwise we won't make progress | ||
2827 | * with the page in ext4_writepage | ||
2828 | */ | ||
2829 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2830 | mpage_add_bh_to_extent(mpd, logical, | ||
2831 | bh->b_size, | ||
2832 | bh->b_state); | ||
2833 | if (mpd->io_done) | ||
2834 | goto ret_extent_tail; | ||
2835 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2836 | /* | ||
2837 | * mapped dirty buffer. We need | ||
2838 | * to update the b_state | ||
2839 | * because we look at b_state | ||
2840 | * in mpage_da_map_blocks. We | ||
2841 | * don't update b_size because | ||
2842 | * if we find an unmapped | ||
2843 | * buffer_head later we need to | ||
2844 | * use the b_state flag of that | ||
2845 | * buffer_head. | ||
2846 | */ | ||
2847 | if (mpd->b_size == 0) | ||
2848 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2849 | } | ||
2850 | logical++; | ||
2851 | } while ((bh = bh->b_this_page) != head); | ||
2906 | } | 2852 | } |
2907 | 2853 | ||
2908 | if (nr_to_write > 0) { | 2854 | if (nr_to_write > 0) { |
2909 | nr_to_write--; | 2855 | nr_to_write--; |
2910 | if (nr_to_write == 0 && | 2856 | if (nr_to_write == 0 && |
2911 | wbc->sync_mode == WB_SYNC_NONE) { | 2857 | wbc->sync_mode == WB_SYNC_NONE) |
2912 | /* | 2858 | /* |
2913 | * We stop writing back only if we are | 2859 | * We stop writing back only if we are |
2914 | * not doing integrity sync. In case of | 2860 | * not doing integrity sync. In case of |
@@ -2919,14 +2865,18 @@ continue_unlock: | |||
2919 | * pages, but have not synced all of the | 2865 | * pages, but have not synced all of the |
2920 | * old dirty pages. | 2866 | * old dirty pages. |
2921 | */ | 2867 | */ |
2922 | done = 1; | 2868 | goto out; |
2923 | break; | ||
2924 | } | ||
2925 | } | 2869 | } |
2926 | } | 2870 | } |
2927 | pagevec_release(&pvec); | 2871 | pagevec_release(&pvec); |
2928 | cond_resched(); | 2872 | cond_resched(); |
2929 | } | 2873 | } |
2874 | return 0; | ||
2875 | ret_extent_tail: | ||
2876 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2877 | out: | ||
2878 | pagevec_release(&pvec); | ||
2879 | cond_resched(); | ||
2930 | return ret; | 2880 | return ret; |
2931 | } | 2881 | } |
2932 | 2882 | ||
@@ -2940,13 +2890,14 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2940 | struct mpage_da_data mpd; | 2890 | struct mpage_da_data mpd; |
2941 | struct inode *inode = mapping->host; | 2891 | struct inode *inode = mapping->host; |
2942 | int pages_written = 0; | 2892 | int pages_written = 0; |
2943 | long pages_skipped; | ||
2944 | unsigned int max_pages; | 2893 | unsigned int max_pages; |
2945 | int range_cyclic, cycled = 1, io_done = 0; | 2894 | int range_cyclic, cycled = 1, io_done = 0; |
2946 | int needed_blocks, ret = 0; | 2895 | int needed_blocks, ret = 0; |
2947 | long desired_nr_to_write, nr_to_writebump = 0; | 2896 | long desired_nr_to_write, nr_to_writebump = 0; |
2948 | loff_t range_start = wbc->range_start; | 2897 | loff_t range_start = wbc->range_start; |
2949 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2898 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2899 | pgoff_t done_index = 0; | ||
2900 | pgoff_t end; | ||
2950 | 2901 | ||
2951 | trace_ext4_da_writepages(inode, wbc); | 2902 | trace_ext4_da_writepages(inode, wbc); |
2952 | 2903 | ||
@@ -2982,8 +2933,11 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2982 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2933 | wbc->range_start = index << PAGE_CACHE_SHIFT; |
2983 | wbc->range_end = LLONG_MAX; | 2934 | wbc->range_end = LLONG_MAX; |
2984 | wbc->range_cyclic = 0; | 2935 | wbc->range_cyclic = 0; |
2985 | } else | 2936 | end = -1; |
2937 | } else { | ||
2986 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2938 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2939 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2940 | } | ||
2987 | 2941 | ||
2988 | /* | 2942 | /* |
2989 | * This works around two forms of stupidity. The first is in | 2943 | * This works around two forms of stupidity. The first is in |
@@ -3002,9 +2956,12 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3002 | * sbi->max_writeback_mb_bump whichever is smaller. | 2956 | * sbi->max_writeback_mb_bump whichever is smaller. |
3003 | */ | 2957 | */ |
3004 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | 2958 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); |
3005 | if (!range_cyclic && range_whole) | 2959 | if (!range_cyclic && range_whole) { |
3006 | desired_nr_to_write = wbc->nr_to_write * 8; | 2960 | if (wbc->nr_to_write == LONG_MAX) |
3007 | else | 2961 | desired_nr_to_write = wbc->nr_to_write; |
2962 | else | ||
2963 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2964 | } else | ||
3008 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | 2965 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, |
3009 | max_pages); | 2966 | max_pages); |
3010 | if (desired_nr_to_write > max_pages) | 2967 | if (desired_nr_to_write > max_pages) |
@@ -3015,12 +2972,10 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3015 | wbc->nr_to_write = desired_nr_to_write; | 2972 | wbc->nr_to_write = desired_nr_to_write; |
3016 | } | 2973 | } |
3017 | 2974 | ||
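The hunk above tunes how aggressively delayed-allocation writeback is pushed: for a whole-file, non-cyclic sync the requested page count is multiplied by eight (unless it is already LONG_MAX), otherwise it comes from ext4_num_dirty_pages(), and the result is bounded by max_pages derived from s_max_writeback_mb_bump. A minimal sketch with assumed example values (s_max_writeback_mb_bump = 128 and 4 KiB pages, so PAGE_CACHE_SHIFT is 12); the clamp to max_pages sits just outside the visible hunk, so it is only noted in a comment:

max_pages = 128 << (20 - 12);          /* 32768 pages, i.e. 128 MiB */

if (!range_cyclic && range_whole)
        desired_nr_to_write = (wbc->nr_to_write == LONG_MAX) ?
                        wbc->nr_to_write : wbc->nr_to_write * 8;
else
        desired_nr_to_write = ext4_num_dirty_pages(inode, index, max_pages);

/* desired_nr_to_write is then limited to max_pages before being copied
 * into wbc->nr_to_write; the bump is undone later at out_writepages via
 * wbc->nr_to_write -= nr_to_writebump. */
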
3018 | mpd.wbc = wbc; | ||
3019 | mpd.inode = mapping->host; | ||
3020 | |||
3021 | pages_skipped = wbc->pages_skipped; | ||
3022 | |||
3023 | retry: | 2975 | retry: |
2976 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2977 | tag_pages_for_writeback(mapping, index, end); | ||
2978 | |||
3024 | while (!ret && wbc->nr_to_write > 0) { | 2979 | while (!ret && wbc->nr_to_write > 0) { |
3025 | 2980 | ||
3026 | /* | 2981 | /* |
@@ -3043,32 +2998,18 @@ retry: | |||
3043 | } | 2998 | } |
3044 | 2999 | ||
3045 | /* | 3000 | /* |
3046 | * Now call __mpage_da_writepage to find the next | 3001 | * Now call write_cache_pages_da() to find the next |
3047 | * contiguous region of logical blocks that need | 3002 | * contiguous region of logical blocks that need |
3048 | * blocks to be allocated by ext4. We don't actually | 3003 | * blocks to be allocated by ext4 and submit them. |
3049 | * submit the blocks for I/O here, even though | ||
3050 | * write_cache_pages thinks it will, and will set the | ||
3051 | * pages as clean for write before calling | ||
3052 | * __mpage_da_writepage(). | ||
3053 | */ | 3004 | */ |
3054 | mpd.b_size = 0; | 3005 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3055 | mpd.b_state = 0; | ||
3056 | mpd.b_blocknr = 0; | ||
3057 | mpd.first_page = 0; | ||
3058 | mpd.next_page = 0; | ||
3059 | mpd.io_done = 0; | ||
3060 | mpd.pages_written = 0; | ||
3061 | mpd.retval = 0; | ||
3062 | ret = write_cache_pages_da(mapping, wbc, &mpd); | ||
3063 | /* | 3006 | /* |
3064 | * If we have a contiguous extent of pages and we | 3007 | * If we have a contiguous extent of pages and we |
3065 | * haven't done the I/O yet, map the blocks and submit | 3008 | * haven't done the I/O yet, map the blocks and submit |
3066 | * them for I/O. | 3009 | * them for I/O. |
3067 | */ | 3010 | */ |
3068 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 3011 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
3069 | if (mpage_da_map_blocks(&mpd) == 0) | 3012 | mpage_da_map_and_submit(&mpd); |
3070 | mpage_da_submit_io(&mpd); | ||
3071 | mpd.io_done = 1; | ||
3072 | ret = MPAGE_DA_EXTENT_TAIL; | 3013 | ret = MPAGE_DA_EXTENT_TAIL; |
3073 | } | 3014 | } |
3074 | trace_ext4_da_write_pages(inode, &mpd); | 3015 | trace_ext4_da_write_pages(inode, &mpd); |
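With write_cache_pages_da() now collecting and submitting extents itself, the caller's loop body shrinks: the separate mpage_da_map_blocks()/mpage_da_submit_io() pair becomes a single mpage_da_map_and_submit() call, and the per-iteration zeroing of the mpd fields disappears from this function (presumably handled inside the callee now). Condensed from the hunk above:

ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/* a contiguous extent was collected but not yet written out:
 * allocate its blocks and submit the I/O in a single step */
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
        mpage_da_map_and_submit(&mpd);
        ret = MPAGE_DA_EXTENT_TAIL;
}
trace_ext4_da_write_pages(inode, &mpd);
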
@@ -3082,7 +3023,6 @@ retry: | |||
3082 | * and try again | 3023 | * and try again |
3083 | */ | 3024 | */ |
3084 | jbd2_journal_force_commit_nested(sbi->s_journal); | 3025 | jbd2_journal_force_commit_nested(sbi->s_journal); |
3085 | wbc->pages_skipped = pages_skipped; | ||
3086 | ret = 0; | 3026 | ret = 0; |
3087 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 3027 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
3088 | /* | 3028 | /* |
@@ -3090,7 +3030,6 @@ retry: | |||
3090 | * rest of the pages | 3030 | * rest of the pages |
3091 | */ | 3031 | */ |
3092 | pages_written += mpd.pages_written; | 3032 | pages_written += mpd.pages_written; |
3093 | wbc->pages_skipped = pages_skipped; | ||
3094 | ret = 0; | 3033 | ret = 0; |
3095 | io_done = 1; | 3034 | io_done = 1; |
3096 | } else if (wbc->nr_to_write) | 3035 | } else if (wbc->nr_to_write) |
@@ -3108,21 +3047,15 @@ retry: | |||
3108 | wbc->range_end = mapping->writeback_index - 1; | 3047 | wbc->range_end = mapping->writeback_index - 1; |
3109 | goto retry; | 3048 | goto retry; |
3110 | } | 3049 | } |
3111 | if (pages_skipped != wbc->pages_skipped) | ||
3112 | ext4_msg(inode->i_sb, KERN_CRIT, | ||
3113 | "This should not happen leaving %s " | ||
3114 | "with nr_to_write = %ld ret = %d", | ||
3115 | __func__, wbc->nr_to_write, ret); | ||
3116 | 3050 | ||
3117 | /* Update index */ | 3051 | /* Update index */ |
3118 | index += pages_written; | ||
3119 | wbc->range_cyclic = range_cyclic; | 3052 | wbc->range_cyclic = range_cyclic; |
3120 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 3053 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
3121 | /* | 3054 | /* |
3122 | * set the writeback_index so that range_cyclic | 3055 | * set the writeback_index so that range_cyclic |
3123 | * mode will write it back later | 3056 | * mode will write it back later |
3124 | */ | 3057 | */ |
3125 | mapping->writeback_index = index; | 3058 | mapping->writeback_index = done_index; |
3126 | 3059 | ||
3127 | out_writepages: | 3060 | out_writepages: |
3128 | wbc->nr_to_write -= nr_to_writebump; | 3061 | wbc->nr_to_write -= nr_to_writebump; |
@@ -3367,10 +3300,10 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
3367 | * doing I/O at all. | 3300 | * doing I/O at all. |
3368 | * | 3301 | * |
3369 | * We could call write_cache_pages(), and then redirty all of | 3302 | * We could call write_cache_pages(), and then redirty all of |
3370 | * the pages by calling redirty_page_for_writeback() but that | 3303 | * the pages by calling redirty_page_for_writepage() but that |
3371 | * would be ugly in the extreme. So instead we would need to | 3304 | * would be ugly in the extreme. So instead we would need to |
3372 | * replicate parts of the code in the above functions, | 3305 | * replicate parts of the code in the above functions, |
3373 | * simplifying them becuase we wouldn't actually intend to | 3306 | * simplifying them because we wouldn't actually intend to |
3374 | * write out the pages, but rather only collect contiguous | 3307 | * write out the pages, but rather only collect contiguous |
3375 | * logical block extents, call the multi-block allocator, and | 3308 | * logical block extents, call the multi-block allocator, and |
3376 | * then update the buffer heads with the block allocations. | 3309 | * then update the buffer heads with the block allocations. |
@@ -3447,6 +3380,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3447 | 3380 | ||
3448 | static int ext4_readpage(struct file *file, struct page *page) | 3381 | static int ext4_readpage(struct file *file, struct page *page) |
3449 | { | 3382 | { |
3383 | trace_ext4_readpage(page); | ||
3450 | return mpage_readpage(page, ext4_get_block); | 3384 | return mpage_readpage(page, ext4_get_block); |
3451 | } | 3385 | } |
3452 | 3386 | ||
@@ -3457,15 +3391,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3457 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3391 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3458 | } | 3392 | } |
3459 | 3393 | ||
3460 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3461 | { | ||
3462 | BUG_ON(!io); | ||
3463 | if (io->page) | ||
3464 | put_page(io->page); | ||
3465 | iput(io->inode); | ||
3466 | kfree(io); | ||
3467 | } | ||
3468 | |||
3469 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3394 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3470 | { | 3395 | { |
3471 | struct buffer_head *head, *bh; | 3396 | struct buffer_head *head, *bh; |
@@ -3490,6 +3415,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
3490 | { | 3415 | { |
3491 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3416 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3492 | 3417 | ||
3418 | trace_ext4_invalidatepage(page, offset); | ||
3419 | |||
3493 | /* | 3420 | /* |
3494 | * free any io_end structure allocated for buffers to be discarded | 3421 | * free any io_end structure allocated for buffers to be discarded |
3495 | */ | 3422 | */ |
@@ -3511,6 +3438,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3511 | { | 3438 | { |
3512 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3439 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3513 | 3440 | ||
3441 | trace_ext4_releasepage(page); | ||
3442 | |||
3514 | WARN_ON(PageChecked(page)); | 3443 | WARN_ON(PageChecked(page)); |
3515 | if (!page_has_buffers(page)) | 3444 | if (!page_has_buffers(page)) |
3516 | return 0; | 3445 | return 0; |
@@ -3582,7 +3511,7 @@ retry: | |||
3582 | loff_t end = offset + iov_length(iov, nr_segs); | 3511 | loff_t end = offset + iov_length(iov, nr_segs); |
3583 | 3512 | ||
3584 | if (end > isize) | 3513 | if (end > isize) |
3585 | vmtruncate(inode, isize); | 3514 | ext4_truncate_failed_write(inode); |
3586 | } | 3515 | } |
3587 | } | 3516 | } |
3588 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3517 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3642,173 +3571,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
3642 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 3571 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3643 | } | 3572 | } |
3644 | 3573 | ||
3645 | static void dump_completed_IO(struct inode * inode) | ||
3646 | { | ||
3647 | #ifdef EXT4_DEBUG | ||
3648 | struct list_head *cur, *before, *after; | ||
3649 | ext4_io_end_t *io, *io0, *io1; | ||
3650 | unsigned long flags; | ||
3651 | |||
3652 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | ||
3653 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | ||
3654 | return; | ||
3655 | } | ||
3656 | |||
3657 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | ||
3658 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3659 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3660 | cur = &io->list; | ||
3661 | before = cur->prev; | ||
3662 | io0 = container_of(before, ext4_io_end_t, list); | ||
3663 | after = cur->next; | ||
3664 | io1 = container_of(after, ext4_io_end_t, list); | ||
3665 | |||
3666 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | ||
3667 | io, inode->i_ino, io0, io1); | ||
3668 | } | ||
3669 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3670 | #endif | ||
3671 | } | ||
3672 | |||
3673 | /* | ||
3674 | * check a range of space and convert unwritten extents to written. | ||
3675 | */ | ||
3676 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3677 | { | ||
3678 | struct inode *inode = io->inode; | ||
3679 | loff_t offset = io->offset; | ||
3680 | ssize_t size = io->size; | ||
3681 | int ret = 0; | ||
3682 | |||
3683 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3684 | "list->prev 0x%p\n", | ||
3685 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3686 | |||
3687 | if (list_empty(&io->list)) | ||
3688 | return ret; | ||
3689 | |||
3690 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3691 | return ret; | ||
3692 | |||
3693 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3694 | if (ret < 0) { | ||
3695 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3696 | "extents to written extents, error is %d" | ||
3697 | " io is still on inode %lu aio dio list\n", | ||
3698 | __func__, ret, inode->i_ino); | ||
3699 | return ret; | ||
3700 | } | ||
3701 | |||
3702 | if (io->iocb) | ||
3703 | aio_complete(io->iocb, io->result, 0); | ||
3704 | /* clear the DIO AIO unwritten flag */ | ||
3705 | io->flag = 0; | ||
3706 | return ret; | ||
3707 | } | ||
3708 | |||
3709 | /* | ||
3710 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3711 | */ | ||
3712 | static void ext4_end_io_work(struct work_struct *work) | ||
3713 | { | ||
3714 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3715 | struct inode *inode = io->inode; | ||
3716 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3717 | unsigned long flags; | ||
3718 | int ret; | ||
3719 | |||
3720 | mutex_lock(&inode->i_mutex); | ||
3721 | ret = ext4_end_io_nolock(io); | ||
3722 | if (ret < 0) { | ||
3723 | mutex_unlock(&inode->i_mutex); | ||
3724 | return; | ||
3725 | } | ||
3726 | |||
3727 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3728 | if (!list_empty(&io->list)) | ||
3729 | list_del_init(&io->list); | ||
3730 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3731 | mutex_unlock(&inode->i_mutex); | ||
3732 | ext4_free_io_end(io); | ||
3733 | } | ||
3734 | |||
3735 | /* | ||
3736 | * This function is called from ext4_sync_file(). | ||
3737 | * | ||
3738 | * When IO is completed, the work to convert unwritten extents to | ||
3739 | * written is queued on workqueue but may not get immediately | ||
3740 | * scheduled. When fsync is called, we need to ensure the | ||
3741 | * conversion is complete before fsync returns. | ||
3742 | * The inode keeps track of a list of pending/completed IO that | ||
3743 | * might needs to do the conversion. This function walks through | ||
3744 | * the list and convert the related unwritten extents for completed IO | ||
3745 | * to written. | ||
3746 | * The function return the number of pending IOs on success. | ||
3747 | */ | ||
3748 | int flush_completed_IO(struct inode *inode) | ||
3749 | { | ||
3750 | ext4_io_end_t *io; | ||
3751 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3752 | unsigned long flags; | ||
3753 | int ret = 0; | ||
3754 | int ret2 = 0; | ||
3755 | |||
3756 | if (list_empty(&ei->i_completed_io_list)) | ||
3757 | return ret; | ||
3758 | |||
3759 | dump_completed_IO(inode); | ||
3760 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3761 | while (!list_empty(&ei->i_completed_io_list)){ | ||
3762 | io = list_entry(ei->i_completed_io_list.next, | ||
3763 | ext4_io_end_t, list); | ||
3764 | /* | ||
3765 | * Calling ext4_end_io_nolock() to convert completed | ||
3766 | * IO to written. | ||
3767 | * | ||
3768 | * When ext4_sync_file() is called, run_queue() may already | ||
3769 | * about to flush the work corresponding to this io structure. | ||
3770 | * It will be upset if it founds the io structure related | ||
3771 | * to the work-to-be schedule is freed. | ||
3772 | * | ||
3773 | * Thus we need to keep the io structure still valid here after | ||
3774 | * convertion finished. The io structure has a flag to | ||
3775 | * avoid double converting from both fsync and background work | ||
3776 | * queue work. | ||
3777 | */ | ||
3778 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3779 | ret = ext4_end_io_nolock(io); | ||
3780 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3781 | if (ret < 0) | ||
3782 | ret2 = ret; | ||
3783 | else | ||
3784 | list_del_init(&io->list); | ||
3785 | } | ||
3786 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3787 | return (ret2 < 0) ? ret2 : 0; | ||
3788 | } | ||
3789 | |||
3790 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3791 | { | ||
3792 | ext4_io_end_t *io = NULL; | ||
3793 | |||
3794 | io = kmalloc(sizeof(*io), flags); | ||
3795 | |||
3796 | if (io) { | ||
3797 | igrab(inode); | ||
3798 | io->inode = inode; | ||
3799 | io->flag = 0; | ||
3800 | io->offset = 0; | ||
3801 | io->size = 0; | ||
3802 | io->page = NULL; | ||
3803 | io->iocb = NULL; | ||
3804 | io->result = 0; | ||
3805 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3806 | INIT_LIST_HEAD(&io->list); | ||
3807 | } | ||
3808 | |||
3809 | return io; | ||
3810 | } | ||
3811 | |||
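This hunk removes dump_completed_IO(), ext4_end_io_nolock(), ext4_end_io_work(), flush_completed_IO() and ext4_init_io_end() from inode.c; nothing replaces them here, so they are presumably relocated rather than dropped. For reference, the core of the removed completion path condenses to the following (not verbatim): convert the unwritten extents covering the completed range, complete the AIO if there is one, then unlink and free the io_end:

mutex_lock(&inode->i_mutex);
if (!list_empty(&io->list) && io->flag == EXT4_IO_UNWRITTEN) {
        ret = ext4_convert_unwritten_extents(inode, io->offset, io->size);
        if (ret < 0) {
                /* conversion failed: leave the io_end queued for retry */
                mutex_unlock(&inode->i_mutex);
                return;
        }
        if (io->iocb)
                aio_complete(io->iocb, io->result, 0);
        io->flag = 0;                   /* clear the unwritten marker */
}
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (!list_empty(&io->list))
        list_del_init(&io->list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
mutex_unlock(&inode->i_mutex);
ext4_free_io_end(io);
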
3812 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3574 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3813 | ssize_t size, void *private, int ret, | 3575 | ssize_t size, void *private, int ret, |
3814 | bool is_async) | 3576 | bool is_async) |
@@ -3828,7 +3590,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3828 | size); | 3590 | size); |
3829 | 3591 | ||
3830 | /* if not aio dio with unwritten extents, just free io and return */ | 3592 | /* if not aio dio with unwritten extents, just free io and return */ |
3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3593 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3832 | ext4_free_io_end(io_end); | 3594 | ext4_free_io_end(io_end); |
3833 | iocb->private = NULL; | 3595 | iocb->private = NULL; |
3834 | out: | 3596 | out: |
@@ -3845,14 +3607,14 @@ out: | |||
3845 | } | 3607 | } |
3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3608 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3847 | 3609 | ||
3848 | /* queue the work to convert unwritten extents to written */ | ||
3849 | queue_work(wq, &io_end->work); | ||
3850 | |||
3851 | /* Add the io_end to per-inode completed aio dio list*/ | 3610 | /* Add the io_end to per-inode completed aio dio list*/ |
3852 | ei = EXT4_I(io_end->inode); | 3611 | ei = EXT4_I(io_end->inode); |
3853 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 3612 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3854 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3613 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3855 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3614 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3615 | |||
3616 | /* queue the work to convert unwritten extents to written */ | ||
3617 | queue_work(wq, &io_end->work); | ||
3856 | iocb->private = NULL; | 3618 | iocb->private = NULL; |
3857 | } | 3619 | } |
3858 | 3620 | ||
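The hunk above only reorders two steps: the io_end is linked onto the per-inode completed-I/O list before the conversion work is queued, so whoever consumes that list (the queued worker, or a flush triggered by fsync) is guaranteed to find the entry. The resulting sequence:

spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

/* only now is the unwritten-extent conversion work queued */
queue_work(wq, &io_end->work);
iocb->private = NULL;
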
@@ -3873,7 +3635,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3873 | goto out; | 3635 | goto out; |
3874 | } | 3636 | } |
3875 | 3637 | ||
3876 | io_end->flag = EXT4_IO_UNWRITTEN; | 3638 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3877 | inode = io_end->inode; | 3639 | inode = io_end->inode; |
3878 | 3640 | ||
3879 | /* Add the io_end to per-inode completed io list*/ | 3641 | /* Add the io_end to per-inode completed io list*/ |
@@ -3901,8 +3663,7 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) | |||
3901 | retry: | 3663 | retry: |
3902 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); | 3664 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); |
3903 | if (!io_end) { | 3665 | if (!io_end) { |
3904 | if (printk_ratelimit()) | 3666 | pr_warn_ratelimited("%s: allocation fail\n", __func__); |
3905 | printk(KERN_WARNING "%s: allocation fail\n", __func__); | ||
3906 | schedule(); | 3667 | schedule(); |
3907 | goto retry; | 3668 | goto retry; |
3908 | } | 3669 | } |
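In the hunk above, the open-coded rate-limited warning is replaced by the pr_warn_ratelimited() helper, which combines the rate-limit check and the KERN_WARNING message in one call:

/* before: open-coded rate limiting */
if (printk_ratelimit())
        printk(KERN_WARNING "%s: allocation fail\n", __func__);

/* after: one helper does the rate-limit check and the print */
pr_warn_ratelimited("%s: allocation fail\n", __func__);
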
@@ -3926,13 +3687,13 @@ retry: | |||
3926 | * preallocated extents, and those write extend the file, no need to | 3687 | * preallocated extents, and those write extend the file, no need to |
3927 | * fall back to buffered IO. | 3688 | * fall back to buffered IO. |
3928 | * | 3689 | * |
3929 | * For holes, we fallocate those blocks, mark them as unintialized | 3690 | * For holes, we fallocate those blocks, mark them as uninitialized |
3930 | * If those blocks were preallocated, we mark sure they are splited, but | 3691 | * If those blocks were preallocated, we mark sure they are splited, but |
3931 | * still keep the range to write as unintialized. | 3692 | * still keep the range to write as uninitialized. |
3932 | * | 3693 | * |
3933 | * The unwrritten extents will be converted to written when DIO is completed. | 3694 | * The unwrritten extents will be converted to written when DIO is completed. |
3934 | * For async direct IO, since the IO may still pending when return, we | 3695 | * For async direct IO, since the IO may still pending when return, we |
3935 | * set up an end_io call back function, which will do the convertion | 3696 | * set up an end_io call back function, which will do the conversion |
3936 | * when async direct IO completed. | 3697 | * when async direct IO completed. |
3937 | * | 3698 | * |
3938 | * If the O_DIRECT write will extend the file then add this inode to the | 3699 | * If the O_DIRECT write will extend the file then add this inode to the |
@@ -3955,7 +3716,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3955 | * We could direct write to holes and fallocate. | 3716 | * We could direct write to holes and fallocate. |
3956 | * | 3717 | * |
3957 | * Allocated blocks to fill the hole are marked as uninitialized | 3718 | * Allocated blocks to fill the hole are marked as uninitialized |
3958 | * to prevent paralel buffered read to expose the stale data | 3719 | * to prevent parallel buffered read to expose the stale data |
3959 | * before DIO complete the data IO. | 3720 | * before DIO complete the data IO. |
3960 | * | 3721 | * |
3961 | * As to previously fallocated extents, ext4 get_block | 3722 | * As to previously fallocated extents, ext4 get_block |
@@ -4016,7 +3777,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
4016 | int err; | 3777 | int err; |
4017 | /* | 3778 | /* |
4018 | * for non AIO case, since the IO is already | 3779 | * for non AIO case, since the IO is already |
4019 | * completed, we could do the convertion right here | 3780 | * completed, we could do the conversion right here |
4020 | */ | 3781 | */ |
4021 | err = ext4_convert_unwritten_extents(inode, | 3782 | err = ext4_convert_unwritten_extents(inode, |
4022 | offset, ret); | 3783 | offset, ret); |
@@ -4037,11 +3798,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
4037 | { | 3798 | { |
4038 | struct file *file = iocb->ki_filp; | 3799 | struct file *file = iocb->ki_filp; |
4039 | struct inode *inode = file->f_mapping->host; | 3800 | struct inode *inode = file->f_mapping->host; |
3801 | ssize_t ret; | ||
4040 | 3802 | ||
3803 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
4041 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3804 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
4042 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3805 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
4043 | 3806 | else | |
4044 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3807 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3808 | trace_ext4_direct_IO_exit(inode, offset, | ||
3809 | iov_length(iov, nr_segs), rw, ret); | ||
3810 | return ret; | ||
4045 | } | 3811 | } |
4046 | 3812 | ||
4047 | /* | 3813 | /* |
@@ -4067,7 +3833,6 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
4067 | .readpage = ext4_readpage, | 3833 | .readpage = ext4_readpage, |
4068 | .readpages = ext4_readpages, | 3834 | .readpages = ext4_readpages, |
4069 | .writepage = ext4_writepage, | 3835 | .writepage = ext4_writepage, |
4070 | .sync_page = block_sync_page, | ||
4071 | .write_begin = ext4_write_begin, | 3836 | .write_begin = ext4_write_begin, |
4072 | .write_end = ext4_ordered_write_end, | 3837 | .write_end = ext4_ordered_write_end, |
4073 | .bmap = ext4_bmap, | 3838 | .bmap = ext4_bmap, |
@@ -4083,7 +3848,6 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
4083 | .readpage = ext4_readpage, | 3848 | .readpage = ext4_readpage, |
4084 | .readpages = ext4_readpages, | 3849 | .readpages = ext4_readpages, |
4085 | .writepage = ext4_writepage, | 3850 | .writepage = ext4_writepage, |
4086 | .sync_page = block_sync_page, | ||
4087 | .write_begin = ext4_write_begin, | 3851 | .write_begin = ext4_write_begin, |
4088 | .write_end = ext4_writeback_write_end, | 3852 | .write_end = ext4_writeback_write_end, |
4089 | .bmap = ext4_bmap, | 3853 | .bmap = ext4_bmap, |
@@ -4099,7 +3863,6 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
4099 | .readpage = ext4_readpage, | 3863 | .readpage = ext4_readpage, |
4100 | .readpages = ext4_readpages, | 3864 | .readpages = ext4_readpages, |
4101 | .writepage = ext4_writepage, | 3865 | .writepage = ext4_writepage, |
4102 | .sync_page = block_sync_page, | ||
4103 | .write_begin = ext4_write_begin, | 3866 | .write_begin = ext4_write_begin, |
4104 | .write_end = ext4_journalled_write_end, | 3867 | .write_end = ext4_journalled_write_end, |
4105 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3868 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -4115,7 +3878,6 @@ static const struct address_space_operations ext4_da_aops = { | |||
4115 | .readpages = ext4_readpages, | 3878 | .readpages = ext4_readpages, |
4116 | .writepage = ext4_writepage, | 3879 | .writepage = ext4_writepage, |
4117 | .writepages = ext4_da_writepages, | 3880 | .writepages = ext4_da_writepages, |
4118 | .sync_page = block_sync_page, | ||
4119 | .write_begin = ext4_da_write_begin, | 3881 | .write_begin = ext4_da_write_begin, |
4120 | .write_end = ext4_da_write_end, | 3882 | .write_end = ext4_da_write_end, |
4121 | .bmap = ext4_bmap, | 3883 | .bmap = ext4_bmap, |
@@ -4152,9 +3914,30 @@ void ext4_set_aops(struct inode *inode) | |||
4152 | int ext4_block_truncate_page(handle_t *handle, | 3914 | int ext4_block_truncate_page(handle_t *handle, |
4153 | struct address_space *mapping, loff_t from) | 3915 | struct address_space *mapping, loff_t from) |
4154 | { | 3916 | { |
3917 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3918 | unsigned length; | ||
3919 | unsigned blocksize; | ||
3920 | struct inode *inode = mapping->host; | ||
3921 | |||
3922 | blocksize = inode->i_sb->s_blocksize; | ||
3923 | length = blocksize - (offset & (blocksize - 1)); | ||
3924 | |||
3925 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3926 | } | ||
3927 | |||
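The wrapper above keeps the old ext4_block_truncate_page() interface but forwards to the new range-based helper, passing the distance from 'from' to the end of its block as the length. A worked example, assuming 4 KiB blocks and pages (so the page offset and block offset coincide):

/* Truncating at byte offset 10000 with an assumed 4096-byte block size: */
unsigned offset = 10000 & (4096 - 1);           /* 1808 */
unsigned length = 4096 - (offset & (4096 - 1)); /* 2288 */
/* ext4_block_zero_page_range(handle, mapping, 10000, 2288) then zeroes
 * bytes 10000..12287, i.e. the tail of the block holding byte 10000.  */
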
3928 | /* | ||
3929 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3930 | * starting from file offset 'from'. The range to be zeroed must | ||
3931 | * be contained within one block. If the specified range exceeds | ||
3932 | * the end of the block it will be shortened to the end of the block | ||
3933 | * that corresponds to 'from'. | ||
3934 | */ | ||
3935 | int ext4_block_zero_page_range(handle_t *handle, | ||
3936 | struct address_space *mapping, loff_t from, loff_t length) | ||
3937 | { | ||
4155 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3938 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
4156 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3939 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
4157 | unsigned blocksize, length, pos; | 3940 | unsigned blocksize, max, pos; |
4158 | ext4_lblk_t iblock; | 3941 | ext4_lblk_t iblock; |
4159 | struct inode *inode = mapping->host; | 3942 | struct inode *inode = mapping->host; |
4160 | struct buffer_head *bh; | 3943 | struct buffer_head *bh; |
@@ -4167,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4167 | return -EINVAL; | 3950 | return -EINVAL; |
4168 | 3951 | ||
4169 | blocksize = inode->i_sb->s_blocksize; | 3952 | blocksize = inode->i_sb->s_blocksize; |
4170 | length = blocksize - (offset & (blocksize - 1)); | 3953 | max = blocksize - (offset & (blocksize - 1)); |
3954 | |||
3955 | /* | ||
3956 | * correct length if it does not fall between | ||
3957 | * 'from' and the end of the block | ||
3958 | */ | ||
3959 | if (length > max || length < 0) | ||
3960 | length = max; | ||
3961 | |||
4171 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3962 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
4172 | 3963 | ||
4173 | if (!page_has_buffers(page)) | 3964 | if (!page_has_buffers(page)) |
@@ -4226,7 +4017,7 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4226 | if (ext4_should_journal_data(inode)) { | 4017 | if (ext4_should_journal_data(inode)) { |
4227 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 4018 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4228 | } else { | 4019 | } else { |
4229 | if (ext4_should_order_data(inode)) | 4020 | if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) |
4230 | err = ext4_jbd2_file_inode(handle, inode); | 4021 | err = ext4_jbd2_file_inode(handle, inode); |
4231 | mark_buffer_dirty(bh); | 4022 | mark_buffer_dirty(bh); |
4232 | } | 4023 | } |
@@ -4262,7 +4053,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) | |||
4262 | * | 4053 | * |
4263 | * When we do truncate() we may have to clean the ends of several | 4054 | * When we do truncate() we may have to clean the ends of several |
4264 | * indirect blocks but leave the blocks themselves alive. Block is | 4055 | * indirect blocks but leave the blocks themselves alive. Block is |
4265 | * partially truncated if some data below the new i_size is refered | 4056 | * partially truncated if some data below the new i_size is referred |
4266 | * from it (and it is on the path to the first completely truncated | 4057 | * from it (and it is on the path to the first completely truncated |
4267 | * data block, indeed). We have to free the top of that path along | 4058 | * data block, indeed). We have to free the top of that path along |
4268 | * with everything to the right of the path. Since no allocation | 4059 | * with everything to the right of the path. Since no allocation |
@@ -4341,6 +4132,9 @@ no_top: | |||
4341 | * | 4132 | * |
4342 | * We release `count' blocks on disk, but (last - first) may be greater | 4133 | * We release `count' blocks on disk, but (last - first) may be greater |
4343 | * than `count' because there can be holes in there. | 4134 | * than `count' because there can be holes in there. |
4135 | * | ||
4136 | * Return 0 on success, 1 on invalid block range | ||
4137 | * and < 0 on fatal error. | ||
4344 | */ | 4138 | */ |
4345 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4139 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4346 | struct buffer_head *bh, | 4140 | struct buffer_head *bh, |
@@ -4350,6 +4144,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4350 | { | 4144 | { |
4351 | __le32 *p; | 4145 | __le32 *p; |
4352 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 4146 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; |
4147 | int err; | ||
4353 | 4148 | ||
4354 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 4149 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
4355 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4150 | flags |= EXT4_FREE_BLOCKS_METADATA; |
@@ -4365,22 +4160,33 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4365 | if (try_to_extend_transaction(handle, inode)) { | 4160 | if (try_to_extend_transaction(handle, inode)) { |
4366 | if (bh) { | 4161 | if (bh) { |
4367 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4162 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4368 | ext4_handle_dirty_metadata(handle, inode, bh); | 4163 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4164 | if (unlikely(err)) | ||
4165 | goto out_err; | ||
4369 | } | 4166 | } |
4370 | ext4_mark_inode_dirty(handle, inode); | 4167 | err = ext4_mark_inode_dirty(handle, inode); |
4371 | ext4_truncate_restart_trans(handle, inode, | 4168 | if (unlikely(err)) |
4372 | blocks_for_truncate(inode)); | 4169 | goto out_err; |
4170 | err = ext4_truncate_restart_trans(handle, inode, | ||
4171 | blocks_for_truncate(inode)); | ||
4172 | if (unlikely(err)) | ||
4173 | goto out_err; | ||
4373 | if (bh) { | 4174 | if (bh) { |
4374 | BUFFER_TRACE(bh, "retaking write access"); | 4175 | BUFFER_TRACE(bh, "retaking write access"); |
4375 | ext4_journal_get_write_access(handle, bh); | 4176 | err = ext4_journal_get_write_access(handle, bh); |
4177 | if (unlikely(err)) | ||
4178 | goto out_err; | ||
4376 | } | 4179 | } |
4377 | } | 4180 | } |
4378 | 4181 | ||
4379 | for (p = first; p < last; p++) | 4182 | for (p = first; p < last; p++) |
4380 | *p = 0; | 4183 | *p = 0; |
4381 | 4184 | ||
4382 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4185 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); |
4383 | return 0; | 4186 | return 0; |
4187 | out_err: | ||
4188 | ext4_std_error(inode->i_sb, err); | ||
4189 | return err; | ||
4384 | } | 4190 | } |
4385 | 4191 | ||
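ext4_clear_blocks() now checks every journalling call instead of ignoring the return values; any failure is reported through ext4_std_error() and propagated to the caller (0 on success, 1 on an invalid block range, < 0 on a fatal error, per the comment added to its description earlier in this patch). The pattern, condensed:

err = ext4_handle_dirty_metadata(handle, inode, bh);
if (unlikely(err))
        goto out_err;
err = ext4_mark_inode_dirty(handle, inode);
if (unlikely(err))
        goto out_err;
err = ext4_truncate_restart_trans(handle, inode,
                                  blocks_for_truncate(inode));
if (unlikely(err))
        goto out_err;
/* (ext4_journal_get_write_access() gets the same treatment) */

out_err:
        ext4_std_error(inode->i_sb, err);  /* report, then propagate */
        return err;
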
4386 | /** | 4192 | /** |
@@ -4391,7 +4197,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4391 | * @first: array of block numbers | 4197 | * @first: array of block numbers |
4392 | * @last: points immediately past the end of array | 4198 | * @last: points immediately past the end of array |
4393 | * | 4199 | * |
4394 | * We are freeing all blocks refered from that array (numbers are stored as | 4200 | * We are freeing all blocks referred from that array (numbers are stored as |
4395 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | 4201 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. |
4396 | * | 4202 | * |
4397 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | 4203 | * We accumulate contiguous runs of blocks to free. Conveniently, if these |
@@ -4414,7 +4220,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4414 | ext4_fsblk_t nr; /* Current block # */ | 4220 | ext4_fsblk_t nr; /* Current block # */ |
4415 | __le32 *p; /* Pointer into inode/ind | 4221 | __le32 *p; /* Pointer into inode/ind |
4416 | for current block */ | 4222 | for current block */ |
4417 | int err; | 4223 | int err = 0; |
4418 | 4224 | ||
4419 | if (this_bh) { /* For indirect block */ | 4225 | if (this_bh) { /* For indirect block */ |
4420 | BUFFER_TRACE(this_bh, "get_write_access"); | 4226 | BUFFER_TRACE(this_bh, "get_write_access"); |
@@ -4436,9 +4242,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4436 | } else if (nr == block_to_free + count) { | 4242 | } else if (nr == block_to_free + count) { |
4437 | count++; | 4243 | count++; |
4438 | } else { | 4244 | } else { |
4439 | if (ext4_clear_blocks(handle, inode, this_bh, | 4245 | err = ext4_clear_blocks(handle, inode, this_bh, |
4440 | block_to_free, count, | 4246 | block_to_free, count, |
4441 | block_to_free_p, p)) | 4247 | block_to_free_p, p); |
4248 | if (err) | ||
4442 | break; | 4249 | break; |
4443 | block_to_free = nr; | 4250 | block_to_free = nr; |
4444 | block_to_free_p = p; | 4251 | block_to_free_p = p; |
@@ -4447,9 +4254,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4447 | } | 4254 | } |
4448 | } | 4255 | } |
4449 | 4256 | ||
4450 | if (count > 0) | 4257 | if (!err && count > 0) |
4451 | ext4_clear_blocks(handle, inode, this_bh, block_to_free, | 4258 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, |
4452 | count, block_to_free_p, p); | 4259 | count, block_to_free_p, p); |
4260 | if (err < 0) | ||
4261 | /* fatal error */ | ||
4262 | return; | ||
4453 | 4263 | ||
4454 | if (this_bh) { | 4264 | if (this_bh) { |
4455 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | 4265 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); |
@@ -4479,7 +4289,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4479 | * @last: pointer immediately past the end of array | 4289 | * @last: pointer immediately past the end of array |
4480 | * @depth: depth of the branches to free | 4290 | * @depth: depth of the branches to free |
4481 | * | 4291 | * |
4482 | * We are freeing all blocks refered from these branches (numbers are | 4292 | * We are freeing all blocks referred from these branches (numbers are |
4483 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 4293 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
4484 | * appropriately. | 4294 | * appropriately. |
4485 | */ | 4295 | */ |
@@ -4530,6 +4340,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4530 | (__le32 *) bh->b_data, | 4340 | (__le32 *) bh->b_data, |
4531 | (__le32 *) bh->b_data + addr_per_block, | 4341 | (__le32 *) bh->b_data + addr_per_block, |
4532 | depth); | 4342 | depth); |
4343 | brelse(bh); | ||
4533 | 4344 | ||
4534 | /* | 4345 | /* |
4535 | * Everything below this this pointer has been | 4346 | * Everything below this this pointer has been |
@@ -4566,7 +4377,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4566 | * transaction where the data blocks are | 4377 | * transaction where the data blocks are |
4567 | * actually freed. | 4378 | * actually freed. |
4568 | */ | 4379 | */ |
4569 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4380 | ext4_free_blocks(handle, inode, NULL, nr, 1, |
4570 | EXT4_FREE_BLOCKS_METADATA| | 4381 | EXT4_FREE_BLOCKS_METADATA| |
4571 | EXT4_FREE_BLOCKS_FORGET); | 4382 | EXT4_FREE_BLOCKS_FORGET); |
4572 | 4383 | ||
@@ -4596,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4596 | 4407 | ||
4597 | int ext4_can_truncate(struct inode *inode) | 4408 | int ext4_can_truncate(struct inode *inode) |
4598 | { | 4409 | { |
4599 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
4600 | return 0; | ||
4601 | if (S_ISREG(inode->i_mode)) | 4410 | if (S_ISREG(inode->i_mode)) |
4602 | return 1; | 4411 | return 1; |
4603 | if (S_ISDIR(inode->i_mode)) | 4412 | if (S_ISDIR(inode->i_mode)) |
@@ -4608,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode) | |||
4608 | } | 4417 | } |
4609 | 4418 | ||
4610 | /* | 4419 | /* |
4420 | * ext4_punch_hole: punches a hole in a file by releasing the blocks | ||
4421 | * associated with the given offset and length | ||
4422 | * | ||
4423 | * @inode: File inode | ||
4424 | * @offset: The offset where the hole will begin | ||
4425 | * @len: The length of the hole | ||
4426 | * | ||
4427 | * Returns: 0 on success or negative on failure | ||
4428 | */ | ||
4429 | |||
4430 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
4431 | { | ||
4432 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4433 | if (!S_ISREG(inode->i_mode)) | ||
4434 | return -ENOTSUPP; | ||
4435 | |||
4436 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
4437 | /* TODO: Add support for non extent hole punching */ | ||
4438 | return -ENOTSUPP; | ||
4439 | } | ||
4440 | |||
4441 | return ext4_ext_punch_hole(file, offset, length); | ||
4442 | } | ||
4443 | |||
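The new ext4_punch_hole() entry point is only a gatekeeper for now: it accepts regular, extent-mapped files and hands the real work to ext4_ext_punch_hole(); everything else, including block-mapped files, gets -ENOTSUPP, as the TODO notes. Condensed:

struct inode *inode = file->f_path.dentry->d_inode;

if (!S_ISREG(inode->i_mode))
        return -ENOTSUPP;               /* regular files only       */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
        return -ENOTSUPP;               /* block-mapped files: TODO */

return ext4_ext_punch_hole(file, offset, length);
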
4444 | /* | ||
4611 | * ext4_truncate() | 4445 | * ext4_truncate() |
4612 | * | 4446 | * |
4613 | * We block out ext4_get_block() block instantiations across the entire | 4447 | * We block out ext4_get_block() block instantiations across the entire |
@@ -4646,10 +4480,12 @@ void ext4_truncate(struct inode *inode) | |||
4646 | Indirect chain[4]; | 4480 | Indirect chain[4]; |
4647 | Indirect *partial; | 4481 | Indirect *partial; |
4648 | __le32 nr = 0; | 4482 | __le32 nr = 0; |
4649 | int n; | 4483 | int n = 0; |
4650 | ext4_lblk_t last_block; | 4484 | ext4_lblk_t last_block, max_block; |
4651 | unsigned blocksize = inode->i_sb->s_blocksize; | 4485 | unsigned blocksize = inode->i_sb->s_blocksize; |
4652 | 4486 | ||
4487 | trace_ext4_truncate_enter(inode); | ||
4488 | |||
4653 | if (!ext4_can_truncate(inode)) | 4489 | if (!ext4_can_truncate(inode)) |
4654 | return; | 4490 | return; |
4655 | 4491 | ||
@@ -4660,6 +4496,7 @@ void ext4_truncate(struct inode *inode) | |||
4660 | 4496 | ||
4661 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4497 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4662 | ext4_ext_truncate(inode); | 4498 | ext4_ext_truncate(inode); |
4499 | trace_ext4_truncate_exit(inode); | ||
4663 | return; | 4500 | return; |
4664 | } | 4501 | } |
4665 | 4502 | ||
@@ -4669,14 +4506,18 @@ void ext4_truncate(struct inode *inode) | |||
4669 | 4506 | ||
4670 | last_block = (inode->i_size + blocksize-1) | 4507 | last_block = (inode->i_size + blocksize-1) |
4671 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 4508 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
4509 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
4510 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4672 | 4511 | ||
4673 | if (inode->i_size & (blocksize - 1)) | 4512 | if (inode->i_size & (blocksize - 1)) |
4674 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | 4513 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
4675 | goto out_stop; | 4514 | goto out_stop; |
4676 | 4515 | ||
4677 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 4516 | if (last_block != max_block) { |
4678 | if (n == 0) | 4517 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
4679 | goto out_stop; /* error */ | 4518 | if (n == 0) |
4519 | goto out_stop; /* error */ | ||
4520 | } | ||
4680 | 4521 | ||
4681 | /* | 4522 | /* |
4682 | * OK. This truncate is going to happen. We add the inode to the | 4523 | * OK. This truncate is going to happen. We add the inode to the |
@@ -4707,7 +4548,13 @@ void ext4_truncate(struct inode *inode) | |||
4707 | */ | 4548 | */ |
4708 | ei->i_disksize = inode->i_size; | 4549 | ei->i_disksize = inode->i_size; |
4709 | 4550 | ||
4710 | if (n == 1) { /* direct blocks */ | 4551 | if (last_block == max_block) { |
4552 | /* | ||
4553 | * It is unnecessary to free any data blocks if last_block is | ||
4554 | * equal to the indirect block limit. | ||
4555 | */ | ||
4556 | goto out_unlock; | ||
4557 | } else if (n == 1) { /* direct blocks */ | ||
4711 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 4558 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
4712 | i_data + EXT4_NDIR_BLOCKS); | 4559 | i_data + EXT4_NDIR_BLOCKS); |
4713 | goto do_indirects; | 4560 | goto do_indirects; |
@@ -4767,6 +4614,7 @@ do_indirects: | |||
4767 | ; | 4614 | ; |
4768 | } | 4615 | } |
4769 | 4616 | ||
4617 | out_unlock: | ||
4770 | up_write(&ei->i_data_sem); | 4618 | up_write(&ei->i_data_sem); |
4771 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4619 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4772 | ext4_mark_inode_dirty(handle, inode); | 4620 | ext4_mark_inode_dirty(handle, inode); |
@@ -4789,6 +4637,7 @@ out_stop: | |||
4789 | ext4_orphan_del(handle, inode); | 4637 | ext4_orphan_del(handle, inode); |
4790 | 4638 | ||
4791 | ext4_journal_stop(handle); | 4639 | ext4_journal_stop(handle); |
4640 | trace_ext4_truncate_exit(inode); | ||
4792 | } | 4641 | } |
4793 | 4642 | ||
4794 | /* | 4643 | /* |
@@ -4818,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4818 | /* | 4667 | /* |
4819 | * Figure out the offset within the block group inode table | 4668 | * Figure out the offset within the block group inode table |
4820 | */ | 4669 | */ |
4821 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | 4670 | inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; |
4822 | inode_offset = ((inode->i_ino - 1) % | 4671 | inode_offset = ((inode->i_ino - 1) % |
4823 | EXT4_INODES_PER_GROUP(sb)); | 4672 | EXT4_INODES_PER_GROUP(sb)); |
4824 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | 4673 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); |
@@ -4920,6 +4769,7 @@ make_io: | |||
4920 | * has in-inode xattrs, or we don't have this inode in memory. | 4769 | * has in-inode xattrs, or we don't have this inode in memory. |
4921 | * Read the block from disk. | 4770 | * Read the block from disk. |
4922 | */ | 4771 | */ |
4772 | trace_ext4_load_inode(inode); | ||
4923 | get_bh(bh); | 4773 | get_bh(bh); |
4924 | bh->b_end_io = end_buffer_read_sync; | 4774 | bh->b_end_io = end_buffer_read_sync; |
4925 | submit_bh(READ_META, bh); | 4775 | submit_bh(READ_META, bh); |
@@ -5025,7 +4875,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5025 | return inode; | 4875 | return inode; |
5026 | 4876 | ||
5027 | ei = EXT4_I(inode); | 4877 | ei = EXT4_I(inode); |
5028 | iloc.bh = 0; | 4878 | iloc.bh = NULL; |
5029 | 4879 | ||
5030 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4880 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
5031 | if (ret < 0) | 4881 | if (ret < 0) |
@@ -5040,7 +4890,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5040 | } | 4890 | } |
5041 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 4891 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
5042 | 4892 | ||
5043 | ei->i_state_flags = 0; | 4893 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
5044 | ei->i_dir_start_lookup = 0; | 4894 | ei->i_dir_start_lookup = 0; |
5045 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 4895 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
5046 | /* We now have enough fields to check if the inode was active or not. | 4896 | /* We now have enough fields to check if the inode was active or not. |
@@ -5299,7 +5149,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5299 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) | 5149 | if (ext4_inode_blocks_set(handle, raw_inode, ei)) |
5300 | goto out_brelse; | 5150 | goto out_brelse; |
5301 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 5151 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
5302 | raw_inode->i_flags = cpu_to_le32(ei->i_flags); | 5152 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); |
5303 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 5153 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != |
5304 | cpu_to_le32(EXT4_OS_HURD)) | 5154 | cpu_to_le32(EXT4_OS_HURD)) |
5305 | raw_inode->i_file_acl_high = | 5155 | raw_inode->i_file_acl_high = |
@@ -5464,6 +5314,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5464 | { | 5314 | { |
5465 | struct inode *inode = dentry->d_inode; | 5315 | struct inode *inode = dentry->d_inode; |
5466 | int error, rc = 0; | 5316 | int error, rc = 0; |
5317 | int orphan = 0; | ||
5467 | const unsigned int ia_valid = attr->ia_valid; | 5318 | const unsigned int ia_valid = attr->ia_valid; |
5468 | 5319 | ||
5469 | error = inode_change_ok(inode, attr); | 5320 | error = inode_change_ok(inode, attr); |
@@ -5510,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5510 | 5361 | ||
5511 | if (S_ISREG(inode->i_mode) && | 5362 | if (S_ISREG(inode->i_mode) && |
5512 | attr->ia_valid & ATTR_SIZE && | 5363 | attr->ia_valid & ATTR_SIZE && |
5513 | (attr->ia_size < inode->i_size || | 5364 | (attr->ia_size < inode->i_size)) { |
5514 | (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { | ||
5515 | handle_t *handle; | 5365 | handle_t *handle; |
5516 | 5366 | ||
5517 | handle = ext4_journal_start(inode, 3); | 5367 | handle = ext4_journal_start(inode, 3); |
@@ -5519,8 +5369,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5519 | error = PTR_ERR(handle); | 5369 | error = PTR_ERR(handle); |
5520 | goto err_out; | 5370 | goto err_out; |
5521 | } | 5371 | } |
5522 | 5372 | if (ext4_handle_valid(handle)) { | |
5523 | error = ext4_orphan_add(handle, inode); | 5373 | error = ext4_orphan_add(handle, inode); |
5374 | orphan = 1; | ||
5375 | } | ||
5524 | EXT4_I(inode)->i_disksize = attr->ia_size; | 5376 | EXT4_I(inode)->i_disksize = attr->ia_size; |
5525 | rc = ext4_mark_inode_dirty(handle, inode); | 5377 | rc = ext4_mark_inode_dirty(handle, inode); |
5526 | if (!error) | 5378 | if (!error) |
@@ -5538,18 +5390,20 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5538 | goto err_out; | 5390 | goto err_out; |
5539 | } | 5391 | } |
5540 | ext4_orphan_del(handle, inode); | 5392 | ext4_orphan_del(handle, inode); |
5393 | orphan = 0; | ||
5541 | ext4_journal_stop(handle); | 5394 | ext4_journal_stop(handle); |
5542 | goto err_out; | 5395 | goto err_out; |
5543 | } | 5396 | } |
5544 | } | 5397 | } |
5545 | /* ext4_truncate will clear the flag */ | ||
5546 | if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) | ||
5547 | ext4_truncate(inode); | ||
5548 | } | 5398 | } |
5549 | 5399 | ||
5550 | if ((attr->ia_valid & ATTR_SIZE) && | 5400 | if (attr->ia_valid & ATTR_SIZE) { |
5551 | attr->ia_size != i_size_read(inode)) | 5401 | if (attr->ia_size != i_size_read(inode)) { |
5552 | rc = vmtruncate(inode, attr->ia_size); | 5402 | truncate_setsize(inode, attr->ia_size); |
5403 | ext4_truncate(inode); | ||
5404 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
5405 | ext4_truncate(inode); | ||
5406 | } | ||
5553 | 5407 | ||
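The size-change path above no longer goes through vmtruncate(): when the size actually changes, truncate_setsize() updates i_size and trims the page cache and ext4_truncate() then frees the blocks, while ext4_truncate() is still invoked on its own to drop EOFBLOCKS-only preallocations when the size is unchanged. The new orphan flag records whether the inode was really added to the orphan list, so the later cleanup only removes it in that case. Condensed:

if (attr->ia_valid & ATTR_SIZE) {
        if (attr->ia_size != i_size_read(inode)) {
                truncate_setsize(inode, attr->ia_size); /* i_size + page cache */
                ext4_truncate(inode);                   /* free the blocks     */
        } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) {
                ext4_truncate(inode);   /* only drop blocks past EOF */
        }
}
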
5554 | if (!rc) { | 5408 | if (!rc) { |
5555 | setattr_copy(inode, attr); | 5409 | setattr_copy(inode, attr); |
@@ -5560,7 +5414,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5560 | * If the call to ext4_truncate failed to get a transaction handle at | 5414 | * If the call to ext4_truncate failed to get a transaction handle at |
5561 | * all, we need to clean up the in-core orphan list manually. | 5415 | * all, we need to clean up the in-core orphan list manually. |
5562 | */ | 5416 | */ |
5563 | if (inode->i_nlink) | 5417 | if (orphan && inode->i_nlink) |
5564 | ext4_orphan_del(NULL, inode); | 5418 | ext4_orphan_del(NULL, inode); |
5565 | 5419 | ||
5566 | if (!rc && (ia_valid & ATTR_MODE)) | 5420 | if (!rc && (ia_valid & ATTR_MODE)) |
@@ -5592,9 +5446,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5592 | * will return the blocks that include the delayed allocation | 5446 | * will return the blocks that include the delayed allocation |
5593 | * blocks for this file. | 5447 | * blocks for this file. |
5594 | */ | 5448 | */ |
5595 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5596 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 5449 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
5597 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5598 | 5450 | ||
5599 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 5451 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
5600 | return 0; | 5452 | return 0; |
@@ -5608,13 +5460,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | |||
5608 | /* if nrblocks are contiguous */ | 5460 | /* if nrblocks are contiguous */ |
5609 | if (chunk) { | 5461 | if (chunk) { |
5610 | /* | 5462 | /* |
5611 | * With N contiguous data blocks, it need at most | 5463 | * With N contiguous data blocks, we need at most |
5612 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | 5464 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
5613 | * 2 dindirect blocks | 5465 | * 2 dindirect blocks, and 1 tindirect block |
5614 | * 1 tindirect block | ||
5615 | */ | 5466 | */ |
5616 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | 5467 | return DIV_ROUND_UP(nrblocks, |
5617 | return indirects + 3; | 5468 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
5618 | } | 5469 | } |
5619 | /* | 5470 | /* |
5620 | * if nrblocks are not contiguous, worse case, each block touch | 5471 | * if nrblocks are not contiguous, worse case, each block touch |
@@ -5643,7 +5494,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5643 | * | 5494 | * |
5644 | * Also account for superblock, inode, quota and xattr blocks | 5495 | * Also account for superblock, inode, quota and xattr blocks |
5645 | */ | 5496 | */ |
5646 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5497 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5647 | { | 5498 | { |
5648 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 5499 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
5649 | int gdpblocks; | 5500 | int gdpblocks; |
@@ -5688,7 +5539,7 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5688 | } | 5539 | } |
5689 | 5540 | ||
5690 | /* | 5541 | /* |
5691 | * Calulate the total number of credits to reserve to fit | 5542 | * Calculate the total number of credits to reserve to fit |
5692 | * the modification of a single pages into a single transaction, | 5543 | * the modification of a single pages into a single transaction, |
5693 | * which may include multiple chunks of block allocations. | 5544 | * which may include multiple chunks of block allocations. |
5694 | * | 5545 | * |
@@ -5831,6 +5682,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5831 | int err, ret; | 5682 | int err, ret; |
5832 | 5683 | ||
5833 | might_sleep(); | 5684 | might_sleep(); |
5685 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | ||
5834 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5686 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5835 | if (ext4_handle_valid(handle) && | 5687 | if (ext4_handle_valid(handle) && |
5836 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5688 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
@@ -5881,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5881 | * so would cause a commit on atime updates, which we don't bother doing. | 5733 | * so would cause a commit on atime updates, which we don't bother doing. |
5882 | * We handle synchronous inodes at the highest possible level. | 5734 | * We handle synchronous inodes at the highest possible level. |
5883 | */ | 5735 | */ |
5884 | void ext4_dirty_inode(struct inode *inode) | 5736 | void ext4_dirty_inode(struct inode *inode, int flags) |
5885 | { | 5737 | { |
5886 | handle_t *handle; | 5738 | handle_t *handle; |
5887 | 5739 | ||
@@ -6009,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6009 | goto out_unlock; | 5861 | goto out_unlock; |
6010 | } | 5862 | } |
6011 | ret = 0; | 5863 | ret = 0; |
6012 | if (PageMappedToDisk(page)) | 5864 | |
6013 | goto out_unlock; | 5865 | lock_page(page); |
5866 | wait_on_page_writeback(page); | ||
5867 | if (PageMappedToDisk(page)) { | ||
5868 | up_read(&inode->i_alloc_sem); | ||
5869 | return VM_FAULT_LOCKED; | ||
5870 | } | ||
6014 | 5871 | ||
6015 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5872 | if (page->index == size >> PAGE_CACHE_SHIFT) |
6016 | len = size & ~PAGE_CACHE_MASK; | 5873 | len = size & ~PAGE_CACHE_MASK; |
6017 | else | 5874 | else |
6018 | len = PAGE_CACHE_SIZE; | 5875 | len = PAGE_CACHE_SIZE; |
6019 | 5876 | ||
6020 | lock_page(page); | ||
6021 | /* | 5877 | /* |
6022 | * return if we have all the buffers mapped. This avoid | 5878 | * return if we have all the buffers mapped. This avoid |
6023 | * the need to call write_begin/write_end which does a | 5879 | * the need to call write_begin/write_end which does a |
@@ -6027,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6027 | if (page_has_buffers(page)) { | 5883 | if (page_has_buffers(page)) { |
6028 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5884 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
6029 | ext4_bh_unmapped)) { | 5885 | ext4_bh_unmapped)) { |
6030 | unlock_page(page); | 5886 | up_read(&inode->i_alloc_sem); |
6031 | goto out_unlock; | 5887 | return VM_FAULT_LOCKED; |
6032 | } | 5888 | } |
6033 | } | 5889 | } |
6034 | unlock_page(page); | 5890 | unlock_page(page); |
@@ -6048,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6048 | if (ret < 0) | 5904 | if (ret < 0) |
6049 | goto out_unlock; | 5905 | goto out_unlock; |
6050 | ret = 0; | 5906 | ret = 0; |
5907 | |||
5908 | /* | ||
5909 | * write_begin/end might have created a dirty page and someone | ||
5910 | * could wander in and start the IO. Make sure that hasn't | ||
5911 | * happened. | ||
5912 | */ | ||
5913 | lock_page(page); | ||
5914 | wait_on_page_writeback(page); | ||
5915 | up_read(&inode->i_alloc_sem); | ||
5916 | return VM_FAULT_LOCKED; | ||
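Taken together, the ext4_page_mkwrite() hunks change every successful exit to hand back a locked page: the handler locks the page, waits for any writeback in progress (the added comment explains the race with I/O started on a freshly dirtied page), drops i_alloc_sem and returns VM_FAULT_LOCKED. The shared exit pattern:

lock_page(page);
wait_on_page_writeback(page);      /* never hand back a page under I/O */
up_read(&inode->i_alloc_sem);
return VM_FAULT_LOCKED;            /* the caller receives it locked    */
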
6051 | out_unlock: | 5917 | out_unlock: |
6052 | if (ret) | 5918 | if (ret) |
6053 | ret = VM_FAULT_SIGBUS; | 5919 | ret = VM_FAULT_SIGBUS; |