Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 629 |
1 file changed, 357 insertions, 272 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3c243b9afa5..cb1c1ab2720b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -484,49 +484,6 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
484 | } | 484 | } |
485 | 485 | ||
486 | /* | 486 | /* |
487 | * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map. | ||
488 | */ | ||
489 | static void set_buffers_da_mapped(struct inode *inode, | ||
490 | struct ext4_map_blocks *map) | ||
491 | { | ||
492 | struct address_space *mapping = inode->i_mapping; | ||
493 | struct pagevec pvec; | ||
494 | int i, nr_pages; | ||
495 | pgoff_t index, end; | ||
496 | |||
497 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
498 | end = (map->m_lblk + map->m_len - 1) >> | ||
499 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
500 | |||
501 | pagevec_init(&pvec, 0); | ||
502 | while (index <= end) { | ||
503 | nr_pages = pagevec_lookup(&pvec, mapping, index, | ||
504 | min(end - index + 1, | ||
505 | (pgoff_t)PAGEVEC_SIZE)); | ||
506 | if (nr_pages == 0) | ||
507 | break; | ||
508 | for (i = 0; i < nr_pages; i++) { | ||
509 | struct page *page = pvec.pages[i]; | ||
510 | struct buffer_head *bh, *head; | ||
511 | |||
512 | if (unlikely(page->mapping != mapping) || | ||
513 | !PageDirty(page)) | ||
514 | break; | ||
515 | |||
516 | if (page_has_buffers(page)) { | ||
517 | bh = head = page_buffers(page); | ||
518 | do { | ||
519 | set_buffer_da_mapped(bh); | ||
520 | bh = bh->b_this_page; | ||
521 | } while (bh != head); | ||
522 | } | ||
523 | index++; | ||
524 | } | ||
525 | pagevec_release(&pvec); | ||
526 | } | ||
527 | } | ||
528 | |||
529 | /* | ||
530 | * The ext4_map_blocks() function tries to look up the requested blocks, | 487 | * The ext4_map_blocks() function tries to look up the requested blocks, |
531 | * and returns if the blocks are already mapped. | 488 | * and returns if the blocks are already mapped. |
532 | * | 489 | * |
@@ -574,7 +531,16 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
574 | up_read((&EXT4_I(inode)->i_data_sem)); | 531 | up_read((&EXT4_I(inode)->i_data_sem)); |
575 | 532 | ||
576 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 533 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
577 | int ret = check_block_validity(inode, map); | 534 | int ret; |
535 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { | ||
536 | /* delayed alloc may be allocated by fallocate and | ||
537 | * converted to initialized by direct IO. | ||
538 | * We need to handle delayed extents here. | ||
539 | */ | ||
540 | down_write((&EXT4_I(inode)->i_data_sem)); | ||
541 | goto delayed_mapped; | ||
542 | } | ||
543 | ret = check_block_validity(inode, map); | ||
578 | if (ret != 0) | 544 | if (ret != 0) |
579 | return ret; | 545 | return ret; |
580 | } | 546 | } |
@@ -652,12 +618,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
652 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { | 618 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
653 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 619 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
654 | 620 | ||
655 | /* If we have successfully mapped the delayed allocated blocks, | 621 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
656 | * set the BH_Da_Mapped bit on them. Its important to do this | 622 | int ret; |
657 | * under the protection of i_data_sem. | 623 | delayed_mapped: |
658 | */ | 624 | /* delayed allocation blocks have been allocated */ |
659 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 625 | ret = ext4_es_remove_extent(inode, map->m_lblk, |
660 | set_buffers_da_mapped(inode, map); | 626 | map->m_len); |
627 | if (ret < 0) | ||
628 | retval = ret; | ||
629 | } | ||
661 | } | 630 | } |
662 | 631 | ||
663 | up_write((&EXT4_I(inode)->i_data_sem)); | 632 | up_write((&EXT4_I(inode)->i_data_sem)); |
@@ -680,10 +649,13 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
680 | int ret = 0, started = 0; | 649 | int ret = 0, started = 0; |
681 | int dio_credits; | 650 | int dio_credits; |
682 | 651 | ||
652 | if (ext4_has_inline_data(inode)) | ||
653 | return -ERANGE; | ||
654 | |||
683 | map.m_lblk = iblock; | 655 | map.m_lblk = iblock; |
684 | map.m_len = bh->b_size >> inode->i_blkbits; | 656 | map.m_len = bh->b_size >> inode->i_blkbits; |
685 | 657 | ||
686 | if (flags && !handle) { | 658 | if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) { |
687 | /* Direct IO write... */ | 659 | /* Direct IO write... */ |
688 | if (map.m_len > DIO_MAX_BLOCKS) | 660 | if (map.m_len > DIO_MAX_BLOCKS) |
689 | map.m_len = DIO_MAX_BLOCKS; | 661 | map.m_len = DIO_MAX_BLOCKS; |
@@ -798,13 +770,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
798 | return NULL; | 770 | return NULL; |
799 | } | 771 | } |
800 | 772 | ||
801 | static int walk_page_buffers(handle_t *handle, | 773 | int ext4_walk_page_buffers(handle_t *handle, |
802 | struct buffer_head *head, | 774 | struct buffer_head *head, |
803 | unsigned from, | 775 | unsigned from, |
804 | unsigned to, | 776 | unsigned to, |
805 | int *partial, | 777 | int *partial, |
806 | int (*fn)(handle_t *handle, | 778 | int (*fn)(handle_t *handle, |
807 | struct buffer_head *bh)) | 779 | struct buffer_head *bh)) |
808 | { | 780 | { |
809 | struct buffer_head *bh; | 781 | struct buffer_head *bh; |
810 | unsigned block_start, block_end; | 782 | unsigned block_start, block_end; |
@@ -854,8 +826,8 @@ static int walk_page_buffers(handle_t *handle, | |||
854 | * is elevated. We'll still have enough credits for the tiny quotafile | 826 | * is elevated. We'll still have enough credits for the tiny quotafile |
855 | * write. | 827 | * write. |
856 | */ | 828 | */ |
857 | static int do_journal_get_write_access(handle_t *handle, | 829 | int do_journal_get_write_access(handle_t *handle, |
858 | struct buffer_head *bh) | 830 | struct buffer_head *bh) |
859 | { | 831 | { |
860 | int dirty = buffer_dirty(bh); | 832 | int dirty = buffer_dirty(bh); |
861 | int ret; | 833 | int ret; |
@@ -878,7 +850,7 @@ static int do_journal_get_write_access(handle_t *handle, | |||
878 | return ret; | 850 | return ret; |
879 | } | 851 | } |
880 | 852 | ||
881 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 853 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, |
882 | struct buffer_head *bh_result, int create); | 854 | struct buffer_head *bh_result, int create); |
883 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 855 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
884 | loff_t pos, unsigned len, unsigned flags, | 856 | loff_t pos, unsigned len, unsigned flags, |
@@ -902,6 +874,17 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
902 | from = pos & (PAGE_CACHE_SIZE - 1); | 874 | from = pos & (PAGE_CACHE_SIZE - 1); |
903 | to = from + len; | 875 | to = from + len; |
904 | 876 | ||
877 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | ||
878 | ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, | ||
879 | flags, pagep); | ||
880 | if (ret < 0) | ||
881 | goto out; | ||
882 | if (ret == 1) { | ||
883 | ret = 0; | ||
884 | goto out; | ||
885 | } | ||
886 | } | ||
887 | |||
905 | retry: | 888 | retry: |
906 | handle = ext4_journal_start(inode, needed_blocks); | 889 | handle = ext4_journal_start(inode, needed_blocks); |
907 | if (IS_ERR(handle)) { | 890 | if (IS_ERR(handle)) { |
@@ -919,6 +902,7 @@ retry: | |||
919 | ret = -ENOMEM; | 902 | ret = -ENOMEM; |
920 | goto out; | 903 | goto out; |
921 | } | 904 | } |
905 | |||
922 | *pagep = page; | 906 | *pagep = page; |
923 | 907 | ||
924 | if (ext4_should_dioread_nolock(inode)) | 908 | if (ext4_should_dioread_nolock(inode)) |
@@ -927,8 +911,9 @@ retry: | |||
927 | ret = __block_write_begin(page, pos, len, ext4_get_block); | 911 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
928 | 912 | ||
929 | if (!ret && ext4_should_journal_data(inode)) { | 913 | if (!ret && ext4_should_journal_data(inode)) { |
930 | ret = walk_page_buffers(handle, page_buffers(page), | 914 | ret = ext4_walk_page_buffers(handle, page_buffers(page), |
931 | from, to, NULL, do_journal_get_write_access); | 915 | from, to, NULL, |
916 | do_journal_get_write_access); | ||
932 | } | 917 | } |
933 | 918 | ||
934 | if (ret) { | 919 | if (ret) { |
@@ -983,7 +968,12 @@ static int ext4_generic_write_end(struct file *file, | |||
983 | struct inode *inode = mapping->host; | 968 | struct inode *inode = mapping->host; |
984 | handle_t *handle = ext4_journal_current_handle(); | 969 | handle_t *handle = ext4_journal_current_handle(); |
985 | 970 | ||
986 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 971 | if (ext4_has_inline_data(inode)) |
972 | copied = ext4_write_inline_data_end(inode, pos, len, | ||
973 | copied, page); | ||
974 | else | ||
975 | copied = block_write_end(file, mapping, pos, | ||
976 | len, copied, page, fsdata); | ||
987 | 977 | ||
988 | /* | 978 | /* |
989 | * No need to use i_size_read() here, the i_size | 979 | * No need to use i_size_read() here, the i_size |
@@ -1134,16 +1124,21 @@ static int ext4_journalled_write_end(struct file *file, | |||
1134 | 1124 | ||
1135 | BUG_ON(!ext4_handle_valid(handle)); | 1125 | BUG_ON(!ext4_handle_valid(handle)); |
1136 | 1126 | ||
1137 | if (copied < len) { | 1127 | if (ext4_has_inline_data(inode)) |
1138 | if (!PageUptodate(page)) | 1128 | copied = ext4_write_inline_data_end(inode, pos, len, |
1139 | copied = 0; | 1129 | copied, page); |
1140 | page_zero_new_buffers(page, from+copied, to); | 1130 | else { |
1141 | } | 1131 | if (copied < len) { |
1132 | if (!PageUptodate(page)) | ||
1133 | copied = 0; | ||
1134 | page_zero_new_buffers(page, from+copied, to); | ||
1135 | } | ||
1142 | 1136 | ||
1143 | ret = walk_page_buffers(handle, page_buffers(page), from, | 1137 | ret = ext4_walk_page_buffers(handle, page_buffers(page), from, |
1144 | to, &partial, write_end_fn); | 1138 | to, &partial, write_end_fn); |
1145 | if (!partial) | 1139 | if (!partial) |
1146 | SetPageUptodate(page); | 1140 | SetPageUptodate(page); |
1141 | } | ||
1147 | new_i_size = pos + copied; | 1142 | new_i_size = pos + copied; |
1148 | if (new_i_size > inode->i_size) | 1143 | if (new_i_size > inode->i_size) |
1149 | i_size_write(inode, pos+copied); | 1144 | i_size_write(inode, pos+copied); |
@@ -1301,6 +1296,7 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1301 | struct inode *inode = page->mapping->host; | 1296 | struct inode *inode = page->mapping->host; |
1302 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1297 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1303 | int num_clusters; | 1298 | int num_clusters; |
1299 | ext4_fsblk_t lblk; | ||
1304 | 1300 | ||
1305 | head = page_buffers(page); | 1301 | head = page_buffers(page); |
1306 | bh = head; | 1302 | bh = head; |
@@ -1310,20 +1306,23 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1310 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1306 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1311 | to_release++; | 1307 | to_release++; |
1312 | clear_buffer_delay(bh); | 1308 | clear_buffer_delay(bh); |
1313 | clear_buffer_da_mapped(bh); | ||
1314 | } | 1309 | } |
1315 | curr_off = next_off; | 1310 | curr_off = next_off; |
1316 | } while ((bh = bh->b_this_page) != head); | 1311 | } while ((bh = bh->b_this_page) != head); |
1317 | 1312 | ||
1313 | if (to_release) { | ||
1314 | lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1315 | ext4_es_remove_extent(inode, lblk, to_release); | ||
1316 | } | ||
1317 | |||
1318 | /* If we have released all the blocks belonging to a cluster, then we | 1318 | /* If we have released all the blocks belonging to a cluster, then we |
1319 | * need to release the reserved space for that cluster. */ | 1319 | * need to release the reserved space for that cluster. */ |
1320 | num_clusters = EXT4_NUM_B2C(sbi, to_release); | 1320 | num_clusters = EXT4_NUM_B2C(sbi, to_release); |
1321 | while (num_clusters > 0) { | 1321 | while (num_clusters > 0) { |
1322 | ext4_fsblk_t lblk; | ||
1323 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + | 1322 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + |
1324 | ((num_clusters - 1) << sbi->s_cluster_bits); | 1323 | ((num_clusters - 1) << sbi->s_cluster_bits); |
1325 | if (sbi->s_cluster_ratio == 1 || | 1324 | if (sbi->s_cluster_ratio == 1 || |
1326 | !ext4_find_delalloc_cluster(inode, lblk, 1)) | 1325 | !ext4_find_delalloc_cluster(inode, lblk)) |
1327 | ext4_da_release_space(inode, 1); | 1326 | ext4_da_release_space(inode, 1); |
1328 | 1327 | ||
1329 | num_clusters--; | 1328 | num_clusters--; |
@@ -1429,8 +1428,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1429 | clear_buffer_delay(bh); | 1428 | clear_buffer_delay(bh); |
1430 | bh->b_blocknr = pblock; | 1429 | bh->b_blocknr = pblock; |
1431 | } | 1430 | } |
1432 | if (buffer_da_mapped(bh)) | ||
1433 | clear_buffer_da_mapped(bh); | ||
1434 | if (buffer_unwritten(bh) || | 1431 | if (buffer_unwritten(bh) || |
1435 | buffer_mapped(bh)) | 1432 | buffer_mapped(bh)) |
1436 | BUG_ON(bh->b_blocknr != pblock); | 1433 | BUG_ON(bh->b_blocknr != pblock); |
@@ -1500,9 +1497,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
1500 | struct pagevec pvec; | 1497 | struct pagevec pvec; |
1501 | struct inode *inode = mpd->inode; | 1498 | struct inode *inode = mpd->inode; |
1502 | struct address_space *mapping = inode->i_mapping; | 1499 | struct address_space *mapping = inode->i_mapping; |
1500 | ext4_lblk_t start, last; | ||
1503 | 1501 | ||
1504 | index = mpd->first_page; | 1502 | index = mpd->first_page; |
1505 | end = mpd->next_page - 1; | 1503 | end = mpd->next_page - 1; |
1504 | |||
1505 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1506 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1507 | ext4_es_remove_extent(inode, start, last - start + 1); | ||
1508 | |||
1509 | pagevec_init(&pvec, 0); | ||
1506 | while (index <= end) { | 1510 | while (index <= end) { |
1507 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 1511 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
1508 | if (nr_pages == 0) | 1512 | if (nr_pages == 0) |
@@ -1656,15 +1660,6 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1656 | 1660 | ||
1657 | for (i = 0; i < map.m_len; i++) | 1661 | for (i = 0; i < map.m_len; i++) |
1658 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 1662 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
1659 | |||
1660 | if (ext4_should_order_data(mpd->inode)) { | ||
1661 | err = ext4_jbd2_file_inode(handle, mpd->inode); | ||
1662 | if (err) { | ||
1663 | /* Only if the journal is aborted */ | ||
1664 | mpd->retval = err; | ||
1665 | goto submit_io; | ||
1666 | } | ||
1667 | } | ||
1668 | } | 1663 | } |
1669 | 1664 | ||
1670 | /* | 1665 | /* |
@@ -1795,7 +1790,19 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1795 | * file system block. | 1790 | * file system block. |
1796 | */ | 1791 | */ |
1797 | down_read((&EXT4_I(inode)->i_data_sem)); | 1792 | down_read((&EXT4_I(inode)->i_data_sem)); |
1798 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 1793 | if (ext4_has_inline_data(inode)) { |
1794 | /* | ||
1795 | * We will soon create blocks for this page, and let | ||
1796 | * us pretend as if the blocks aren't allocated yet. | ||
1797 | * In case of clusters, we have to handle the work | ||
1798 | * of mapping from cluster so that the reserved space | ||
1799 | * is calculated properly. | ||
1800 | */ | ||
1801 | if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) && | ||
1802 | ext4_find_delalloc_cluster(inode, map->m_lblk)) | ||
1803 | map->m_flags |= EXT4_MAP_FROM_CLUSTER; | ||
1804 | retval = 0; | ||
1805 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
1799 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); | 1806 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); |
1800 | else | 1807 | else |
1801 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); | 1808 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); |
@@ -1814,6 +1821,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
1814 | goto out_unlock; | 1821 | goto out_unlock; |
1815 | } | 1822 | } |
1816 | 1823 | ||
1824 | retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); | ||
1825 | if (retval) | ||
1826 | goto out_unlock; | ||
1827 | |||
1817 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served | 1828 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served |
1818 | * and it should not appear on the bh->b_state. | 1829 | * and it should not appear on the bh->b_state. |
1819 | */ | 1830 | */ |
@@ -1842,8 +1853,8 @@ out_unlock: | |||
1842 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev | 1853 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev |
1843 | * initialized properly. | 1854 | * initialized properly. |
1844 | */ | 1855 | */ |
1845 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 1856 | int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
1846 | struct buffer_head *bh, int create) | 1857 | struct buffer_head *bh, int create) |
1847 | { | 1858 | { |
1848 | struct ext4_map_blocks map; | 1859 | struct ext4_map_blocks map; |
1849 | int ret = 0; | 1860 | int ret = 0; |
@@ -1917,15 +1928,29 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1917 | { | 1928 | { |
1918 | struct address_space *mapping = page->mapping; | 1929 | struct address_space *mapping = page->mapping; |
1919 | struct inode *inode = mapping->host; | 1930 | struct inode *inode = mapping->host; |
1920 | struct buffer_head *page_bufs; | 1931 | struct buffer_head *page_bufs = NULL; |
1921 | handle_t *handle = NULL; | 1932 | handle_t *handle = NULL; |
1922 | int ret = 0; | 1933 | int ret = 0, err = 0; |
1923 | int err; | 1934 | int inline_data = ext4_has_inline_data(inode); |
1935 | struct buffer_head *inode_bh = NULL; | ||
1924 | 1936 | ||
1925 | ClearPageChecked(page); | 1937 | ClearPageChecked(page); |
1926 | page_bufs = page_buffers(page); | 1938 | |
1927 | BUG_ON(!page_bufs); | 1939 | if (inline_data) { |
1928 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 1940 | BUG_ON(page->index != 0); |
1941 | BUG_ON(len > ext4_get_max_inline_size(inode)); | ||
1942 | inode_bh = ext4_journalled_write_inline_data(inode, len, page); | ||
1943 | if (inode_bh == NULL) | ||
1944 | goto out; | ||
1945 | } else { | ||
1946 | page_bufs = page_buffers(page); | ||
1947 | if (!page_bufs) { | ||
1948 | BUG(); | ||
1949 | goto out; | ||
1950 | } | ||
1951 | ext4_walk_page_buffers(handle, page_bufs, 0, len, | ||
1952 | NULL, bget_one); | ||
1953 | } | ||
1929 | /* As soon as we unlock the page, it can go away, but we have | 1954 | /* As soon as we unlock the page, it can go away, but we have |
1930 | * references to buffers so we are safe */ | 1955 | * references to buffers so we are safe */ |
1931 | unlock_page(page); | 1956 | unlock_page(page); |
@@ -1938,11 +1963,18 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1938 | 1963 | ||
1939 | BUG_ON(!ext4_handle_valid(handle)); | 1964 | BUG_ON(!ext4_handle_valid(handle)); |
1940 | 1965 | ||
1941 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1966 | if (inline_data) { |
1942 | do_journal_get_write_access); | 1967 | ret = ext4_journal_get_write_access(handle, inode_bh); |
1968 | |||
1969 | err = ext4_handle_dirty_metadata(handle, inode, inode_bh); | ||
1943 | 1970 | ||
1944 | err = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1971 | } else { |
1945 | write_end_fn); | 1972 | ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1973 | do_journal_get_write_access); | ||
1974 | |||
1975 | err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, | ||
1976 | write_end_fn); | ||
1977 | } | ||
1946 | if (ret == 0) | 1978 | if (ret == 0) |
1947 | ret = err; | 1979 | ret = err; |
1948 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | 1980 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; |
@@ -1950,9 +1982,12 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1950 | if (!ret) | 1982 | if (!ret) |
1951 | ret = err; | 1983 | ret = err; |
1952 | 1984 | ||
1953 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 1985 | if (!ext4_has_inline_data(inode)) |
1986 | ext4_walk_page_buffers(handle, page_bufs, 0, len, | ||
1987 | NULL, bput_one); | ||
1954 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 1988 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1955 | out: | 1989 | out: |
1990 | brelse(inode_bh); | ||
1956 | return ret; | 1991 | return ret; |
1957 | } | 1992 | } |
1958 | 1993 | ||
@@ -2029,8 +2064,8 @@ static int ext4_writepage(struct page *page, | |||
2029 | commit_write = 1; | 2064 | commit_write = 1; |
2030 | } | 2065 | } |
2031 | page_bufs = page_buffers(page); | 2066 | page_bufs = page_buffers(page); |
2032 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2067 | if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, |
2033 | ext4_bh_delay_or_unwritten)) { | 2068 | ext4_bh_delay_or_unwritten)) { |
2034 | /* | 2069 | /* |
2035 | * We don't want to do block allocation, so redirty | 2070 | * We don't want to do block allocation, so redirty |
2036 | * the page and return. We may reach here when we do | 2071 | * the page and return. We may reach here when we do |
@@ -2096,7 +2131,8 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2096 | * mpage_da_map_and_submit to map a single contiguous memory region | 2131 | * mpage_da_map_and_submit to map a single contiguous memory region |
2097 | * and then write them. | 2132 | * and then write them. |
2098 | */ | 2133 | */ |
2099 | static int write_cache_pages_da(struct address_space *mapping, | 2134 | static int write_cache_pages_da(handle_t *handle, |
2135 | struct address_space *mapping, | ||
2100 | struct writeback_control *wbc, | 2136 | struct writeback_control *wbc, |
2101 | struct mpage_da_data *mpd, | 2137 | struct mpage_da_data *mpd, |
2102 | pgoff_t *done_index) | 2138 | pgoff_t *done_index) |
@@ -2175,6 +2211,17 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2175 | wait_on_page_writeback(page); | 2211 | wait_on_page_writeback(page); |
2176 | BUG_ON(PageWriteback(page)); | 2212 | BUG_ON(PageWriteback(page)); |
2177 | 2213 | ||
2214 | /* | ||
2215 | * If we have inline data and arrive here, it means that | ||
2216 | * we will soon create the block for the 1st page, so | ||
2217 | * we'd better clear the inline data here. | ||
2218 | */ | ||
2219 | if (ext4_has_inline_data(inode)) { | ||
2220 | BUG_ON(ext4_test_inode_state(inode, | ||
2221 | EXT4_STATE_MAY_INLINE_DATA)); | ||
2222 | ext4_destroy_inline_data(handle, inode); | ||
2223 | } | ||
2224 | |||
2178 | if (mpd->next_page != page->index) | 2225 | if (mpd->next_page != page->index) |
2179 | mpd->first_page = page->index; | 2226 | mpd->first_page = page->index; |
2180 | mpd->next_page = page->index + 1; | 2227 | mpd->next_page = page->index + 1; |
@@ -2381,7 +2428,8 @@ retry: | |||
2381 | * contiguous region of logical blocks that need | 2428 | * contiguous region of logical blocks that need |
2382 | * blocks to be allocated by ext4 and submit them. | 2429 | * blocks to be allocated by ext4 and submit them. |
2383 | */ | 2430 | */ |
2384 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); | 2431 | ret = write_cache_pages_da(handle, mapping, |
2432 | wbc, &mpd, &done_index); | ||
2385 | /* | 2433 | /* |
2386 | * If we have a contiguous extent of pages and we | 2434 | * If we have a contiguous extent of pages and we |
2387 | * haven't done the I/O yet, map the blocks and submit | 2435 | * haven't done the I/O yet, map the blocks and submit |
@@ -2445,7 +2493,6 @@ out_writepages: | |||
2445 | return ret; | 2493 | return ret; |
2446 | } | 2494 | } |
2447 | 2495 | ||
2448 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
2449 | static int ext4_nonda_switch(struct super_block *sb) | 2496 | static int ext4_nonda_switch(struct super_block *sb) |
2450 | { | 2497 | { |
2451 | s64 free_blocks, dirty_blocks; | 2498 | s64 free_blocks, dirty_blocks; |
@@ -2502,6 +2549,19 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2502 | } | 2549 | } |
2503 | *fsdata = (void *)0; | 2550 | *fsdata = (void *)0; |
2504 | trace_ext4_da_write_begin(inode, pos, len, flags); | 2551 | trace_ext4_da_write_begin(inode, pos, len, flags); |
2552 | |||
2553 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | ||
2554 | ret = ext4_da_write_inline_data_begin(mapping, inode, | ||
2555 | pos, len, flags, | ||
2556 | pagep, fsdata); | ||
2557 | if (ret < 0) | ||
2558 | goto out; | ||
2559 | if (ret == 1) { | ||
2560 | ret = 0; | ||
2561 | goto out; | ||
2562 | } | ||
2563 | } | ||
2564 | |||
2505 | retry: | 2565 | retry: |
2506 | /* | 2566 | /* |
2507 | * With delayed allocation, we don't log the i_disksize update | 2567 | * With delayed allocation, we don't log the i_disksize update |
@@ -2603,22 +2663,13 @@ static int ext4_da_write_end(struct file *file, | |||
2603 | * changes. So let's piggyback the i_disksize mark_inode_dirty | 2663 | * changes. So let's piggyback the i_disksize mark_inode_dirty |
2604 | * into that. | 2664 | * into that. |
2605 | */ | 2665 | */ |
2606 | |||
2607 | new_i_size = pos + copied; | 2666 | new_i_size = pos + copied; |
2608 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { | 2667 | if (copied && new_i_size > EXT4_I(inode)->i_disksize) { |
2609 | if (ext4_da_should_update_i_disksize(page, end)) { | 2668 | if (ext4_has_inline_data(inode) || |
2669 | ext4_da_should_update_i_disksize(page, end)) { | ||
2610 | down_write(&EXT4_I(inode)->i_data_sem); | 2670 | down_write(&EXT4_I(inode)->i_data_sem); |
2611 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 2671 | if (new_i_size > EXT4_I(inode)->i_disksize) |
2612 | /* | ||
2613 | * Updating i_disksize when extending file | ||
2614 | * without needing block allocation | ||
2615 | */ | ||
2616 | if (ext4_should_order_data(inode)) | ||
2617 | ret = ext4_jbd2_file_inode(handle, | ||
2618 | inode); | ||
2619 | |||
2620 | EXT4_I(inode)->i_disksize = new_i_size; | 2672 | EXT4_I(inode)->i_disksize = new_i_size; |
2621 | } | ||
2622 | up_write(&EXT4_I(inode)->i_data_sem); | 2673 | up_write(&EXT4_I(inode)->i_data_sem); |
2623 | /* We need to mark inode dirty even if | 2674 | /* We need to mark inode dirty even if |
2624 | * new_i_size is less that inode->i_size | 2675 | * new_i_size is less that inode->i_size |
@@ -2627,8 +2678,16 @@ static int ext4_da_write_end(struct file *file, | |||
2627 | ext4_mark_inode_dirty(handle, inode); | 2678 | ext4_mark_inode_dirty(handle, inode); |
2628 | } | 2679 | } |
2629 | } | 2680 | } |
2630 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2681 | |
2682 | if (write_mode != CONVERT_INLINE_DATA && | ||
2683 | ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && | ||
2684 | ext4_has_inline_data(inode)) | ||
2685 | ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, | ||
2686 | page); | ||
2687 | else | ||
2688 | ret2 = generic_write_end(file, mapping, pos, len, copied, | ||
2631 | page, fsdata); | 2689 | page, fsdata); |
2690 | |||
2632 | copied = ret2; | 2691 | copied = ret2; |
2633 | if (ret2 < 0) | 2692 | if (ret2 < 0) |
2634 | ret = ret2; | 2693 | ret = ret2; |
@@ -2721,6 +2780,12 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
2721 | journal_t *journal; | 2780 | journal_t *journal; |
2722 | int err; | 2781 | int err; |
2723 | 2782 | ||
2783 | /* | ||
2784 | * We can get here for an inline file via the FIBMAP ioctl | ||
2785 | */ | ||
2786 | if (ext4_has_inline_data(inode)) | ||
2787 | return 0; | ||
2788 | |||
2724 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && | 2789 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && |
2725 | test_opt(inode->i_sb, DELALLOC)) { | 2790 | test_opt(inode->i_sb, DELALLOC)) { |
2726 | /* | 2791 | /* |
@@ -2766,14 +2831,30 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
2766 | 2831 | ||
2767 | static int ext4_readpage(struct file *file, struct page *page) | 2832 | static int ext4_readpage(struct file *file, struct page *page) |
2768 | { | 2833 | { |
2834 | int ret = -EAGAIN; | ||
2835 | struct inode *inode = page->mapping->host; | ||
2836 | |||
2769 | trace_ext4_readpage(page); | 2837 | trace_ext4_readpage(page); |
2770 | return mpage_readpage(page, ext4_get_block); | 2838 | |
2839 | if (ext4_has_inline_data(inode)) | ||
2840 | ret = ext4_readpage_inline(inode, page); | ||
2841 | |||
2842 | if (ret == -EAGAIN) | ||
2843 | return mpage_readpage(page, ext4_get_block); | ||
2844 | |||
2845 | return ret; | ||
2771 | } | 2846 | } |
2772 | 2847 | ||
2773 | static int | 2848 | static int |
2774 | ext4_readpages(struct file *file, struct address_space *mapping, | 2849 | ext4_readpages(struct file *file, struct address_space *mapping, |
2775 | struct list_head *pages, unsigned nr_pages) | 2850 | struct list_head *pages, unsigned nr_pages) |
2776 | { | 2851 | { |
2852 | struct inode *inode = mapping->host; | ||
2853 | |||
2854 | /* If the file has inline data, no need to do readpages. */ | ||
2855 | if (ext4_has_inline_data(inode)) | ||
2856 | return 0; | ||
2857 | |||
2777 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2858 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
2778 | } | 2859 | } |
2779 | 2860 | ||
@@ -2840,7 +2921,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
2840 | * We allocate an uinitialized extent if blocks haven't been allocated. | 2921 | * We allocate an uinitialized extent if blocks haven't been allocated. |
2841 | * The extent will be converted to initialized after the IO is complete. | 2922 | * The extent will be converted to initialized after the IO is complete. |
2842 | */ | 2923 | */ |
2843 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2924 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
2844 | struct buffer_head *bh_result, int create) | 2925 | struct buffer_head *bh_result, int create) |
2845 | { | 2926 | { |
2846 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 2927 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
@@ -2850,29 +2931,12 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
2850 | } | 2931 | } |
2851 | 2932 | ||
2852 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | 2933 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, |
2853 | struct buffer_head *bh_result, int flags) | 2934 | struct buffer_head *bh_result, int create) |
2854 | { | 2935 | { |
2855 | handle_t *handle = ext4_journal_current_handle(); | 2936 | ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n", |
2856 | struct ext4_map_blocks map; | 2937 | inode->i_ino, create); |
2857 | int ret = 0; | 2938 | return _ext4_get_block(inode, iblock, bh_result, |
2858 | 2939 | EXT4_GET_BLOCKS_NO_LOCK); | |
2859 | ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n", | ||
2860 | inode->i_ino, flags); | ||
2861 | |||
2862 | flags = EXT4_GET_BLOCKS_NO_LOCK; | ||
2863 | |||
2864 | map.m_lblk = iblock; | ||
2865 | map.m_len = bh_result->b_size >> inode->i_blkbits; | ||
2866 | |||
2867 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
2868 | if (ret > 0) { | ||
2869 | map_bh(bh_result, inode->i_sb, map.m_pblk); | ||
2870 | bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) | | ||
2871 | map.m_flags; | ||
2872 | bh_result->b_size = inode->i_sb->s_blocksize * map.m_len; | ||
2873 | ret = 0; | ||
2874 | } | ||
2875 | return ret; | ||
2876 | } | 2940 | } |
2877 | 2941 | ||
2878 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 2942 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
@@ -2978,10 +3042,10 @@ retry: | |||
2978 | * fall back to buffered IO. | 3042 | * fall back to buffered IO. |
2979 | * | 3043 | * |
2980 | * For holes, we fallocate those blocks, mark them as uninitialized | 3044 | * For holes, we fallocate those blocks, mark them as uninitialized |
2981 | * If those blocks were preallocated, we mark sure they are splited, but | 3045 | * If those blocks were preallocated, we make sure they are split, but |
2982 | * still keep the range to write as uninitialized. | 3046 | * still keep the range to write as uninitialized. |
2983 | * | 3047 | * |
2984 | * The unwrritten extents will be converted to written when DIO is completed. | 3048 | * The unwritten extents will be converted to written when DIO is completed. |
2985 | * For async direct IO, since the IO may still pending when return, we | 3049 | * For async direct IO, since the IO may still pending when return, we |
2986 | * set up an end_io call back function, which will do the conversion | 3050 | * set up an end_io call back function, which will do the conversion |
2987 | * when async direct IO completed. | 3051 | * when async direct IO completed. |
@@ -2999,125 +3063,120 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2999 | struct inode *inode = file->f_mapping->host; | 3063 | struct inode *inode = file->f_mapping->host; |
3000 | ssize_t ret; | 3064 | ssize_t ret; |
3001 | size_t count = iov_length(iov, nr_segs); | 3065 | size_t count = iov_length(iov, nr_segs); |
3002 | 3066 | int overwrite = 0; | |
3067 | get_block_t *get_block_func = NULL; | ||
3068 | int dio_flags = 0; | ||
3003 | loff_t final_size = offset + count; | 3069 | loff_t final_size = offset + count; |
3004 | if (rw == WRITE && final_size <= inode->i_size) { | ||
3005 | int overwrite = 0; | ||
3006 | 3070 | ||
3007 | BUG_ON(iocb->private == NULL); | 3071 | /* Use the old path for reads and writes beyond i_size. */ |
3072 | if (rw != WRITE || final_size > inode->i_size) | ||
3073 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | ||
3008 | 3074 | ||
3009 | /* If we do a overwrite dio, i_mutex locking can be released */ | 3075 | BUG_ON(iocb->private == NULL); |
3010 | overwrite = *((int *)iocb->private); | ||
3011 | 3076 | ||
3012 | if (overwrite) { | 3077 | /* If we do a overwrite dio, i_mutex locking can be released */ |
3013 | atomic_inc(&inode->i_dio_count); | 3078 | overwrite = *((int *)iocb->private); |
3014 | down_read(&EXT4_I(inode)->i_data_sem); | ||
3015 | mutex_unlock(&inode->i_mutex); | ||
3016 | } | ||
3017 | 3079 | ||
3018 | /* | 3080 | if (overwrite) { |
3019 | * We could direct write to holes and fallocate. | 3081 | atomic_inc(&inode->i_dio_count); |
3020 | * | 3082 | down_read(&EXT4_I(inode)->i_data_sem); |
3021 | * Allocated blocks to fill the hole are marked as uninitialized | 3083 | mutex_unlock(&inode->i_mutex); |
3022 | * to prevent parallel buffered read to expose the stale data | 3084 | } |
3023 | * before DIO complete the data IO. | ||
3024 | * | ||
3025 | * As to previously fallocated extents, ext4 get_block | ||
3026 | * will just simply mark the buffer mapped but still | ||
3027 | * keep the extents uninitialized. | ||
3028 | * | ||
3029 | * for non AIO case, we will convert those unwritten extents | ||
3030 | * to written after return back from blockdev_direct_IO. | ||
3031 | * | ||
3032 | * for async DIO, the conversion needs to be defered when | ||
3033 | * the IO is completed. The ext4 end_io callback function | ||
3034 | * will be called to take care of the conversion work. | ||
3035 | * Here for async case, we allocate an io_end structure to | ||
3036 | * hook to the iocb. | ||
3037 | */ | ||
3038 | iocb->private = NULL; | ||
3039 | ext4_inode_aio_set(inode, NULL); | ||
3040 | if (!is_sync_kiocb(iocb)) { | ||
3041 | ext4_io_end_t *io_end = | ||
3042 | ext4_init_io_end(inode, GFP_NOFS); | ||
3043 | if (!io_end) { | ||
3044 | ret = -ENOMEM; | ||
3045 | goto retake_lock; | ||
3046 | } | ||
3047 | io_end->flag |= EXT4_IO_END_DIRECT; | ||
3048 | iocb->private = io_end; | ||
3049 | /* | ||
3050 | * we save the io structure for current async | ||
3051 | * direct IO, so that later ext4_map_blocks() | ||
3052 | * could flag the io structure whether there | ||
3053 | * is a unwritten extents needs to be converted | ||
3054 | * when IO is completed. | ||
3055 | */ | ||
3056 | ext4_inode_aio_set(inode, io_end); | ||
3057 | } | ||
3058 | 3085 | ||
3059 | if (overwrite) | 3086 | /* |
3060 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3087 | * We could direct write to holes and fallocate. |
3061 | inode->i_sb->s_bdev, iov, | 3088 | * |
3062 | offset, nr_segs, | 3089 | * Allocated blocks to fill the hole are marked as |
3063 | ext4_get_block_write_nolock, | 3090 | * uninitialized to prevent parallel buffered read to expose |
3064 | ext4_end_io_dio, | 3091 | * the stale data before DIO complete the data IO. |
3065 | NULL, | 3092 | * |
3066 | 0); | 3093 | * As to previously fallocated extents, ext4 get_block will |
3067 | else | 3094 | * just simply mark the buffer mapped but still keep the |
3068 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3095 | * extents uninitialized. |
3069 | inode->i_sb->s_bdev, iov, | 3096 | * |
3070 | offset, nr_segs, | 3097 | * For non AIO case, we will convert those unwritten extents |
3071 | ext4_get_block_write, | 3098 | * to written after return back from blockdev_direct_IO. |
3072 | ext4_end_io_dio, | 3099 | * |
3073 | NULL, | 3100 | * For async DIO, the conversion needs to be deferred when the |
3074 | DIO_LOCKING); | 3101 | * IO is completed. The ext4 end_io callback function will be |
3075 | if (iocb->private) | 3102 | * called to take care of the conversion work. Here for async |
3076 | ext4_inode_aio_set(inode, NULL); | 3103 | * case, we allocate an io_end structure to hook to the iocb. |
3104 | */ | ||
3105 | iocb->private = NULL; | ||
3106 | ext4_inode_aio_set(inode, NULL); | ||
3107 | if (!is_sync_kiocb(iocb)) { | ||
3108 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
3109 | if (!io_end) { | ||
3110 | ret = -ENOMEM; | ||
3111 | goto retake_lock; | ||
3112 | } | ||
3113 | io_end->flag |= EXT4_IO_END_DIRECT; | ||
3114 | iocb->private = io_end; | ||
3077 | /* | 3115 | /* |
3078 | * The io_end structure takes a reference to the inode, | 3116 | * we save the io structure for current async direct |
3079 | * that structure needs to be destroyed and the | 3117 | * IO, so that later ext4_map_blocks() could flag the |
3080 | * reference to the inode need to be dropped, when IO is | 3118 | * io structure whether there are unwritten extents |
3081 | * complete, even with 0 byte write, or failed. | 3119 | * that need to be converted when IO is completed. |
3082 | * | ||
3083 | * In the successful AIO DIO case, the io_end structure will be | ||
3084 | * desctroyed and the reference to the inode will be dropped | ||
3085 | * after the end_io call back function is called. | ||
3086 | * | ||
3087 | * In the case there is 0 byte write, or error case, since | ||
3088 | * VFS direct IO won't invoke the end_io call back function, | ||
3089 | * we need to free the end_io structure here. | ||
3090 | */ | 3120 | */ |
3091 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3121 | ext4_inode_aio_set(inode, io_end); |
3092 | ext4_free_io_end(iocb->private); | 3122 | } |
3093 | iocb->private = NULL; | ||
3094 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3095 | EXT4_STATE_DIO_UNWRITTEN)) { | ||
3096 | int err; | ||
3097 | /* | ||
3098 | * for non AIO case, since the IO is already | ||
3099 | * completed, we could do the conversion right here | ||
3100 | */ | ||
3101 | err = ext4_convert_unwritten_extents(inode, | ||
3102 | offset, ret); | ||
3103 | if (err < 0) | ||
3104 | ret = err; | ||
3105 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | ||
3106 | } | ||
3107 | 3123 | ||
3108 | retake_lock: | 3124 | if (overwrite) { |
3109 | /* take i_mutex locking again if we do a ovewrite dio */ | 3125 | get_block_func = ext4_get_block_write_nolock; |
3110 | if (overwrite) { | 3126 | } else { |
3111 | inode_dio_done(inode); | 3127 | get_block_func = ext4_get_block_write; |
3112 | up_read(&EXT4_I(inode)->i_data_sem); | 3128 | dio_flags = DIO_LOCKING; |
3113 | mutex_lock(&inode->i_mutex); | 3129 | } |
3114 | } | 3130 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3131 | inode->i_sb->s_bdev, iov, | ||
3132 | offset, nr_segs, | ||
3133 | get_block_func, | ||
3134 | ext4_end_io_dio, | ||
3135 | NULL, | ||
3136 | dio_flags); | ||
3137 | |||
3138 | if (iocb->private) | ||
3139 | ext4_inode_aio_set(inode, NULL); | ||
3140 | /* | ||
3141 | * The io_end structure takes a reference to the inode, that | ||
3142 | * structure needs to be destroyed and the reference to the | ||
3143 | * inode needs to be dropped when IO is complete, even with 0 | ||
3144 | * byte write, or failed. | ||
3145 | * | ||
3146 | * In the successful AIO DIO case, the io_end structure will | ||
3147 | * be destroyed and the reference to the inode will be dropped | ||
3148 | * after the end_io call back function is called. | ||
3149 | * | ||
3150 | * In the case there is 0 byte write, or error case, since VFS | ||
3151 | * direct IO won't invoke the end_io call back function, we | ||
3152 | * need to free the end_io structure here. | ||
3153 | */ | ||
3154 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | ||
3155 | ext4_free_io_end(iocb->private); | ||
3156 | iocb->private = NULL; | ||
3157 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3158 | EXT4_STATE_DIO_UNWRITTEN)) { | ||
3159 | int err; | ||
3160 | /* | ||
3161 | * for non AIO case, since the IO is already | ||
3162 | * completed, we could do the conversion right here | ||
3163 | */ | ||
3164 | err = ext4_convert_unwritten_extents(inode, | ||
3165 | offset, ret); | ||
3166 | if (err < 0) | ||
3167 | ret = err; | ||
3168 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | ||
3169 | } | ||
3115 | 3170 | ||
3116 | return ret; | 3171 | retake_lock: |
3172 | /* take i_mutex locking again if we do an overwrite dio */ | ||
3173 | if (overwrite) { | ||
3174 | inode_dio_done(inode); | ||
3175 | up_read(&EXT4_I(inode)->i_data_sem); | ||
3176 | mutex_lock(&inode->i_mutex); | ||
3117 | } | 3177 | } |
3118 | 3178 | ||
3119 | /* for write the the end of file case, we fall back to old way */ | 3179 | return ret; |
3120 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | ||
3121 | } | 3180 | } |
3122 | 3181 | ||
3123 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 3182 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |
@@ -3134,6 +3193,10 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3134 | if (ext4_should_journal_data(inode)) | 3193 | if (ext4_should_journal_data(inode)) |
3135 | return 0; | 3194 | return 0; |
3136 | 3195 | ||
3196 | /* Let buffer I/O handle the inline data case. */ | ||
3197 | if (ext4_has_inline_data(inode)) | ||
3198 | return 0; | ||
3199 | |||
3137 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3200 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); |
3138 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3201 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3139 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3202 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
@@ -3531,6 +3594,14 @@ void ext4_truncate(struct inode *inode) | |||
3531 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 3594 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
3532 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 3595 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
3533 | 3596 | ||
3597 | if (ext4_has_inline_data(inode)) { | ||
3598 | int has_inline = 1; | ||
3599 | |||
3600 | ext4_inline_data_truncate(inode, &has_inline); | ||
3601 | if (has_inline) | ||
3602 | return; | ||
3603 | } | ||
3604 | |||
3534 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3605 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3535 | ext4_ext_truncate(inode); | 3606 | ext4_ext_truncate(inode); |
3536 | else | 3607 | else |
@@ -3756,6 +3827,19 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | |||
3756 | } | 3827 | } |
3757 | } | 3828 | } |
3758 | 3829 | ||
3830 | static inline void ext4_iget_extra_inode(struct inode *inode, | ||
3831 | struct ext4_inode *raw_inode, | ||
3832 | struct ext4_inode_info *ei) | ||
3833 | { | ||
3834 | __le32 *magic = (void *)raw_inode + | ||
3835 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; | ||
3836 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { | ||
3837 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | ||
3838 | ext4_find_inline_data_nolock(inode); | ||
3839 | } else | ||
3840 | EXT4_I(inode)->i_inline_off = 0; | ||
3841 | } | ||
3842 | |||
3759 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | 3843 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) |
3760 | { | 3844 | { |
3761 | struct ext4_iloc iloc; | 3845 | struct ext4_iloc iloc; |
@@ -3826,6 +3910,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3826 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); | 3910 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
3827 | 3911 | ||
3828 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ | 3912 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
3913 | ei->i_inline_off = 0; | ||
3829 | ei->i_dir_start_lookup = 0; | 3914 | ei->i_dir_start_lookup = 0; |
3830 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 3915 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
3831 | /* We now have enough fields to check if the inode was active or not. | 3916 | /* We now have enough fields to check if the inode was active or not. |
@@ -3898,11 +3983,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3898 | ei->i_extra_isize = sizeof(struct ext4_inode) - | 3983 | ei->i_extra_isize = sizeof(struct ext4_inode) - |
3899 | EXT4_GOOD_OLD_INODE_SIZE; | 3984 | EXT4_GOOD_OLD_INODE_SIZE; |
3900 | } else { | 3985 | } else { |
3901 | __le32 *magic = (void *)raw_inode + | 3986 | ext4_iget_extra_inode(inode, raw_inode, ei); |
3902 | EXT4_GOOD_OLD_INODE_SIZE + | ||
3903 | ei->i_extra_isize; | ||
3904 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | ||
3905 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | ||
3906 | } | 3987 | } |
3907 | } | 3988 | } |
3908 | 3989 | ||
@@ -3925,17 +4006,19 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3925 | ei->i_file_acl); | 4006 | ei->i_file_acl); |
3926 | ret = -EIO; | 4007 | ret = -EIO; |
3927 | goto bad_inode; | 4008 | goto bad_inode; |
3928 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4009 | } else if (!ext4_has_inline_data(inode)) { |
3929 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4010 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
3930 | (S_ISLNK(inode->i_mode) && | 4011 | if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
3931 | !ext4_inode_is_fast_symlink(inode))) | 4012 | (S_ISLNK(inode->i_mode) && |
3932 | /* Validate extent which is part of inode */ | 4013 | !ext4_inode_is_fast_symlink(inode)))) |
3933 | ret = ext4_ext_check_inode(inode); | 4014 | /* Validate extent which is part of inode */ |
3934 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4015 | ret = ext4_ext_check_inode(inode); |
3935 | (S_ISLNK(inode->i_mode) && | 4016 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
3936 | !ext4_inode_is_fast_symlink(inode))) { | 4017 | (S_ISLNK(inode->i_mode) && |
3937 | /* Validate block references which are part of inode */ | 4018 | !ext4_inode_is_fast_symlink(inode))) { |
3938 | ret = ext4_ind_check_inode(inode); | 4019 | /* Validate block references which are part of inode */ |
4020 | ret = ext4_ind_check_inode(inode); | ||
4021 | } | ||
3939 | } | 4022 | } |
3940 | if (ret) | 4023 | if (ret) |
3941 | goto bad_inode; | 4024 | goto bad_inode; |
@@ -4122,9 +4205,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4122 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | 4205 | cpu_to_le32(new_encode_dev(inode->i_rdev)); |
4123 | raw_inode->i_block[2] = 0; | 4206 | raw_inode->i_block[2] = 0; |
4124 | } | 4207 | } |
4125 | } else | 4208 | } else if (!ext4_has_inline_data(inode)) { |
4126 | for (block = 0; block < EXT4_N_BLOCKS; block++) | 4209 | for (block = 0; block < EXT4_N_BLOCKS; block++) |
4127 | raw_inode->i_block[block] = ei->i_data[block]; | 4210 | raw_inode->i_block[block] = ei->i_data[block]; |
4211 | } | ||
4128 | 4212 | ||
4129 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); | 4213 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
4130 | if (ei->i_extra_isize) { | 4214 | if (ei->i_extra_isize) { |
@@ -4811,8 +4895,9 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
4811 | * journal_start/journal_stop which can block and take a long time | 4895 | * journal_start/journal_stop which can block and take a long time |
4812 | */ | 4896 | */ |
4813 | if (page_has_buffers(page)) { | 4897 | if (page_has_buffers(page)) { |
4814 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 4898 | if (!ext4_walk_page_buffers(NULL, page_buffers(page), |
4815 | ext4_bh_unmapped)) { | 4899 | 0, len, NULL, |
4900 | ext4_bh_unmapped)) { | ||
4816 | /* Wait so that we don't change page under IO */ | 4901 | /* Wait so that we don't change page under IO */ |
4817 | wait_on_page_writeback(page); | 4902 | wait_on_page_writeback(page); |
4818 | ret = VM_FAULT_LOCKED; | 4903 | ret = VM_FAULT_LOCKED; |
@@ -4833,7 +4918,7 @@ retry_alloc: | |||
4833 | } | 4918 | } |
4834 | ret = __block_page_mkwrite(vma, vmf, get_block); | 4919 | ret = __block_page_mkwrite(vma, vmf, get_block); |
4835 | if (!ret && ext4_should_journal_data(inode)) { | 4920 | if (!ret && ext4_should_journal_data(inode)) { |
4836 | if (walk_page_buffers(handle, page_buffers(page), 0, | 4921 | if (ext4_walk_page_buffers(handle, page_buffers(page), 0, |
4837 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 4922 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
4838 | unlock_page(page); | 4923 | unlock_page(page); |
4839 | ret = VM_FAULT_SIGBUS; | 4924 | ret = VM_FAULT_SIGBUS; |