diff options
author | Tao Ma <boyu.mt@taobao.com> | 2012-12-10 14:05:51 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-12-10 14:05:51 -0500 |
commit | f19d5870cbf72d4cb2a8e1f749dff97af99b071e (patch) | |
tree | e919ba96fde504b6d697d1e13abb080d7800f61b /fs/ext4/inode.c | |
parent | 46c7f254543dedcf134ad05091ed2b935a9a597d (diff) |
ext4: add normal write support for inline data
For a normal write case (not a journalled write, not delayed
allocation), we write the data inline if the file is small, and
convert it to an extent-based file when the write is larger than the
maximum inline size.
Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 103 |
1 file changed, 62 insertions, 41 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1668abf80549..70c8d5f323f0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -770,13 +770,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
770 | return NULL; | 770 | return NULL; |
771 | } | 771 | } |
772 | 772 | ||
773 | static int walk_page_buffers(handle_t *handle, | 773 | int ext4_walk_page_buffers(handle_t *handle, |
774 | struct buffer_head *head, | 774 | struct buffer_head *head, |
775 | unsigned from, | 775 | unsigned from, |
776 | unsigned to, | 776 | unsigned to, |
777 | int *partial, | 777 | int *partial, |
778 | int (*fn)(handle_t *handle, | 778 | int (*fn)(handle_t *handle, |
779 | struct buffer_head *bh)) | 779 | struct buffer_head *bh)) |
780 | { | 780 | { |
781 | struct buffer_head *bh; | 781 | struct buffer_head *bh; |
782 | unsigned block_start, block_end; | 782 | unsigned block_start, block_end; |
@@ -826,8 +826,8 @@ static int walk_page_buffers(handle_t *handle, | |||
826 | * is elevated. We'll still have enough credits for the tiny quotafile | 826 | * is elevated. We'll still have enough credits for the tiny quotafile |
827 | * write. | 827 | * write. |
828 | */ | 828 | */ |
829 | static int do_journal_get_write_access(handle_t *handle, | 829 | int do_journal_get_write_access(handle_t *handle, |
830 | struct buffer_head *bh) | 830 | struct buffer_head *bh) |
831 | { | 831 | { |
832 | int dirty = buffer_dirty(bh); | 832 | int dirty = buffer_dirty(bh); |
833 | int ret; | 833 | int ret; |
@@ -850,8 +850,6 @@ static int do_journal_get_write_access(handle_t *handle, | |||
850 | return ret; | 850 | return ret; |
851 | } | 851 | } |
852 | 852 | ||
853 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | ||
854 | struct buffer_head *bh_result, int create); | ||
855 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | 853 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, |
856 | struct buffer_head *bh_result, int create); | 854 | struct buffer_head *bh_result, int create); |
857 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 855 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
@@ -876,6 +874,17 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
876 | from = pos & (PAGE_CACHE_SIZE - 1); | 874 | from = pos & (PAGE_CACHE_SIZE - 1); |
877 | to = from + len; | 875 | to = from + len; |
878 | 876 | ||
877 | if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { | ||
878 | ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, | ||
879 | flags, pagep); | ||
880 | if (ret < 0) | ||
881 | goto out; | ||
882 | if (ret == 1) { | ||
883 | ret = 0; | ||
884 | goto out; | ||
885 | } | ||
886 | } | ||
887 | |||
879 | retry: | 888 | retry: |
880 | handle = ext4_journal_start(inode, needed_blocks); | 889 | handle = ext4_journal_start(inode, needed_blocks); |
881 | if (IS_ERR(handle)) { | 890 | if (IS_ERR(handle)) { |
@@ -893,6 +902,7 @@ retry: | |||
893 | ret = -ENOMEM; | 902 | ret = -ENOMEM; |
894 | goto out; | 903 | goto out; |
895 | } | 904 | } |
905 | |||
896 | *pagep = page; | 906 | *pagep = page; |
897 | 907 | ||
898 | if (ext4_should_dioread_nolock(inode)) | 908 | if (ext4_should_dioread_nolock(inode)) |
@@ -901,8 +911,9 @@ retry: | |||
901 | ret = __block_write_begin(page, pos, len, ext4_get_block); | 911 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
902 | 912 | ||
903 | if (!ret && ext4_should_journal_data(inode)) { | 913 | if (!ret && ext4_should_journal_data(inode)) { |
904 | ret = walk_page_buffers(handle, page_buffers(page), | 914 | ret = ext4_walk_page_buffers(handle, page_buffers(page), |
905 | from, to, NULL, do_journal_get_write_access); | 915 | from, to, NULL, |
916 | do_journal_get_write_access); | ||
906 | } | 917 | } |
907 | 918 | ||
908 | if (ret) { | 919 | if (ret) { |
@@ -957,7 +968,12 @@ static int ext4_generic_write_end(struct file *file, | |||
957 | struct inode *inode = mapping->host; | 968 | struct inode *inode = mapping->host; |
958 | handle_t *handle = ext4_journal_current_handle(); | 969 | handle_t *handle = ext4_journal_current_handle(); |
959 | 970 | ||
960 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 971 | if (ext4_has_inline_data(inode)) |
972 | copied = ext4_write_inline_data_end(inode, pos, len, | ||
973 | copied, page); | ||
974 | else | ||
975 | copied = block_write_end(file, mapping, pos, | ||
976 | len, copied, page, fsdata); | ||
961 | 977 | ||
962 | /* | 978 | /* |
963 | * No need to use i_size_read() here, the i_size | 979 | * No need to use i_size_read() here, the i_size |
@@ -1114,8 +1130,8 @@ static int ext4_journalled_write_end(struct file *file, | |||
1114 | page_zero_new_buffers(page, from+copied, to); | 1130 | page_zero_new_buffers(page, from+copied, to); |
1115 | } | 1131 | } |
1116 | 1132 | ||
1117 | ret = walk_page_buffers(handle, page_buffers(page), from, | 1133 | ret = ext4_walk_page_buffers(handle, page_buffers(page), from, |
1118 | to, &partial, write_end_fn); | 1134 | to, &partial, write_end_fn); |
1119 | if (!partial) | 1135 | if (!partial) |
1120 | SetPageUptodate(page); | 1136 | SetPageUptodate(page); |
1121 | new_i_size = pos + copied; | 1137 | new_i_size = pos + copied; |
@@ -1903,7 +1919,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1903 | ClearPageChecked(page); | 1919 | ClearPageChecked(page); |
1904 | page_bufs = page_buffers(page); | 1920 | page_bufs = page_buffers(page); |
1905 | BUG_ON(!page_bufs); | 1921 | BUG_ON(!page_bufs); |
1906 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); | 1922 | ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); |
1907 | /* As soon as we unlock the page, it can go away, but we have | 1923 | /* As soon as we unlock the page, it can go away, but we have |
1908 | * references to buffers so we are safe */ | 1924 | * references to buffers so we are safe */ |
1909 | unlock_page(page); | 1925 | unlock_page(page); |
@@ -1916,11 +1932,11 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1916 | 1932 | ||
1917 | BUG_ON(!ext4_handle_valid(handle)); | 1933 | BUG_ON(!ext4_handle_valid(handle)); |
1918 | 1934 | ||
1919 | ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1935 | ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1920 | do_journal_get_write_access); | 1936 | do_journal_get_write_access); |
1921 | 1937 | ||
1922 | err = walk_page_buffers(handle, page_bufs, 0, len, NULL, | 1938 | err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, |
1923 | write_end_fn); | 1939 | write_end_fn); |
1924 | if (ret == 0) | 1940 | if (ret == 0) |
1925 | ret = err; | 1941 | ret = err; |
1926 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; | 1942 | EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; |
@@ -1928,7 +1944,7 @@ static int __ext4_journalled_writepage(struct page *page, | |||
1928 | if (!ret) | 1944 | if (!ret) |
1929 | ret = err; | 1945 | ret = err; |
1930 | 1946 | ||
1931 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 1947 | ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); |
1932 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 1948 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1933 | out: | 1949 | out: |
1934 | return ret; | 1950 | return ret; |
@@ -2007,8 +2023,8 @@ static int ext4_writepage(struct page *page, | |||
2007 | commit_write = 1; | 2023 | commit_write = 1; |
2008 | } | 2024 | } |
2009 | page_bufs = page_buffers(page); | 2025 | page_bufs = page_buffers(page); |
2010 | if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, | 2026 | if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, |
2011 | ext4_bh_delay_or_unwritten)) { | 2027 | ext4_bh_delay_or_unwritten)) { |
2012 | /* | 2028 | /* |
2013 | * We don't want to do block allocation, so redirty | 2029 | * We don't want to do block allocation, so redirty |
2014 | * the page and return. We may reach here when we do | 2030 | * the page and return. We may reach here when we do |
@@ -2831,7 +2847,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
2831 | * We allocate an uinitialized extent if blocks haven't been allocated. | 2847 | * We allocate an uinitialized extent if blocks haven't been allocated. |
2832 | * The extent will be converted to initialized after the IO is complete. | 2848 | * The extent will be converted to initialized after the IO is complete. |
2833 | */ | 2849 | */ |
2834 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 2850 | int ext4_get_block_write(struct inode *inode, sector_t iblock, |
2835 | struct buffer_head *bh_result, int create) | 2851 | struct buffer_head *bh_result, int create) |
2836 | { | 2852 | { |
2837 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 2853 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
@@ -3738,7 +3754,8 @@ static inline void ext4_iget_extra_inode(struct inode *inode, | |||
3738 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { | 3754 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { |
3739 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | 3755 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
3740 | ext4_find_inline_data_nolock(inode); | 3756 | ext4_find_inline_data_nolock(inode); |
3741 | } | 3757 | } else |
3758 | EXT4_I(inode)->i_inline_off = 0; | ||
3742 | } | 3759 | } |
3743 | 3760 | ||
3744 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | 3761 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) |
@@ -3907,17 +3924,19 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3907 | ei->i_file_acl); | 3924 | ei->i_file_acl); |
3908 | ret = -EIO; | 3925 | ret = -EIO; |
3909 | goto bad_inode; | 3926 | goto bad_inode; |
3910 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 3927 | } else if (!ext4_has_inline_data(inode)) { |
3911 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 3928 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
3912 | (S_ISLNK(inode->i_mode) && | 3929 | if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
3913 | !ext4_inode_is_fast_symlink(inode))) | 3930 | (S_ISLNK(inode->i_mode) && |
3914 | /* Validate extent which is part of inode */ | 3931 | !ext4_inode_is_fast_symlink(inode)))) |
3915 | ret = ext4_ext_check_inode(inode); | 3932 | /* Validate extent which is part of inode */ |
3916 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 3933 | ret = ext4_ext_check_inode(inode); |
3917 | (S_ISLNK(inode->i_mode) && | 3934 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
3918 | !ext4_inode_is_fast_symlink(inode))) { | 3935 | (S_ISLNK(inode->i_mode) && |
3919 | /* Validate block references which are part of inode */ | 3936 | !ext4_inode_is_fast_symlink(inode))) { |
3920 | ret = ext4_ind_check_inode(inode); | 3937 | /* Validate block references which are part of inode */ |
3938 | ret = ext4_ind_check_inode(inode); | ||
3939 | } | ||
3921 | } | 3940 | } |
3922 | if (ret) | 3941 | if (ret) |
3923 | goto bad_inode; | 3942 | goto bad_inode; |
@@ -4104,9 +4123,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4104 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | 4123 | cpu_to_le32(new_encode_dev(inode->i_rdev)); |
4105 | raw_inode->i_block[2] = 0; | 4124 | raw_inode->i_block[2] = 0; |
4106 | } | 4125 | } |
4107 | } else | 4126 | } else if (!ext4_has_inline_data(inode)) { |
4108 | for (block = 0; block < EXT4_N_BLOCKS; block++) | 4127 | for (block = 0; block < EXT4_N_BLOCKS; block++) |
4109 | raw_inode->i_block[block] = ei->i_data[block]; | 4128 | raw_inode->i_block[block] = ei->i_data[block]; |
4129 | } | ||
4110 | 4130 | ||
4111 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); | 4131 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
4112 | if (ei->i_extra_isize) { | 4132 | if (ei->i_extra_isize) { |
@@ -4793,8 +4813,9 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
4793 | * journal_start/journal_stop which can block and take a long time | 4813 | * journal_start/journal_stop which can block and take a long time |
4794 | */ | 4814 | */ |
4795 | if (page_has_buffers(page)) { | 4815 | if (page_has_buffers(page)) { |
4796 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 4816 | if (!ext4_walk_page_buffers(NULL, page_buffers(page), |
4797 | ext4_bh_unmapped)) { | 4817 | 0, len, NULL, |
4818 | ext4_bh_unmapped)) { | ||
4798 | /* Wait so that we don't change page under IO */ | 4819 | /* Wait so that we don't change page under IO */ |
4799 | wait_on_page_writeback(page); | 4820 | wait_on_page_writeback(page); |
4800 | ret = VM_FAULT_LOCKED; | 4821 | ret = VM_FAULT_LOCKED; |
@@ -4815,7 +4836,7 @@ retry_alloc: | |||
4815 | } | 4836 | } |
4816 | ret = __block_page_mkwrite(vma, vmf, get_block); | 4837 | ret = __block_page_mkwrite(vma, vmf, get_block); |
4817 | if (!ret && ext4_should_journal_data(inode)) { | 4838 | if (!ret && ext4_should_journal_data(inode)) { |
4818 | if (walk_page_buffers(handle, page_buffers(page), 0, | 4839 | if (ext4_walk_page_buffers(handle, page_buffers(page), 0, |
4819 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 4840 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
4820 | unlock_page(page); | 4841 | unlock_page(page); |
4821 | ret = VM_FAULT_SIGBUS; | 4842 | ret = VM_FAULT_SIGBUS; |