diff options
Diffstat (limited to 'fs/ext4/inode.c')
| -rw-r--r-- | fs/ext4/inode.c | 496 | 
1 files changed, 339 insertions, 157 deletions
| diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..11119e07233b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/uio.h> | 38 | #include <linux/uio.h> | 
| 39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> | 
| 40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> | 
| 41 | #include <linux/kernel.h> | ||
| 41 | 42 | ||
| 42 | #include "ext4_jbd2.h" | 43 | #include "ext4_jbd2.h" | 
| 43 | #include "xattr.h" | 44 | #include "xattr.h" | 
| @@ -170,6 +171,9 @@ void ext4_delete_inode(struct inode *inode) | |||
| 170 | handle_t *handle; | 171 | handle_t *handle; | 
| 171 | int err; | 172 | int err; | 
| 172 | 173 | ||
| 174 | if (!is_bad_inode(inode)) | ||
| 175 | dquot_initialize(inode); | ||
| 176 | |||
| 173 | if (ext4_should_order_data(inode)) | 177 | if (ext4_should_order_data(inode)) | 
| 174 | ext4_begin_ordered_truncate(inode, 0); | 178 | ext4_begin_ordered_truncate(inode, 0); | 
| 175 | truncate_inode_pages(&inode->i_data, 0); | 179 | truncate_inode_pages(&inode->i_data, 0); | 
| @@ -194,7 +198,7 @@ void ext4_delete_inode(struct inode *inode) | |||
| 194 | inode->i_size = 0; | 198 | inode->i_size = 0; | 
| 195 | err = ext4_mark_inode_dirty(handle, inode); | 199 | err = ext4_mark_inode_dirty(handle, inode); | 
| 196 | if (err) { | 200 | if (err) { | 
| 197 | ext4_warning(inode->i_sb, __func__, | 201 | ext4_warning(inode->i_sb, | 
| 198 | "couldn't mark inode dirty (err %d)", err); | 202 | "couldn't mark inode dirty (err %d)", err); | 
| 199 | goto stop_handle; | 203 | goto stop_handle; | 
| 200 | } | 204 | } | 
| @@ -212,7 +216,7 @@ void ext4_delete_inode(struct inode *inode) | |||
| 212 | if (err > 0) | 216 | if (err > 0) | 
| 213 | err = ext4_journal_restart(handle, 3); | 217 | err = ext4_journal_restart(handle, 3); | 
| 214 | if (err != 0) { | 218 | if (err != 0) { | 
| 215 | ext4_warning(inode->i_sb, __func__, | 219 | ext4_warning(inode->i_sb, | 
| 216 | "couldn't extend journal (err %d)", err); | 220 | "couldn't extend journal (err %d)", err); | 
| 217 | stop_handle: | 221 | stop_handle: | 
| 218 | ext4_journal_stop(handle); | 222 | ext4_journal_stop(handle); | 
| @@ -323,8 +327,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
| 323 | offsets[n++] = i_block & (ptrs - 1); | 327 | offsets[n++] = i_block & (ptrs - 1); | 
| 324 | final = ptrs; | 328 | final = ptrs; | 
| 325 | } else { | 329 | } else { | 
| 326 | ext4_warning(inode->i_sb, "ext4_block_to_path", | 330 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", | 
| 327 | "block %lu > max in inode %lu", | ||
| 328 | i_block + direct_blocks + | 331 | i_block + direct_blocks + | 
| 329 | indirect_blocks + double_blocks, inode->i_ino); | 332 | indirect_blocks + double_blocks, inode->i_ino); | 
| 330 | } | 333 | } | 
| @@ -344,7 +347,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
| 344 | if (blk && | 347 | if (blk && | 
| 345 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 348 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 
| 346 | blk, 1))) { | 349 | blk, 1))) { | 
| 347 | ext4_error(inode->i_sb, function, | 350 | __ext4_error(inode->i_sb, function, | 
| 348 | "invalid block reference %u " | 351 | "invalid block reference %u " | 
| 349 | "in inode #%lu", blk, inode->i_ino); | 352 | "in inode #%lu", blk, inode->i_ino); | 
| 350 | return -EIO; | 353 | return -EIO; | 
| @@ -607,7 +610,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 607 | if (*err) | 610 | if (*err) | 
| 608 | goto failed_out; | 611 | goto failed_out; | 
| 609 | 612 | ||
| 610 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | 613 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { | 
| 614 | EXT4_ERROR_INODE(inode, | ||
| 615 | "current_block %llu + count %lu > %d!", | ||
| 616 | current_block, count, | ||
| 617 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
| 618 | *err = -EIO; | ||
| 619 | goto failed_out; | ||
| 620 | } | ||
| 611 | 621 | ||
| 612 | target -= count; | 622 | target -= count; | 
| 613 | /* allocate blocks for indirect blocks */ | 623 | /* allocate blocks for indirect blocks */ | 
| @@ -643,7 +653,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
| 643 | ar.flags = EXT4_MB_HINT_DATA; | 653 | ar.flags = EXT4_MB_HINT_DATA; | 
| 644 | 654 | ||
| 645 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 655 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 
| 646 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | 656 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { | 
| 657 | EXT4_ERROR_INODE(inode, | ||
| 658 | "current_block %llu + ar.len %d > %d!", | ||
| 659 | current_block, ar.len, | ||
| 660 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
| 661 | *err = -EIO; | ||
| 662 | goto failed_out; | ||
| 663 | } | ||
| 647 | 664 | ||
| 648 | if (*err && (target == blks)) { | 665 | if (*err && (target == blks)) { | 
| 649 | /* | 666 | /* | 
| @@ -1018,7 +1035,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
| 1018 | sector_t lblock) | 1035 | sector_t lblock) | 
| 1019 | { | 1036 | { | 
| 1020 | struct ext4_inode_info *ei = EXT4_I(inode); | 1037 | struct ext4_inode_info *ei = EXT4_I(inode); | 
| 1021 | int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; | 1038 | sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); | 
| 1022 | int blk_bits; | 1039 | int blk_bits; | 
| 1023 | 1040 | ||
| 1024 | if (lblock < EXT4_NDIR_BLOCKS) | 1041 | if (lblock < EXT4_NDIR_BLOCKS) | 
| @@ -1033,7 +1050,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
| 1033 | } | 1050 | } | 
| 1034 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | 1051 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | 
| 1035 | ei->i_da_metadata_calc_len = 1; | 1052 | ei->i_da_metadata_calc_len = 1; | 
| 1036 | blk_bits = roundup_pow_of_two(lblock + 1); | 1053 | blk_bits = order_base_2(lblock); | 
| 1037 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 1054 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 
| 1038 | } | 1055 | } | 
| 1039 | 1056 | ||
| @@ -1061,6 +1078,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 1061 | int mdb_free = 0, allocated_meta_blocks = 0; | 1078 | int mdb_free = 0, allocated_meta_blocks = 0; | 
| 1062 | 1079 | ||
| 1063 | spin_lock(&ei->i_block_reservation_lock); | 1080 | spin_lock(&ei->i_block_reservation_lock); | 
| 1081 | trace_ext4_da_update_reserve_space(inode, used); | ||
| 1064 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 1082 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 
| 1065 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 1083 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 
| 1066 | "with only %d reserved data blocks\n", | 1084 | "with only %d reserved data blocks\n", | 
| @@ -1093,9 +1111,9 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 1093 | 1111 | ||
| 1094 | /* Update quota subsystem */ | 1112 | /* Update quota subsystem */ | 
| 1095 | if (quota_claim) { | 1113 | if (quota_claim) { | 
| 1096 | vfs_dq_claim_block(inode, used); | 1114 | dquot_claim_block(inode, used); | 
| 1097 | if (mdb_free) | 1115 | if (mdb_free) | 
| 1098 | vfs_dq_release_reservation_block(inode, mdb_free); | 1116 | dquot_release_reservation_block(inode, mdb_free); | 
| 1099 | } else { | 1117 | } else { | 
| 1100 | /* | 1118 | /* | 
| 1101 | * We did fallocate with an offset that is already delayed | 1119 | * We did fallocate with an offset that is already delayed | 
| @@ -1106,8 +1124,8 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
| 1106 | * that | 1124 | * that | 
| 1107 | */ | 1125 | */ | 
| 1108 | if (allocated_meta_blocks) | 1126 | if (allocated_meta_blocks) | 
| 1109 | vfs_dq_claim_block(inode, allocated_meta_blocks); | 1127 | dquot_claim_block(inode, allocated_meta_blocks); | 
| 1110 | vfs_dq_release_reservation_block(inode, mdb_free + used); | 1128 | dquot_release_reservation_block(inode, mdb_free + used); | 
| 1111 | } | 1129 | } | 
| 1112 | 1130 | ||
| 1113 | /* | 1131 | /* | 
| @@ -1124,7 +1142,7 @@ static int check_block_validity(struct inode *inode, const char *msg, | |||
| 1124 | sector_t logical, sector_t phys, int len) | 1142 | sector_t logical, sector_t phys, int len) | 
| 1125 | { | 1143 | { | 
| 1126 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1144 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 
| 1127 | ext4_error(inode->i_sb, msg, | 1145 | __ext4_error(inode->i_sb, msg, | 
| 1128 | "inode #%lu logical block %llu mapped to %llu " | 1146 | "inode #%lu logical block %llu mapped to %llu " | 
| 1129 | "(size %d)", inode->i_ino, | 1147 | "(size %d)", inode->i_ino, | 
| 1130 | (unsigned long long) logical, | 1148 | (unsigned long long) logical, | 
| @@ -1306,7 +1324,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
| 1306 | * i_data's format changing. Force the migrate | 1324 | * i_data's format changing. Force the migrate | 
| 1307 | * to fail by clearing migrate flags | 1325 | * to fail by clearing migrate flags | 
| 1308 | */ | 1326 | */ | 
| 1309 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; | 1327 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); | 
| 1310 | } | 1328 | } | 
| 1311 | 1329 | ||
| 1312 | /* | 1330 | /* | 
| @@ -1534,6 +1552,8 @@ static void ext4_truncate_failed_write(struct inode *inode) | |||
| 1534 | ext4_truncate(inode); | 1552 | ext4_truncate(inode); | 
| 1535 | } | 1553 | } | 
| 1536 | 1554 | ||
| 1555 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | ||
| 1556 | struct buffer_head *bh_result, int create); | ||
| 1537 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 1557 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 
| 1538 | loff_t pos, unsigned len, unsigned flags, | 1558 | loff_t pos, unsigned len, unsigned flags, | 
| 1539 | struct page **pagep, void **fsdata) | 1559 | struct page **pagep, void **fsdata) | 
| @@ -1575,8 +1595,12 @@ retry: | |||
| 1575 | } | 1595 | } | 
| 1576 | *pagep = page; | 1596 | *pagep = page; | 
| 1577 | 1597 | ||
| 1578 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1598 | if (ext4_should_dioread_nolock(inode)) | 
| 1579 | ext4_get_block); | 1599 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 
| 1600 | fsdata, ext4_get_block_write); | ||
| 1601 | else | ||
| 1602 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | ||
| 1603 | fsdata, ext4_get_block); | ||
| 1580 | 1604 | ||
| 1581 | if (!ret && ext4_should_journal_data(inode)) { | 1605 | if (!ret && ext4_should_journal_data(inode)) { | 
| 1582 | ret = walk_page_buffers(handle, page_buffers(page), | 1606 | ret = walk_page_buffers(handle, page_buffers(page), | 
| @@ -1793,7 +1817,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1793 | new_i_size = pos + copied; | 1817 | new_i_size = pos + copied; | 
| 1794 | if (new_i_size > inode->i_size) | 1818 | if (new_i_size > inode->i_size) | 
| 1795 | i_size_write(inode, pos+copied); | 1819 | i_size_write(inode, pos+copied); | 
| 1796 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1820 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 
| 1797 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 1821 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 
| 1798 | ext4_update_i_disksize(inode, new_i_size); | 1822 | ext4_update_i_disksize(inode, new_i_size); | 
| 1799 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1823 | ret2 = ext4_mark_inode_dirty(handle, inode); | 
| @@ -1836,6 +1860,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
| 1836 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1860 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 
| 1837 | struct ext4_inode_info *ei = EXT4_I(inode); | 1861 | struct ext4_inode_info *ei = EXT4_I(inode); | 
| 1838 | unsigned long md_needed, md_reserved; | 1862 | unsigned long md_needed, md_reserved; | 
| 1863 | int ret; | ||
| 1839 | 1864 | ||
| 1840 | /* | 1865 | /* | 
| 1841 | * recalculate the amount of metadata blocks to reserve | 1866 | * recalculate the amount of metadata blocks to reserve | 
| @@ -1846,6 +1871,7 @@ repeat: | |||
| 1846 | spin_lock(&ei->i_block_reservation_lock); | 1871 | spin_lock(&ei->i_block_reservation_lock); | 
| 1847 | md_reserved = ei->i_reserved_meta_blocks; | 1872 | md_reserved = ei->i_reserved_meta_blocks; | 
| 1848 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1873 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 
| 1874 | trace_ext4_da_reserve_space(inode, md_needed); | ||
| 1849 | spin_unlock(&ei->i_block_reservation_lock); | 1875 | spin_unlock(&ei->i_block_reservation_lock); | 
| 1850 | 1876 | ||
| 1851 | /* | 1877 | /* | 
| @@ -1853,11 +1879,12 @@ repeat: | |||
| 1853 | * later. Real quota accounting is done at pages writeout | 1879 | * later. Real quota accounting is done at pages writeout | 
| 1854 | * time. | 1880 | * time. | 
| 1855 | */ | 1881 | */ | 
| 1856 | if (vfs_dq_reserve_block(inode, md_needed + 1)) | 1882 | ret = dquot_reserve_block(inode, md_needed + 1); | 
| 1857 | return -EDQUOT; | 1883 | if (ret) | 
| 1884 | return ret; | ||
| 1858 | 1885 | ||
| 1859 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1886 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 
| 1860 | vfs_dq_release_reservation_block(inode, md_needed + 1); | 1887 | dquot_release_reservation_block(inode, md_needed + 1); | 
| 1861 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1888 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 
| 1862 | yield(); | 1889 | yield(); | 
| 1863 | goto repeat; | 1890 | goto repeat; | 
| @@ -1914,7 +1941,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
| 1914 | 1941 | ||
| 1915 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1942 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 
| 1916 | 1943 | ||
| 1917 | vfs_dq_release_reservation_block(inode, to_free); | 1944 | dquot_release_reservation_block(inode, to_free); | 
| 1918 | } | 1945 | } | 
| 1919 | 1946 | ||
| 1920 | static void ext4_da_page_release_reservation(struct page *page, | 1947 | static void ext4_da_page_release_reservation(struct page *page, | 
| @@ -2091,6 +2118,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
| 2091 | } else if (buffer_mapped(bh)) | 2118 | } else if (buffer_mapped(bh)) | 
| 2092 | BUG_ON(bh->b_blocknr != pblock); | 2119 | BUG_ON(bh->b_blocknr != pblock); | 
| 2093 | 2120 | ||
| 2121 | if (buffer_uninit(exbh)) | ||
| 2122 | set_buffer_uninit(bh); | ||
| 2094 | cur_logical++; | 2123 | cur_logical++; | 
| 2095 | pblock++; | 2124 | pblock++; | 
| 2096 | } while ((bh = bh->b_this_page) != head); | 2125 | } while ((bh = bh->b_this_page) != head); | 
| @@ -2133,17 +2162,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
| 2133 | break; | 2162 | break; | 
| 2134 | for (i = 0; i < nr_pages; i++) { | 2163 | for (i = 0; i < nr_pages; i++) { | 
| 2135 | struct page *page = pvec.pages[i]; | 2164 | struct page *page = pvec.pages[i]; | 
| 2136 | index = page->index; | 2165 | if (page->index > end) | 
| 2137 | if (index > end) | ||
| 2138 | break; | 2166 | break; | 
| 2139 | index++; | ||
| 2140 | |||
| 2141 | BUG_ON(!PageLocked(page)); | 2167 | BUG_ON(!PageLocked(page)); | 
| 2142 | BUG_ON(PageWriteback(page)); | 2168 | BUG_ON(PageWriteback(page)); | 
| 2143 | block_invalidatepage(page, 0); | 2169 | block_invalidatepage(page, 0); | 
| 2144 | ClearPageUptodate(page); | 2170 | ClearPageUptodate(page); | 
| 2145 | unlock_page(page); | 2171 | unlock_page(page); | 
| 2146 | } | 2172 | } | 
| 2173 | index = pvec.pages[nr_pages - 1]->index + 1; | ||
| 2174 | pagevec_release(&pvec); | ||
| 2147 | } | 2175 | } | 
| 2148 | return; | 2176 | return; | 
| 2149 | } | 2177 | } | 
| @@ -2220,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 2220 | */ | 2248 | */ | 
| 2221 | new.b_state = 0; | 2249 | new.b_state = 0; | 
| 2222 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 2250 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 
| 2251 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
| 2252 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
| 2223 | if (mpd->b_state & (1 << BH_Delay)) | 2253 | if (mpd->b_state & (1 << BH_Delay)) | 
| 2224 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2254 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 
| 2225 | 2255 | ||
| @@ -2630,11 +2660,14 @@ static int __ext4_journalled_writepage(struct page *page, | |||
| 2630 | ret = err; | 2660 | ret = err; | 
| 2631 | 2661 | ||
| 2632 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 2662 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 
| 2633 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 2663 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 
| 2634 | out: | 2664 | out: | 
| 2635 | return ret; | 2665 | return ret; | 
| 2636 | } | 2666 | } | 
| 2637 | 2667 | ||
| 2668 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
| 2669 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
| 2670 | |||
| 2638 | /* | 2671 | /* | 
| 2639 | * Note that we don't need to start a transaction unless we're journaling data | 2672 | * Note that we don't need to start a transaction unless we're journaling data | 
| 2640 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2673 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 
| @@ -2682,7 +2715,7 @@ static int ext4_writepage(struct page *page, | |||
| 2682 | int ret = 0; | 2715 | int ret = 0; | 
| 2683 | loff_t size; | 2716 | loff_t size; | 
| 2684 | unsigned int len; | 2717 | unsigned int len; | 
| 2685 | struct buffer_head *page_bufs; | 2718 | struct buffer_head *page_bufs = NULL; | 
| 2686 | struct inode *inode = page->mapping->host; | 2719 | struct inode *inode = page->mapping->host; | 
| 2687 | 2720 | ||
| 2688 | trace_ext4_writepage(inode, page); | 2721 | trace_ext4_writepage(inode, page); | 
| @@ -2758,7 +2791,11 @@ static int ext4_writepage(struct page *page, | |||
| 2758 | 2791 | ||
| 2759 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2792 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 
| 2760 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | 2793 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | 
| 2761 | else | 2794 | else if (page_bufs && buffer_uninit(page_bufs)) { | 
| 2795 | ext4_set_bh_endio(page_bufs, inode); | ||
| 2796 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | ||
| 2797 | wbc, ext4_end_io_buffer_write); | ||
| 2798 | } else | ||
| 2762 | ret = block_write_full_page(page, noalloc_get_block_write, | 2799 | ret = block_write_full_page(page, noalloc_get_block_write, | 
| 2763 | wbc); | 2800 | wbc); | 
| 2764 | 2801 | ||
| @@ -3301,7 +3338,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 3301 | filemap_write_and_wait(mapping); | 3338 | filemap_write_and_wait(mapping); | 
| 3302 | } | 3339 | } | 
| 3303 | 3340 | ||
| 3304 | if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 3341 | if (EXT4_JOURNAL(inode) && | 
| 3342 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { | ||
| 3305 | /* | 3343 | /* | 
| 3306 | * This is a REALLY heavyweight approach, but the use of | 3344 | * This is a REALLY heavyweight approach, but the use of | 
| 3307 | * bmap on dirty files is expected to be extremely rare: | 3345 | * bmap on dirty files is expected to be extremely rare: | 
| @@ -3320,7 +3358,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 3320 | * everything they get. | 3358 | * everything they get. | 
| 3321 | */ | 3359 | */ | 
| 3322 | 3360 | ||
| 3323 | EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; | 3361 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); | 
| 3324 | journal = EXT4_JOURNAL(inode); | 3362 | journal = EXT4_JOURNAL(inode); | 
| 3325 | jbd2_journal_lock_updates(journal); | 3363 | jbd2_journal_lock_updates(journal); | 
| 3326 | err = jbd2_journal_flush(journal); | 3364 | err = jbd2_journal_flush(journal); | 
| @@ -3345,11 +3383,45 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
| 3345 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3383 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 
| 3346 | } | 3384 | } | 
| 3347 | 3385 | ||
| 3386 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
| 3387 | { | ||
| 3388 | BUG_ON(!io); | ||
| 3389 | if (io->page) | ||
| 3390 | put_page(io->page); | ||
| 3391 | iput(io->inode); | ||
| 3392 | kfree(io); | ||
| 3393 | } | ||
| 3394 | |||
| 3395 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | ||
| 3396 | { | ||
| 3397 | struct buffer_head *head, *bh; | ||
| 3398 | unsigned int curr_off = 0; | ||
| 3399 | |||
| 3400 | if (!page_has_buffers(page)) | ||
| 3401 | return; | ||
| 3402 | head = bh = page_buffers(page); | ||
| 3403 | do { | ||
| 3404 | if (offset <= curr_off && test_clear_buffer_uninit(bh) | ||
| 3405 | && bh->b_private) { | ||
| 3406 | ext4_free_io_end(bh->b_private); | ||
| 3407 | bh->b_private = NULL; | ||
| 3408 | bh->b_end_io = NULL; | ||
| 3409 | } | ||
| 3410 | curr_off = curr_off + bh->b_size; | ||
| 3411 | bh = bh->b_this_page; | ||
| 3412 | } while (bh != head); | ||
| 3413 | } | ||
| 3414 | |||
| 3348 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 3415 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 
| 3349 | { | 3416 | { | 
| 3350 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3417 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 
| 3351 | 3418 | ||
| 3352 | /* | 3419 | /* | 
| 3420 | * free any io_end structure allocated for buffers to be discarded | ||
| 3421 | */ | ||
| 3422 | if (ext4_should_dioread_nolock(page->mapping->host)) | ||
| 3423 | ext4_invalidatepage_free_endio(page, offset); | ||
| 3424 | /* | ||
| 3353 | * If it's a full truncate we just forget about the pending dirtying | 3425 | * If it's a full truncate we just forget about the pending dirtying | 
| 3354 | */ | 3426 | */ | 
| 3355 | if (offset == 0) | 3427 | if (offset == 0) | 
| @@ -3420,7 +3492,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
| 3420 | } | 3492 | } | 
| 3421 | 3493 | ||
| 3422 | retry: | 3494 | retry: | 
| 3423 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 3495 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 
| 3496 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | ||
| 3497 | inode->i_sb->s_bdev, iov, | ||
| 3498 | offset, nr_segs, | ||
| 3499 | ext4_get_block, NULL); | ||
| 3500 | else | ||
| 3501 | ret = blockdev_direct_IO(rw, iocb, inode, | ||
| 3502 | inode->i_sb->s_bdev, iov, | ||
| 3424 | offset, nr_segs, | 3503 | offset, nr_segs, | 
| 3425 | ext4_get_block, NULL); | 3504 | ext4_get_block, NULL); | 
| 3426 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3505 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 
| @@ -3436,6 +3515,9 @@ retry: | |||
| 3436 | * but cannot extend i_size. Bail out and pretend | 3515 | * but cannot extend i_size. Bail out and pretend | 
| 3437 | * the write failed... */ | 3516 | * the write failed... */ | 
| 3438 | ret = PTR_ERR(handle); | 3517 | ret = PTR_ERR(handle); | 
| 3518 | if (inode->i_nlink) | ||
| 3519 | ext4_orphan_del(NULL, inode); | ||
| 3520 | |||
| 3439 | goto out; | 3521 | goto out; | 
| 3440 | } | 3522 | } | 
| 3441 | if (inode->i_nlink) | 3523 | if (inode->i_nlink) | 
| @@ -3463,75 +3545,63 @@ out: | |||
| 3463 | return ret; | 3545 | return ret; | 
| 3464 | } | 3546 | } | 
| 3465 | 3547 | ||
| 3466 | static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | 3548 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 
| 3467 | struct buffer_head *bh_result, int create) | 3549 | struct buffer_head *bh_result, int create) | 
| 3468 | { | 3550 | { | 
| 3469 | handle_t *handle = NULL; | 3551 | handle_t *handle = ext4_journal_current_handle(); | 
| 3470 | int ret = 0; | 3552 | int ret = 0; | 
| 3471 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 3553 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 
| 3472 | int dio_credits; | 3554 | int dio_credits; | 
| 3555 | int started = 0; | ||
| 3473 | 3556 | ||
| 3474 | ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", | 3557 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 
| 3475 | inode->i_ino, create); | 3558 | inode->i_ino, create); | 
| 3476 | /* | 3559 | /* | 
| 3477 | * DIO VFS code passes create = 0 flag for write to | 3560 | * ext4_get_block in prepare for a DIO write or buffer write. | 
| 3478 | * the middle of file. It does this to avoid block | 3561 | * We allocate an uinitialized extent if blocks haven't been allocated. | 
| 3479 | * allocation for holes, to prevent expose stale data | 3562 | * The extent will be converted to initialized after IO complete. | 
| 3480 | * out when there is parallel buffered read (which does | ||
| 3481 | * not hold the i_mutex lock) while direct IO write has | ||
| 3482 | * not completed. DIO request on holes finally falls back | ||
| 3483 | * to buffered IO for this reason. | ||
| 3484 | * | ||
| 3485 | * For ext4 extent based file, since we support fallocate, | ||
| 3486 | * new allocated extent as uninitialized, for holes, we | ||
| 3487 | * could fallocate blocks for holes, thus parallel | ||
| 3488 | * buffered IO read will zero out the page when read on | ||
| 3489 | * a hole while parallel DIO write to the hole has not completed. | ||
| 3490 | * | ||
| 3491 | * when we come here, we know it's a direct IO write to | ||
| 3492 | * to the middle of file (<i_size) | ||
| 3493 | * so it's safe to override the create flag from VFS. | ||
| 3494 | */ | 3563 | */ | 
| 3495 | create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; | 3564 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; | 
| 3496 | 3565 | ||
| 3497 | if (max_blocks > DIO_MAX_BLOCKS) | 3566 | if (!handle) { | 
| 3498 | max_blocks = DIO_MAX_BLOCKS; | 3567 | if (max_blocks > DIO_MAX_BLOCKS) | 
| 3499 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3568 | max_blocks = DIO_MAX_BLOCKS; | 
| 3500 | handle = ext4_journal_start(inode, dio_credits); | 3569 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 
| 3501 | if (IS_ERR(handle)) { | 3570 | handle = ext4_journal_start(inode, dio_credits); | 
| 3502 | ret = PTR_ERR(handle); | 3571 | if (IS_ERR(handle)) { | 
| 3503 | goto out; | 3572 | ret = PTR_ERR(handle); | 
| 3573 | goto out; | ||
| 3574 | } | ||
| 3575 | started = 1; | ||
| 3504 | } | 3576 | } | 
| 3577 | |||
| 3505 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 3578 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 
| 3506 | create); | 3579 | create); | 
| 3507 | if (ret > 0) { | 3580 | if (ret > 0) { | 
| 3508 | bh_result->b_size = (ret << inode->i_blkbits); | 3581 | bh_result->b_size = (ret << inode->i_blkbits); | 
| 3509 | ret = 0; | 3582 | ret = 0; | 
| 3510 | } | 3583 | } | 
| 3511 | ext4_journal_stop(handle); | 3584 | if (started) | 
| 3585 | ext4_journal_stop(handle); | ||
| 3512 | out: | 3586 | out: | 
| 3513 | return ret; | 3587 | return ret; | 
| 3514 | } | 3588 | } | 
| 3515 | 3589 | ||
| 3516 | static void ext4_free_io_end(ext4_io_end_t *io) | 3590 | static void dump_completed_IO(struct inode * inode) | 
| 3517 | { | ||
| 3518 | BUG_ON(!io); | ||
| 3519 | iput(io->inode); | ||
| 3520 | kfree(io); | ||
| 3521 | } | ||
| 3522 | static void dump_aio_dio_list(struct inode * inode) | ||
| 3523 | { | 3591 | { | 
| 3524 | #ifdef EXT4_DEBUG | 3592 | #ifdef EXT4_DEBUG | 
| 3525 | struct list_head *cur, *before, *after; | 3593 | struct list_head *cur, *before, *after; | 
| 3526 | ext4_io_end_t *io, *io0, *io1; | 3594 | ext4_io_end_t *io, *io0, *io1; | 
| 3595 | unsigned long flags; | ||
| 3527 | 3596 | ||
| 3528 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3597 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ | 
| 3529 | ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); | 3598 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); | 
| 3530 | return; | 3599 | return; | 
| 3531 | } | 3600 | } | 
| 3532 | 3601 | ||
| 3533 | ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); | 3602 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); | 
| 3534 | list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ | 3603 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 
| 3604 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
| 3535 | cur = &io->list; | 3605 | cur = &io->list; | 
| 3536 | before = cur->prev; | 3606 | before = cur->prev; | 
| 3537 | io0 = container_of(before, ext4_io_end_t, list); | 3607 | io0 = container_of(before, ext4_io_end_t, list); | 
| @@ -3541,32 +3611,31 @@ static void dump_aio_dio_list(struct inode * inode) | |||
| 3541 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | 3611 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | 
| 3542 | io, inode->i_ino, io0, io1); | 3612 | io, inode->i_ino, io0, io1); | 
| 3543 | } | 3613 | } | 
| 3614 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
| 3544 | #endif | 3615 | #endif | 
| 3545 | } | 3616 | } | 
| 3546 | 3617 | ||
| 3547 | /* | 3618 | /* | 
| 3548 | * check a range of space and convert unwritten extents to written. | 3619 | * check a range of space and convert unwritten extents to written. | 
| 3549 | */ | 3620 | */ | 
| 3550 | static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | 3621 | static int ext4_end_io_nolock(ext4_io_end_t *io) | 
| 3551 | { | 3622 | { | 
| 3552 | struct inode *inode = io->inode; | 3623 | struct inode *inode = io->inode; | 
| 3553 | loff_t offset = io->offset; | 3624 | loff_t offset = io->offset; | 
| 3554 | size_t size = io->size; | 3625 | ssize_t size = io->size; | 
| 3555 | int ret = 0; | 3626 | int ret = 0; | 
| 3556 | 3627 | ||
| 3557 | ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," | 3628 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 
| 3558 | "list->prev 0x%p\n", | 3629 | "list->prev 0x%p\n", | 
| 3559 | io, inode->i_ino, io->list.next, io->list.prev); | 3630 | io, inode->i_ino, io->list.next, io->list.prev); | 
| 3560 | 3631 | ||
| 3561 | if (list_empty(&io->list)) | 3632 | if (list_empty(&io->list)) | 
| 3562 | return ret; | 3633 | return ret; | 
| 3563 | 3634 | ||
| 3564 | if (io->flag != DIO_AIO_UNWRITTEN) | 3635 | if (io->flag != EXT4_IO_UNWRITTEN) | 
| 3565 | return ret; | 3636 | return ret; | 
| 3566 | 3637 | ||
| 3567 | if (offset + size <= i_size_read(inode)) | 3638 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 
| 3568 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
| 3569 | |||
| 3570 | if (ret < 0) { | 3639 | if (ret < 0) { | 
| 3571 | printk(KERN_EMERG "%s: failed to convert unwritten" | 3640 | printk(KERN_EMERG "%s: failed to convert unwritten" | 
| 3572 | "extents to written extents, error is %d" | 3641 | "extents to written extents, error is %d" | 
| @@ -3579,50 +3648,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | |||
| 3579 | io->flag = 0; | 3648 | io->flag = 0; | 
| 3580 | return ret; | 3649 | return ret; | 
| 3581 | } | 3650 | } | 
| 3651 | |||
| 3582 | /* | 3652 | /* | 
| 3583 | * work on completed aio dio IO, to convert unwritten extents to extents | 3653 | * work on completed aio dio IO, to convert unwritten extents to extents | 
| 3584 | */ | 3654 | */ | 
| 3585 | static void ext4_end_aio_dio_work(struct work_struct *work) | 3655 | static void ext4_end_io_work(struct work_struct *work) | 
| 3586 | { | 3656 | { | 
| 3587 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 3657 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 
| 3588 | struct inode *inode = io->inode; | 3658 | struct inode *inode = io->inode; | 
| 3589 | int ret = 0; | 3659 | struct ext4_inode_info *ei = EXT4_I(inode); | 
| 3660 | unsigned long flags; | ||
| 3661 | int ret; | ||
| 3590 | 3662 | ||
| 3591 | mutex_lock(&inode->i_mutex); | 3663 | mutex_lock(&inode->i_mutex); | 
| 3592 | ret = ext4_end_aio_dio_nolock(io); | 3664 | ret = ext4_end_io_nolock(io); | 
| 3593 | if (ret >= 0) { | 3665 | if (ret < 0) { | 
| 3594 | if (!list_empty(&io->list)) | 3666 | mutex_unlock(&inode->i_mutex); | 
| 3595 | list_del_init(&io->list); | 3667 | return; | 
| 3596 | ext4_free_io_end(io); | ||
| 3597 | } | 3668 | } | 
| 3669 | |||
| 3670 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
| 3671 | if (!list_empty(&io->list)) | ||
| 3672 | list_del_init(&io->list); | ||
| 3673 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
| 3598 | mutex_unlock(&inode->i_mutex); | 3674 | mutex_unlock(&inode->i_mutex); | 
| 3675 | ext4_free_io_end(io); | ||
| 3599 | } | 3676 | } | 
| 3677 | |||
| 3600 | /* | 3678 | /* | 
| 3601 | * This function is called from ext4_sync_file(). | 3679 | * This function is called from ext4_sync_file(). | 
| 3602 | * | 3680 | * | 
| 3603 | * When AIO DIO IO is completed, the work to convert unwritten | 3681 | * When IO is completed, the work to convert unwritten extents to | 
| 3604 | * extents to written is queued on workqueue but may not get immediately | 3682 | * written is queued on workqueue but may not get immediately | 
| 3605 | * scheduled. When fsync is called, we need to ensure the | 3683 | * scheduled. When fsync is called, we need to ensure the | 
| 3606 | * conversion is complete before fsync returns. | 3684 | * conversion is complete before fsync returns. | 
| 3607 | * The inode keeps track of a list of completed AIO from DIO path | 3685 | * The inode keeps track of a list of pending/completed IO that | 
| 3608 | * that might needs to do the conversion. This function walks through | 3686 | * might needs to do the conversion. This function walks through | 
| 3609 | * the list and convert the related unwritten extents to written. | 3687 | * the list and convert the related unwritten extents for completed IO | 
| 3688 | * to written. | ||
| 3689 | * The function return the number of pending IOs on success. | ||
| 3610 | */ | 3690 | */ | 
| 3611 | int flush_aio_dio_completed_IO(struct inode *inode) | 3691 | int flush_completed_IO(struct inode *inode) | 
| 3612 | { | 3692 | { | 
| 3613 | ext4_io_end_t *io; | 3693 | ext4_io_end_t *io; | 
| 3694 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 3695 | unsigned long flags; | ||
| 3614 | int ret = 0; | 3696 | int ret = 0; | 
| 3615 | int ret2 = 0; | 3697 | int ret2 = 0; | 
| 3616 | 3698 | ||
| 3617 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) | 3699 | if (list_empty(&ei->i_completed_io_list)) | 
| 3618 | return ret; | 3700 | return ret; | 
| 3619 | 3701 | ||
| 3620 | dump_aio_dio_list(inode); | 3702 | dump_completed_IO(inode); | 
| 3621 | while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3703 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 
| 3622 | io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, | 3704 | while (!list_empty(&ei->i_completed_io_list)){ | 
| 3705 | io = list_entry(ei->i_completed_io_list.next, | ||
| 3623 | ext4_io_end_t, list); | 3706 | ext4_io_end_t, list); | 
| 3624 | /* | 3707 | /* | 
| 3625 | * Calling ext4_end_aio_dio_nolock() to convert completed | 3708 | * Calling ext4_end_io_nolock() to convert completed | 
| 3626 | * IO to written. | 3709 | * IO to written. | 
| 3627 | * | 3710 | * | 
| 3628 | * When ext4_sync_file() is called, run_queue() may already | 3711 | * When ext4_sync_file() is called, run_queue() may already | 
| @@ -3635,20 +3718,23 @@ int flush_aio_dio_completed_IO(struct inode *inode) | |||
| 3635 | * avoid double converting from both fsync and background work | 3718 | * avoid double converting from both fsync and background work | 
| 3636 | * queue work. | 3719 | * queue work. | 
| 3637 | */ | 3720 | */ | 
| 3638 | ret = ext4_end_aio_dio_nolock(io); | 3721 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 
| 3722 | ret = ext4_end_io_nolock(io); | ||
| 3723 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
| 3639 | if (ret < 0) | 3724 | if (ret < 0) | 
| 3640 | ret2 = ret; | 3725 | ret2 = ret; | 
| 3641 | else | 3726 | else | 
| 3642 | list_del_init(&io->list); | 3727 | list_del_init(&io->list); | 
| 3643 | } | 3728 | } | 
| 3729 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
| 3644 | return (ret2 < 0) ? ret2 : 0; | 3730 | return (ret2 < 0) ? ret2 : 0; | 
| 3645 | } | 3731 | } | 
| 3646 | 3732 | ||
| 3647 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | 3733 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | 
| 3648 | { | 3734 | { | 
| 3649 | ext4_io_end_t *io = NULL; | 3735 | ext4_io_end_t *io = NULL; | 
| 3650 | 3736 | ||
| 3651 | io = kmalloc(sizeof(*io), GFP_NOFS); | 3737 | io = kmalloc(sizeof(*io), flags); | 
| 3652 | 3738 | ||
| 3653 | if (io) { | 3739 | if (io) { | 
| 3654 | igrab(inode); | 3740 | igrab(inode); | 
| @@ -3656,8 +3742,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | |||
| 3656 | io->flag = 0; | 3742 | io->flag = 0; | 
| 3657 | io->offset = 0; | 3743 | io->offset = 0; | 
| 3658 | io->size = 0; | 3744 | io->size = 0; | 
| 3659 | io->error = 0; | 3745 | io->page = NULL; | 
| 3660 | INIT_WORK(&io->work, ext4_end_aio_dio_work); | 3746 | INIT_WORK(&io->work, ext4_end_io_work); | 
| 3661 | INIT_LIST_HEAD(&io->list); | 3747 | INIT_LIST_HEAD(&io->list); | 
| 3662 | } | 3748 | } | 
| 3663 | 3749 | ||
| @@ -3669,6 +3755,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3669 | { | 3755 | { | 
| 3670 | ext4_io_end_t *io_end = iocb->private; | 3756 | ext4_io_end_t *io_end = iocb->private; | 
| 3671 | struct workqueue_struct *wq; | 3757 | struct workqueue_struct *wq; | 
| 3758 | unsigned long flags; | ||
| 3759 | struct ext4_inode_info *ei; | ||
| 3672 | 3760 | ||
| 3673 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3761 | /* if not async direct IO or dio with 0 bytes write, just return */ | 
| 3674 | if (!io_end || !size) | 3762 | if (!io_end || !size) | 
| @@ -3680,7 +3768,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3680 | size); | 3768 | size); | 
| 3681 | 3769 | ||
| 3682 | /* if not aio dio with unwritten extents, just free io and return */ | 3770 | /* if not aio dio with unwritten extents, just free io and return */ | 
| 3683 | if (io_end->flag != DIO_AIO_UNWRITTEN){ | 3771 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 
| 3684 | ext4_free_io_end(io_end); | 3772 | ext4_free_io_end(io_end); | 
| 3685 | iocb->private = NULL; | 3773 | iocb->private = NULL; | 
| 3686 | return; | 3774 | return; | 
| @@ -3688,16 +3776,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3688 | 3776 | ||
| 3689 | io_end->offset = offset; | 3777 | io_end->offset = offset; | 
| 3690 | io_end->size = size; | 3778 | io_end->size = size; | 
| 3779 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
| 3691 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3780 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 
| 3692 | 3781 | ||
| 3693 | /* queue the work to convert unwritten extents to written */ | 3782 | /* queue the work to convert unwritten extents to written */ | 
| 3694 | queue_work(wq, &io_end->work); | 3783 | queue_work(wq, &io_end->work); | 
| 3695 | 3784 | ||
| 3696 | /* Add the io_end to per-inode completed aio dio list*/ | 3785 | /* Add the io_end to per-inode completed aio dio list*/ | 
| 3697 | list_add_tail(&io_end->list, | 3786 | ei = EXT4_I(io_end->inode); | 
| 3698 | &EXT4_I(io_end->inode)->i_aio_dio_complete_list); | 3787 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 
| 3788 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
| 3789 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
| 3699 | iocb->private = NULL; | 3790 | iocb->private = NULL; | 
| 3700 | } | 3791 | } | 
| 3792 | |||
| 3793 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | ||
| 3794 | { | ||
| 3795 | ext4_io_end_t *io_end = bh->b_private; | ||
| 3796 | struct workqueue_struct *wq; | ||
| 3797 | struct inode *inode; | ||
| 3798 | unsigned long flags; | ||
| 3799 | |||
| 3800 | if (!test_clear_buffer_uninit(bh) || !io_end) | ||
| 3801 | goto out; | ||
| 3802 | |||
| 3803 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { | ||
| 3804 | printk("sb umounted, discard end_io request for inode %lu\n", | ||
| 3805 | io_end->inode->i_ino); | ||
| 3806 | ext4_free_io_end(io_end); | ||
| 3807 | goto out; | ||
| 3808 | } | ||
| 3809 | |||
| 3810 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
| 3811 | inode = io_end->inode; | ||
| 3812 | |||
| 3813 | /* Add the io_end to per-inode completed io list*/ | ||
| 3814 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
| 3815 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
| 3816 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
| 3817 | |||
| 3818 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
| 3819 | /* queue the work to convert unwritten extents to written */ | ||
| 3820 | queue_work(wq, &io_end->work); | ||
| 3821 | out: | ||
| 3822 | bh->b_private = NULL; | ||
| 3823 | bh->b_end_io = NULL; | ||
| 3824 | clear_buffer_uninit(bh); | ||
| 3825 | end_buffer_async_write(bh, uptodate); | ||
| 3826 | } | ||
| 3827 | |||
| 3828 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) | ||
| 3829 | { | ||
| 3830 | ext4_io_end_t *io_end; | ||
| 3831 | struct page *page = bh->b_page; | ||
| 3832 | loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT; | ||
| 3833 | size_t size = bh->b_size; | ||
| 3834 | |||
| 3835 | retry: | ||
| 3836 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); | ||
| 3837 | if (!io_end) { | ||
| 3838 | if (printk_ratelimit()) | ||
| 3839 | printk(KERN_WARNING "%s: allocation fail\n", __func__); | ||
| 3840 | schedule(); | ||
| 3841 | goto retry; | ||
| 3842 | } | ||
| 3843 | io_end->offset = offset; | ||
| 3844 | io_end->size = size; | ||
| 3845 | /* | ||
| 3846 | * We need to hold a reference to the page to make sure it | ||
| 3847 | * doesn't get evicted before ext4_end_io_work() has a chance | ||
| 3848 | * to convert the extent from written to unwritten. | ||
| 3849 | */ | ||
| 3850 | io_end->page = page; | ||
| 3851 | get_page(io_end->page); | ||
| 3852 | |||
| 3853 | bh->b_private = io_end; | ||
| 3854 | bh->b_end_io = ext4_end_io_buffer_write; | ||
| 3855 | return 0; | ||
| 3856 | } | ||
| 3857 | |||
| 3701 | /* | 3858 | /* | 
| 3702 | * For ext4 extent files, ext4 will do direct-io write to holes, | 3859 | * For ext4 extent files, ext4 will do direct-io write to holes, | 
| 3703 | * preallocated extents, and those write extend the file, no need to | 3860 | * preallocated extents, and those write extend the file, no need to | 
| @@ -3751,7 +3908,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3751 | iocb->private = NULL; | 3908 | iocb->private = NULL; | 
| 3752 | EXT4_I(inode)->cur_aio_dio = NULL; | 3909 | EXT4_I(inode)->cur_aio_dio = NULL; | 
| 3753 | if (!is_sync_kiocb(iocb)) { | 3910 | if (!is_sync_kiocb(iocb)) { | 
| 3754 | iocb->private = ext4_init_io_end(inode); | 3911 | iocb->private = ext4_init_io_end(inode, GFP_NOFS); | 
| 3755 | if (!iocb->private) | 3912 | if (!iocb->private) | 
| 3756 | return -ENOMEM; | 3913 | return -ENOMEM; | 
| 3757 | /* | 3914 | /* | 
| @@ -3767,7 +3924,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3767 | ret = blockdev_direct_IO(rw, iocb, inode, | 3924 | ret = blockdev_direct_IO(rw, iocb, inode, | 
| 3768 | inode->i_sb->s_bdev, iov, | 3925 | inode->i_sb->s_bdev, iov, | 
| 3769 | offset, nr_segs, | 3926 | offset, nr_segs, | 
| 3770 | ext4_get_block_dio_write, | 3927 | ext4_get_block_write, | 
| 3771 | ext4_end_io_dio); | 3928 | ext4_end_io_dio); | 
| 3772 | if (iocb->private) | 3929 | if (iocb->private) | 
| 3773 | EXT4_I(inode)->cur_aio_dio = NULL; | 3930 | EXT4_I(inode)->cur_aio_dio = NULL; | 
| @@ -3788,8 +3945,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3788 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3945 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 
| 3789 | ext4_free_io_end(iocb->private); | 3946 | ext4_free_io_end(iocb->private); | 
| 3790 | iocb->private = NULL; | 3947 | iocb->private = NULL; | 
| 3791 | } else if (ret > 0 && (EXT4_I(inode)->i_state & | 3948 | } else if (ret > 0 && ext4_test_inode_state(inode, | 
| 3792 | EXT4_STATE_DIO_UNWRITTEN)) { | 3949 | EXT4_STATE_DIO_UNWRITTEN)) { | 
| 3793 | int err; | 3950 | int err; | 
| 3794 | /* | 3951 | /* | 
| 3795 | * for non AIO case, since the IO is already | 3952 | * for non AIO case, since the IO is already | 
| @@ -3799,7 +3956,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3799 | offset, ret); | 3956 | offset, ret); | 
| 3800 | if (err < 0) | 3957 | if (err < 0) | 
| 3801 | ret = err; | 3958 | ret = err; | 
| 3802 | EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; | 3959 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 
| 3803 | } | 3960 | } | 
| 3804 | return ret; | 3961 | return ret; | 
| 3805 | } | 3962 | } | 
| @@ -4130,18 +4287,27 @@ no_top: | |||
| 4130 | * We release `count' blocks on disk, but (last - first) may be greater | 4287 | * We release `count' blocks on disk, but (last - first) may be greater | 
| 4131 | * than `count' because there can be holes in there. | 4288 | * than `count' because there can be holes in there. | 
| 4132 | */ | 4289 | */ | 
| 4133 | static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4290 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | 
| 4134 | struct buffer_head *bh, | 4291 | struct buffer_head *bh, | 
| 4135 | ext4_fsblk_t block_to_free, | 4292 | ext4_fsblk_t block_to_free, | 
| 4136 | unsigned long count, __le32 *first, | 4293 | unsigned long count, __le32 *first, | 
| 4137 | __le32 *last) | 4294 | __le32 *last) | 
| 4138 | { | 4295 | { | 
| 4139 | __le32 *p; | 4296 | __le32 *p; | 
| 4140 | int flags = EXT4_FREE_BLOCKS_FORGET; | 4297 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 
| 4141 | 4298 | ||
| 4142 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 4299 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 
| 4143 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4300 | flags |= EXT4_FREE_BLOCKS_METADATA; | 
| 4144 | 4301 | ||
| 4302 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
| 4303 | count)) { | ||
| 4304 | ext4_error(inode->i_sb, "inode #%lu: " | ||
| 4305 | "attempt to clear blocks %llu len %lu, invalid", | ||
| 4306 | inode->i_ino, (unsigned long long) block_to_free, | ||
| 4307 | count); | ||
| 4308 | return 1; | ||
| 4309 | } | ||
| 4310 | |||
| 4145 | if (try_to_extend_transaction(handle, inode)) { | 4311 | if (try_to_extend_transaction(handle, inode)) { | 
| 4146 | if (bh) { | 4312 | if (bh) { | 
| 4147 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4313 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 
| @@ -4160,6 +4326,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
| 4160 | *p = 0; | 4326 | *p = 0; | 
| 4161 | 4327 | ||
| 4162 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4328 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 
| 4329 | return 0; | ||
| 4163 | } | 4330 | } | 
| 4164 | 4331 | ||
| 4165 | /** | 4332 | /** | 
| @@ -4215,9 +4382,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
| 4215 | } else if (nr == block_to_free + count) { | 4382 | } else if (nr == block_to_free + count) { | 
| 4216 | count++; | 4383 | count++; | 
| 4217 | } else { | 4384 | } else { | 
| 4218 | ext4_clear_blocks(handle, inode, this_bh, | 4385 | if (ext4_clear_blocks(handle, inode, this_bh, | 
| 4219 | block_to_free, | 4386 | block_to_free, count, | 
| 4220 | count, block_to_free_p, p); | 4387 | block_to_free_p, p)) | 
| 4388 | break; | ||
| 4221 | block_to_free = nr; | 4389 | block_to_free = nr; | 
| 4222 | block_to_free_p = p; | 4390 | block_to_free_p = p; | 
| 4223 | count = 1; | 4391 | count = 1; | 
| @@ -4241,7 +4409,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
| 4241 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4409 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 
| 4242 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4410 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 
| 4243 | else | 4411 | else | 
| 4244 | ext4_error(inode->i_sb, __func__, | 4412 | ext4_error(inode->i_sb, | 
| 4245 | "circular indirect block detected, " | 4413 | "circular indirect block detected, " | 
| 4246 | "inode=%lu, block=%llu", | 4414 | "inode=%lu, block=%llu", | 
| 4247 | inode->i_ino, | 4415 | inode->i_ino, | 
| @@ -4281,6 +4449,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 4281 | if (!nr) | 4449 | if (!nr) | 
| 4282 | continue; /* A hole */ | 4450 | continue; /* A hole */ | 
| 4283 | 4451 | ||
| 4452 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
| 4453 | nr, 1)) { | ||
| 4454 | ext4_error(inode->i_sb, | ||
| 4455 | "indirect mapped block in inode " | ||
| 4456 | "#%lu invalid (level %d, blk #%lu)", | ||
| 4457 | inode->i_ino, depth, | ||
| 4458 | (unsigned long) nr); | ||
| 4459 | break; | ||
| 4460 | } | ||
| 4461 | |||
| 4284 | /* Go read the buffer for the next level down */ | 4462 | /* Go read the buffer for the next level down */ | 
| 4285 | bh = sb_bread(inode->i_sb, nr); | 4463 | bh = sb_bread(inode->i_sb, nr); | 
| 4286 | 4464 | ||
| @@ -4289,7 +4467,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 4289 | * (should be rare). | 4467 | * (should be rare). | 
| 4290 | */ | 4468 | */ | 
| 4291 | if (!bh) { | 4469 | if (!bh) { | 
| 4292 | ext4_error(inode->i_sb, "ext4_free_branches", | 4470 | ext4_error(inode->i_sb, | 
| 4293 | "Read failure, inode=%lu, block=%llu", | 4471 | "Read failure, inode=%lu, block=%llu", | 
| 4294 | inode->i_ino, nr); | 4472 | inode->i_ino, nr); | 
| 4295 | continue; | 4473 | continue; | 
| @@ -4433,8 +4611,10 @@ void ext4_truncate(struct inode *inode) | |||
| 4433 | if (!ext4_can_truncate(inode)) | 4611 | if (!ext4_can_truncate(inode)) | 
| 4434 | return; | 4612 | return; | 
| 4435 | 4613 | ||
| 4614 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | ||
| 4615 | |||
| 4436 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4616 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 
| 4437 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4617 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 
| 4438 | 4618 | ||
| 4439 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4619 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 
| 4440 | ext4_ext_truncate(inode); | 4620 | ext4_ext_truncate(inode); | 
| @@ -4604,9 +4784,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
| 4604 | 4784 | ||
| 4605 | bh = sb_getblk(sb, block); | 4785 | bh = sb_getblk(sb, block); | 
| 4606 | if (!bh) { | 4786 | if (!bh) { | 
| 4607 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " | 4787 | ext4_error(sb, "unable to read inode block - " | 
| 4608 | "inode block - inode=%lu, block=%llu", | 4788 | "inode=%lu, block=%llu", inode->i_ino, block); | 
| 4609 | inode->i_ino, block); | ||
| 4610 | return -EIO; | 4789 | return -EIO; | 
| 4611 | } | 4790 | } | 
| 4612 | if (!buffer_uptodate(bh)) { | 4791 | if (!buffer_uptodate(bh)) { | 
| @@ -4704,9 +4883,8 @@ make_io: | |||
| 4704 | submit_bh(READ_META, bh); | 4883 | submit_bh(READ_META, bh); | 
| 4705 | wait_on_buffer(bh); | 4884 | wait_on_buffer(bh); | 
| 4706 | if (!buffer_uptodate(bh)) { | 4885 | if (!buffer_uptodate(bh)) { | 
| 4707 | ext4_error(sb, __func__, | 4886 | ext4_error(sb, "unable to read inode block - inode=%lu," | 
| 4708 | "unable to read inode block - inode=%lu, " | 4887 | " block=%llu", inode->i_ino, block); | 
| 4709 | "block=%llu", inode->i_ino, block); | ||
| 4710 | brelse(bh); | 4888 | brelse(bh); | 
| 4711 | return -EIO; | 4889 | return -EIO; | 
| 4712 | } | 4890 | } | 
| @@ -4720,7 +4898,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | |||
| 4720 | { | 4898 | { | 
| 4721 | /* We have all inode data except xattrs in memory here. */ | 4899 | /* We have all inode data except xattrs in memory here. */ | 
| 4722 | return __ext4_get_inode_loc(inode, iloc, | 4900 | return __ext4_get_inode_loc(inode, iloc, | 
| 4723 | !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); | 4901 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); | 
| 4724 | } | 4902 | } | 
| 4725 | 4903 | ||
| 4726 | void ext4_set_inode_flags(struct inode *inode) | 4904 | void ext4_set_inode_flags(struct inode *inode) | 
| @@ -4814,7 +4992,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 4814 | } | 4992 | } | 
| 4815 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 4993 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 
| 4816 | 4994 | ||
| 4817 | ei->i_state = 0; | 4995 | ei->i_state_flags = 0; | 
| 4818 | ei->i_dir_start_lookup = 0; | 4996 | ei->i_dir_start_lookup = 0; | 
| 4819 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 4997 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 
| 4820 | /* We now have enough fields to check if the inode was active or not. | 4998 | /* We now have enough fields to check if the inode was active or not. | 
| @@ -4897,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 4897 | EXT4_GOOD_OLD_INODE_SIZE + | 5075 | EXT4_GOOD_OLD_INODE_SIZE + | 
| 4898 | ei->i_extra_isize; | 5076 | ei->i_extra_isize; | 
| 4899 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 5077 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 
| 4900 | ei->i_state |= EXT4_STATE_XATTR; | 5078 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | 
| 4901 | } | 5079 | } | 
| 4902 | } else | 5080 | } else | 
| 4903 | ei->i_extra_isize = 0; | 5081 | ei->i_extra_isize = 0; | 
| @@ -4917,8 +5095,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 4917 | ret = 0; | 5095 | ret = 0; | 
| 4918 | if (ei->i_file_acl && | 5096 | if (ei->i_file_acl && | 
| 4919 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 5097 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 
| 4920 | ext4_error(sb, __func__, | 5098 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", | 
| 4921 | "bad extended attribute block %llu in inode #%lu", | ||
| 4922 | ei->i_file_acl, inode->i_ino); | 5099 | ei->i_file_acl, inode->i_ino); | 
| 4923 | ret = -EIO; | 5100 | ret = -EIO; | 
| 4924 | goto bad_inode; | 5101 | goto bad_inode; | 
| @@ -4964,8 +5141,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 4964 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5141 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 
| 4965 | } else { | 5142 | } else { | 
| 4966 | ret = -EIO; | 5143 | ret = -EIO; | 
| 4967 | ext4_error(inode->i_sb, __func__, | 5144 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", | 
| 4968 | "bogus i_mode (%o) for inode=%lu", | ||
| 4969 | inode->i_mode, inode->i_ino); | 5145 | inode->i_mode, inode->i_ino); | 
| 4970 | goto bad_inode; | 5146 | goto bad_inode; | 
| 4971 | } | 5147 | } | 
| @@ -5037,7 +5213,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 5037 | 5213 | ||
| 5038 | /* For fields not not tracking in the in-memory inode, | 5214 | /* For fields not not tracking in the in-memory inode, | 
| 5039 | * initialise them to zero for new inodes. */ | 5215 | * initialise them to zero for new inodes. */ | 
| 5040 | if (ei->i_state & EXT4_STATE_NEW) | 5216 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) | 
| 5041 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 5217 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 
| 5042 | 5218 | ||
| 5043 | ext4_get_inode_flags(ei); | 5219 | ext4_get_inode_flags(ei); | 
| @@ -5101,7 +5277,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 5101 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 5277 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 
| 5102 | sb->s_dirt = 1; | 5278 | sb->s_dirt = 1; | 
| 5103 | ext4_handle_sync(handle); | 5279 | ext4_handle_sync(handle); | 
| 5104 | err = ext4_handle_dirty_metadata(handle, inode, | 5280 | err = ext4_handle_dirty_metadata(handle, NULL, | 
| 5105 | EXT4_SB(sb)->s_sbh); | 5281 | EXT4_SB(sb)->s_sbh); | 
| 5106 | } | 5282 | } | 
| 5107 | } | 5283 | } | 
| @@ -5130,10 +5306,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 5130 | } | 5306 | } | 
| 5131 | 5307 | ||
| 5132 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 5308 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 
| 5133 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 5309 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); | 
| 5134 | if (!err) | 5310 | if (!err) | 
| 5135 | err = rc; | 5311 | err = rc; | 
| 5136 | ei->i_state &= ~EXT4_STATE_NEW; | 5312 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); | 
| 5137 | 5313 | ||
| 5138 | ext4_update_inode_fsync_trans(handle, inode, 0); | 5314 | ext4_update_inode_fsync_trans(handle, inode, 0); | 
| 5139 | out_brelse: | 5315 | out_brelse: | 
| @@ -5177,7 +5353,7 @@ out_brelse: | |||
| 5177 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 5353 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 
| 5178 | * will no longer be on the superblock's dirty inode list. | 5354 | * will no longer be on the superblock's dirty inode list. | 
| 5179 | */ | 5355 | */ | 
| 5180 | int ext4_write_inode(struct inode *inode, int wait) | 5356 | int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | 
| 5181 | { | 5357 | { | 
| 5182 | int err; | 5358 | int err; | 
| 5183 | 5359 | ||
| @@ -5191,7 +5367,7 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
| 5191 | return -EIO; | 5367 | return -EIO; | 
| 5192 | } | 5368 | } | 
| 5193 | 5369 | ||
| 5194 | if (!wait) | 5370 | if (wbc->sync_mode != WB_SYNC_ALL) | 
| 5195 | return 0; | 5371 | return 0; | 
| 5196 | 5372 | ||
| 5197 | err = ext4_force_commit(inode->i_sb); | 5373 | err = ext4_force_commit(inode->i_sb); | 
| @@ -5201,13 +5377,11 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
| 5201 | err = ext4_get_inode_loc(inode, &iloc); | 5377 | err = ext4_get_inode_loc(inode, &iloc); | 
| 5202 | if (err) | 5378 | if (err) | 
| 5203 | return err; | 5379 | return err; | 
| 5204 | if (wait) | 5380 | if (wbc->sync_mode == WB_SYNC_ALL) | 
| 5205 | sync_dirty_buffer(iloc.bh); | 5381 | sync_dirty_buffer(iloc.bh); | 
| 5206 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5382 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 
| 5207 | ext4_error(inode->i_sb, __func__, | 5383 | ext4_error(inode->i_sb, "IO error syncing inode, " | 
| 5208 | "IO error syncing inode, " | 5384 | "inode=%lu, block=%llu", inode->i_ino, | 
| 5209 | "inode=%lu, block=%llu", | ||
| 5210 | inode->i_ino, | ||
| 5211 | (unsigned long long)iloc.bh->b_blocknr); | 5385 | (unsigned long long)iloc.bh->b_blocknr); | 
| 5212 | err = -EIO; | 5386 | err = -EIO; | 
| 5213 | } | 5387 | } | 
| @@ -5249,6 +5423,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 5249 | if (error) | 5423 | if (error) | 
| 5250 | return error; | 5424 | return error; | 
| 5251 | 5425 | ||
| 5426 | if (ia_valid & ATTR_SIZE) | ||
| 5427 | dquot_initialize(inode); | ||
| 5252 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 5428 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 
| 5253 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 5429 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 
| 5254 | handle_t *handle; | 5430 | handle_t *handle; | 
| @@ -5261,7 +5437,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 5261 | error = PTR_ERR(handle); | 5437 | error = PTR_ERR(handle); | 
| 5262 | goto err_out; | 5438 | goto err_out; | 
| 5263 | } | 5439 | } | 
| 5264 | error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; | 5440 | error = dquot_transfer(inode, attr); | 
| 5265 | if (error) { | 5441 | if (error) { | 
| 5266 | ext4_journal_stop(handle); | 5442 | ext4_journal_stop(handle); | 
| 5267 | return error; | 5443 | return error; | 
| @@ -5288,7 +5464,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 5288 | } | 5464 | } | 
| 5289 | 5465 | ||
| 5290 | if (S_ISREG(inode->i_mode) && | 5466 | if (S_ISREG(inode->i_mode) && | 
| 5291 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 5467 | attr->ia_valid & ATTR_SIZE && | 
| 5468 | (attr->ia_size < inode->i_size || | ||
| 5469 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | ||
| 5292 | handle_t *handle; | 5470 | handle_t *handle; | 
| 5293 | 5471 | ||
| 5294 | handle = ext4_journal_start(inode, 3); | 5472 | handle = ext4_journal_start(inode, 3); | 
| @@ -5319,6 +5497,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 5319 | goto err_out; | 5497 | goto err_out; | 
| 5320 | } | 5498 | } | 
| 5321 | } | 5499 | } | 
| 5500 | /* ext4_truncate will clear the flag */ | ||
| 5501 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | ||
| 5502 | ext4_truncate(inode); | ||
| 5322 | } | 5503 | } | 
| 5323 | 5504 | ||
| 5324 | rc = inode_setattr(inode, attr); | 5505 | rc = inode_setattr(inode, attr); | 
| @@ -5557,8 +5738,8 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
| 5557 | entry = IFIRST(header); | 5738 | entry = IFIRST(header); | 
| 5558 | 5739 | ||
| 5559 | /* No extended attributes present */ | 5740 | /* No extended attributes present */ | 
| 5560 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || | 5741 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 
| 5561 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 5742 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 
| 5562 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 5743 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 
| 5563 | new_extra_isize); | 5744 | new_extra_isize); | 
| 5564 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 5745 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 
| @@ -5602,7 +5783,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
| 5602 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5783 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 
| 5603 | if (ext4_handle_valid(handle) && | 5784 | if (ext4_handle_valid(handle) && | 
| 5604 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5785 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 
| 5605 | !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { | 5786 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { | 
| 5606 | /* | 5787 | /* | 
| 5607 | * We need extra buffer credits since we may write into EA block | 5788 | * We need extra buffer credits since we may write into EA block | 
| 5608 | * with this same handle. If journal_extend fails, then it will | 5789 | * with this same handle. If journal_extend fails, then it will | 
| @@ -5616,10 +5797,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
| 5616 | sbi->s_want_extra_isize, | 5797 | sbi->s_want_extra_isize, | 
| 5617 | iloc, handle); | 5798 | iloc, handle); | 
| 5618 | if (ret) { | 5799 | if (ret) { | 
| 5619 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | 5800 | ext4_set_inode_state(inode, | 
| 5801 | EXT4_STATE_NO_EXPAND); | ||
| 5620 | if (mnt_count != | 5802 | if (mnt_count != | 
| 5621 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 5803 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 
| 5622 | ext4_warning(inode->i_sb, __func__, | 5804 | ext4_warning(inode->i_sb, | 
| 5623 | "Unable to expand inode %lu. Delete" | 5805 | "Unable to expand inode %lu. Delete" | 
| 5624 | " some EAs or run e2fsck.", | 5806 | " some EAs or run e2fsck.", | 
| 5625 | inode->i_ino); | 5807 | inode->i_ino); | 
| @@ -5641,7 +5823,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
| 5641 | * i_size has been changed by generic_commit_write() and we thus need | 5823 | * i_size has been changed by generic_commit_write() and we thus need | 
| 5642 | * to include the updated inode in the current transaction. | 5824 | * to include the updated inode in the current transaction. | 
| 5643 | * | 5825 | * | 
| 5644 | * Also, vfs_dq_alloc_block() will always dirty the inode when blocks | 5826 | * Also, dquot_alloc_block() will always dirty the inode when blocks | 
| 5645 | * are allocated to the file. | 5827 | * are allocated to the file. | 
| 5646 | * | 5828 | * | 
| 5647 | * If the inode is marked synchronous, we don't honour that here - doing | 5829 | * If the inode is marked synchronous, we don't honour that here - doing | 
| @@ -5683,7 +5865,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) | |||
| 5683 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 5865 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 
| 5684 | if (!err) | 5866 | if (!err) | 
| 5685 | err = ext4_handle_dirty_metadata(handle, | 5867 | err = ext4_handle_dirty_metadata(handle, | 
| 5686 | inode, | 5868 | NULL, | 
| 5687 | iloc.bh); | 5869 | iloc.bh); | 
| 5688 | brelse(iloc.bh); | 5870 | brelse(iloc.bh); | 
| 5689 | } | 5871 | } | 
