diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 492 |
1 files changed, 337 insertions, 155 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..986120f30066 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/uio.h> | 38 | #include <linux/uio.h> |
39 | #include <linux/bio.h> | 39 | #include <linux/bio.h> |
40 | #include <linux/workqueue.h> | 40 | #include <linux/workqueue.h> |
41 | #include <linux/kernel.h> | ||
41 | 42 | ||
42 | #include "ext4_jbd2.h" | 43 | #include "ext4_jbd2.h" |
43 | #include "xattr.h" | 44 | #include "xattr.h" |
@@ -170,6 +171,9 @@ void ext4_delete_inode(struct inode *inode) | |||
170 | handle_t *handle; | 171 | handle_t *handle; |
171 | int err; | 172 | int err; |
172 | 173 | ||
174 | if (!is_bad_inode(inode)) | ||
175 | dquot_initialize(inode); | ||
176 | |||
173 | if (ext4_should_order_data(inode)) | 177 | if (ext4_should_order_data(inode)) |
174 | ext4_begin_ordered_truncate(inode, 0); | 178 | ext4_begin_ordered_truncate(inode, 0); |
175 | truncate_inode_pages(&inode->i_data, 0); | 179 | truncate_inode_pages(&inode->i_data, 0); |
@@ -194,7 +198,7 @@ void ext4_delete_inode(struct inode *inode) | |||
194 | inode->i_size = 0; | 198 | inode->i_size = 0; |
195 | err = ext4_mark_inode_dirty(handle, inode); | 199 | err = ext4_mark_inode_dirty(handle, inode); |
196 | if (err) { | 200 | if (err) { |
197 | ext4_warning(inode->i_sb, __func__, | 201 | ext4_warning(inode->i_sb, |
198 | "couldn't mark inode dirty (err %d)", err); | 202 | "couldn't mark inode dirty (err %d)", err); |
199 | goto stop_handle; | 203 | goto stop_handle; |
200 | } | 204 | } |
@@ -212,7 +216,7 @@ void ext4_delete_inode(struct inode *inode) | |||
212 | if (err > 0) | 216 | if (err > 0) |
213 | err = ext4_journal_restart(handle, 3); | 217 | err = ext4_journal_restart(handle, 3); |
214 | if (err != 0) { | 218 | if (err != 0) { |
215 | ext4_warning(inode->i_sb, __func__, | 219 | ext4_warning(inode->i_sb, |
216 | "couldn't extend journal (err %d)", err); | 220 | "couldn't extend journal (err %d)", err); |
217 | stop_handle: | 221 | stop_handle: |
218 | ext4_journal_stop(handle); | 222 | ext4_journal_stop(handle); |
@@ -323,8 +327,7 @@ static int ext4_block_to_path(struct inode *inode, | |||
323 | offsets[n++] = i_block & (ptrs - 1); | 327 | offsets[n++] = i_block & (ptrs - 1); |
324 | final = ptrs; | 328 | final = ptrs; |
325 | } else { | 329 | } else { |
326 | ext4_warning(inode->i_sb, "ext4_block_to_path", | 330 | ext4_warning(inode->i_sb, "block %lu > max in inode %lu", |
327 | "block %lu > max in inode %lu", | ||
328 | i_block + direct_blocks + | 331 | i_block + direct_blocks + |
329 | indirect_blocks + double_blocks, inode->i_ino); | 332 | indirect_blocks + double_blocks, inode->i_ino); |
330 | } | 333 | } |
@@ -344,7 +347,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
344 | if (blk && | 347 | if (blk && |
345 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 348 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
346 | blk, 1))) { | 349 | blk, 1))) { |
347 | ext4_error(inode->i_sb, function, | 350 | __ext4_error(inode->i_sb, function, |
348 | "invalid block reference %u " | 351 | "invalid block reference %u " |
349 | "in inode #%lu", blk, inode->i_ino); | 352 | "in inode #%lu", blk, inode->i_ino); |
350 | return -EIO; | 353 | return -EIO; |
@@ -607,7 +610,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
607 | if (*err) | 610 | if (*err) |
608 | goto failed_out; | 611 | goto failed_out; |
609 | 612 | ||
610 | BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); | 613 | if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { |
614 | EXT4_ERROR_INODE(inode, | ||
615 | "current_block %llu + count %lu > %d!", | ||
616 | current_block, count, | ||
617 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
618 | *err = -EIO; | ||
619 | goto failed_out; | ||
620 | } | ||
611 | 621 | ||
612 | target -= count; | 622 | target -= count; |
613 | /* allocate blocks for indirect blocks */ | 623 | /* allocate blocks for indirect blocks */ |
@@ -643,7 +653,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
643 | ar.flags = EXT4_MB_HINT_DATA; | 653 | ar.flags = EXT4_MB_HINT_DATA; |
644 | 654 | ||
645 | current_block = ext4_mb_new_blocks(handle, &ar, err); | 655 | current_block = ext4_mb_new_blocks(handle, &ar, err); |
646 | BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); | 656 | if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { |
657 | EXT4_ERROR_INODE(inode, | ||
658 | "current_block %llu + ar.len %d > %d!", | ||
659 | current_block, ar.len, | ||
660 | EXT4_MAX_BLOCK_FILE_PHYS); | ||
661 | *err = -EIO; | ||
662 | goto failed_out; | ||
663 | } | ||
647 | 664 | ||
648 | if (*err && (target == blks)) { | 665 | if (*err && (target == blks)) { |
649 | /* | 666 | /* |
@@ -1061,6 +1078,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1061 | int mdb_free = 0, allocated_meta_blocks = 0; | 1078 | int mdb_free = 0, allocated_meta_blocks = 0; |
1062 | 1079 | ||
1063 | spin_lock(&ei->i_block_reservation_lock); | 1080 | spin_lock(&ei->i_block_reservation_lock); |
1081 | trace_ext4_da_update_reserve_space(inode, used); | ||
1064 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 1082 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
1065 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 1083 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
1066 | "with only %d reserved data blocks\n", | 1084 | "with only %d reserved data blocks\n", |
@@ -1093,9 +1111,9 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1093 | 1111 | ||
1094 | /* Update quota subsystem */ | 1112 | /* Update quota subsystem */ |
1095 | if (quota_claim) { | 1113 | if (quota_claim) { |
1096 | vfs_dq_claim_block(inode, used); | 1114 | dquot_claim_block(inode, used); |
1097 | if (mdb_free) | 1115 | if (mdb_free) |
1098 | vfs_dq_release_reservation_block(inode, mdb_free); | 1116 | dquot_release_reservation_block(inode, mdb_free); |
1099 | } else { | 1117 | } else { |
1100 | /* | 1118 | /* |
1101 | * We did fallocate with an offset that is already delayed | 1119 | * We did fallocate with an offset that is already delayed |
@@ -1106,8 +1124,8 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1106 | * that | 1124 | * that |
1107 | */ | 1125 | */ |
1108 | if (allocated_meta_blocks) | 1126 | if (allocated_meta_blocks) |
1109 | vfs_dq_claim_block(inode, allocated_meta_blocks); | 1127 | dquot_claim_block(inode, allocated_meta_blocks); |
1110 | vfs_dq_release_reservation_block(inode, mdb_free + used); | 1128 | dquot_release_reservation_block(inode, mdb_free + used); |
1111 | } | 1129 | } |
1112 | 1130 | ||
1113 | /* | 1131 | /* |
@@ -1124,7 +1142,7 @@ static int check_block_validity(struct inode *inode, const char *msg, | |||
1124 | sector_t logical, sector_t phys, int len) | 1142 | sector_t logical, sector_t phys, int len) |
1125 | { | 1143 | { |
1126 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1144 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { |
1127 | ext4_error(inode->i_sb, msg, | 1145 | __ext4_error(inode->i_sb, msg, |
1128 | "inode #%lu logical block %llu mapped to %llu " | 1146 | "inode #%lu logical block %llu mapped to %llu " |
1129 | "(size %d)", inode->i_ino, | 1147 | "(size %d)", inode->i_ino, |
1130 | (unsigned long long) logical, | 1148 | (unsigned long long) logical, |
@@ -1306,7 +1324,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1306 | * i_data's format changing. Force the migrate | 1324 | * i_data's format changing. Force the migrate |
1307 | * to fail by clearing migrate flags | 1325 | * to fail by clearing migrate flags |
1308 | */ | 1326 | */ |
1309 | EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; | 1327 | ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); |
1310 | } | 1328 | } |
1311 | 1329 | ||
1312 | /* | 1330 | /* |
@@ -1534,6 +1552,8 @@ static void ext4_truncate_failed_write(struct inode *inode) | |||
1534 | ext4_truncate(inode); | 1552 | ext4_truncate(inode); |
1535 | } | 1553 | } |
1536 | 1554 | ||
1555 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | ||
1556 | struct buffer_head *bh_result, int create); | ||
1537 | static int ext4_write_begin(struct file *file, struct address_space *mapping, | 1557 | static int ext4_write_begin(struct file *file, struct address_space *mapping, |
1538 | loff_t pos, unsigned len, unsigned flags, | 1558 | loff_t pos, unsigned len, unsigned flags, |
1539 | struct page **pagep, void **fsdata) | 1559 | struct page **pagep, void **fsdata) |
@@ -1575,8 +1595,12 @@ retry: | |||
1575 | } | 1595 | } |
1576 | *pagep = page; | 1596 | *pagep = page; |
1577 | 1597 | ||
1578 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1598 | if (ext4_should_dioread_nolock(inode)) |
1579 | ext4_get_block); | 1599 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, |
1600 | fsdata, ext4_get_block_write); | ||
1601 | else | ||
1602 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | ||
1603 | fsdata, ext4_get_block); | ||
1580 | 1604 | ||
1581 | if (!ret && ext4_should_journal_data(inode)) { | 1605 | if (!ret && ext4_should_journal_data(inode)) { |
1582 | ret = walk_page_buffers(handle, page_buffers(page), | 1606 | ret = walk_page_buffers(handle, page_buffers(page), |
@@ -1793,7 +1817,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
1793 | new_i_size = pos + copied; | 1817 | new_i_size = pos + copied; |
1794 | if (new_i_size > inode->i_size) | 1818 | if (new_i_size > inode->i_size) |
1795 | i_size_write(inode, pos+copied); | 1819 | i_size_write(inode, pos+copied); |
1796 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1820 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
1797 | if (new_i_size > EXT4_I(inode)->i_disksize) { | 1821 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
1798 | ext4_update_i_disksize(inode, new_i_size); | 1822 | ext4_update_i_disksize(inode, new_i_size); |
1799 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1823 | ret2 = ext4_mark_inode_dirty(handle, inode); |
@@ -1836,6 +1860,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1836 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1860 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1837 | struct ext4_inode_info *ei = EXT4_I(inode); | 1861 | struct ext4_inode_info *ei = EXT4_I(inode); |
1838 | unsigned long md_needed, md_reserved; | 1862 | unsigned long md_needed, md_reserved; |
1863 | int ret; | ||
1839 | 1864 | ||
1840 | /* | 1865 | /* |
1841 | * recalculate the amount of metadata blocks to reserve | 1866 | * recalculate the amount of metadata blocks to reserve |
@@ -1846,6 +1871,7 @@ repeat: | |||
1846 | spin_lock(&ei->i_block_reservation_lock); | 1871 | spin_lock(&ei->i_block_reservation_lock); |
1847 | md_reserved = ei->i_reserved_meta_blocks; | 1872 | md_reserved = ei->i_reserved_meta_blocks; |
1848 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1873 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1874 | trace_ext4_da_reserve_space(inode, md_needed); | ||
1849 | spin_unlock(&ei->i_block_reservation_lock); | 1875 | spin_unlock(&ei->i_block_reservation_lock); |
1850 | 1876 | ||
1851 | /* | 1877 | /* |
@@ -1853,11 +1879,12 @@ repeat: | |||
1853 | * later. Real quota accounting is done at pages writeout | 1879 | * later. Real quota accounting is done at pages writeout |
1854 | * time. | 1880 | * time. |
1855 | */ | 1881 | */ |
1856 | if (vfs_dq_reserve_block(inode, md_needed + 1)) | 1882 | ret = dquot_reserve_block(inode, md_needed + 1); |
1857 | return -EDQUOT; | 1883 | if (ret) |
1884 | return ret; | ||
1858 | 1885 | ||
1859 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1886 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1860 | vfs_dq_release_reservation_block(inode, md_needed + 1); | 1887 | dquot_release_reservation_block(inode, md_needed + 1); |
1861 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1888 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1862 | yield(); | 1889 | yield(); |
1863 | goto repeat; | 1890 | goto repeat; |
@@ -1914,7 +1941,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1914 | 1941 | ||
1915 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1942 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1916 | 1943 | ||
1917 | vfs_dq_release_reservation_block(inode, to_free); | 1944 | dquot_release_reservation_block(inode, to_free); |
1918 | } | 1945 | } |
1919 | 1946 | ||
1920 | static void ext4_da_page_release_reservation(struct page *page, | 1947 | static void ext4_da_page_release_reservation(struct page *page, |
@@ -2091,6 +2118,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2091 | } else if (buffer_mapped(bh)) | 2118 | } else if (buffer_mapped(bh)) |
2092 | BUG_ON(bh->b_blocknr != pblock); | 2119 | BUG_ON(bh->b_blocknr != pblock); |
2093 | 2120 | ||
2121 | if (buffer_uninit(exbh)) | ||
2122 | set_buffer_uninit(bh); | ||
2094 | cur_logical++; | 2123 | cur_logical++; |
2095 | pblock++; | 2124 | pblock++; |
2096 | } while ((bh = bh->b_this_page) != head); | 2125 | } while ((bh = bh->b_this_page) != head); |
@@ -2133,17 +2162,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2133 | break; | 2162 | break; |
2134 | for (i = 0; i < nr_pages; i++) { | 2163 | for (i = 0; i < nr_pages; i++) { |
2135 | struct page *page = pvec.pages[i]; | 2164 | struct page *page = pvec.pages[i]; |
2136 | index = page->index; | 2165 | if (page->index > end) |
2137 | if (index > end) | ||
2138 | break; | 2166 | break; |
2139 | index++; | ||
2140 | |||
2141 | BUG_ON(!PageLocked(page)); | 2167 | BUG_ON(!PageLocked(page)); |
2142 | BUG_ON(PageWriteback(page)); | 2168 | BUG_ON(PageWriteback(page)); |
2143 | block_invalidatepage(page, 0); | 2169 | block_invalidatepage(page, 0); |
2144 | ClearPageUptodate(page); | 2170 | ClearPageUptodate(page); |
2145 | unlock_page(page); | 2171 | unlock_page(page); |
2146 | } | 2172 | } |
2173 | index = pvec.pages[nr_pages - 1]->index + 1; | ||
2174 | pagevec_release(&pvec); | ||
2147 | } | 2175 | } |
2148 | return; | 2176 | return; |
2149 | } | 2177 | } |
@@ -2220,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2220 | */ | 2248 | */ |
2221 | new.b_state = 0; | 2249 | new.b_state = 0; |
2222 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 2250 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2251 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
2252 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
2223 | if (mpd->b_state & (1 << BH_Delay)) | 2253 | if (mpd->b_state & (1 << BH_Delay)) |
2224 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2254 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2225 | 2255 | ||
@@ -2630,11 +2660,14 @@ static int __ext4_journalled_writepage(struct page *page, | |||
2630 | ret = err; | 2660 | ret = err; |
2631 | 2661 | ||
2632 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); | 2662 | walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); |
2633 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 2663 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |
2634 | out: | 2664 | out: |
2635 | return ret; | 2665 | return ret; |
2636 | } | 2666 | } |
2637 | 2667 | ||
2668 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | ||
2669 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | ||
2670 | |||
2638 | /* | 2671 | /* |
2639 | * Note that we don't need to start a transaction unless we're journaling data | 2672 | * Note that we don't need to start a transaction unless we're journaling data |
2640 | * because we should have holes filled from ext4_page_mkwrite(). We even don't | 2673 | * because we should have holes filled from ext4_page_mkwrite(). We even don't |
@@ -2682,7 +2715,7 @@ static int ext4_writepage(struct page *page, | |||
2682 | int ret = 0; | 2715 | int ret = 0; |
2683 | loff_t size; | 2716 | loff_t size; |
2684 | unsigned int len; | 2717 | unsigned int len; |
2685 | struct buffer_head *page_bufs; | 2718 | struct buffer_head *page_bufs = NULL; |
2686 | struct inode *inode = page->mapping->host; | 2719 | struct inode *inode = page->mapping->host; |
2687 | 2720 | ||
2688 | trace_ext4_writepage(inode, page); | 2721 | trace_ext4_writepage(inode, page); |
@@ -2758,7 +2791,11 @@ static int ext4_writepage(struct page *page, | |||
2758 | 2791 | ||
2759 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2792 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) |
2760 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | 2793 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); |
2761 | else | 2794 | else if (page_bufs && buffer_uninit(page_bufs)) { |
2795 | ext4_set_bh_endio(page_bufs, inode); | ||
2796 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | ||
2797 | wbc, ext4_end_io_buffer_write); | ||
2798 | } else | ||
2762 | ret = block_write_full_page(page, noalloc_get_block_write, | 2799 | ret = block_write_full_page(page, noalloc_get_block_write, |
2763 | wbc); | 2800 | wbc); |
2764 | 2801 | ||
@@ -3301,7 +3338,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3301 | filemap_write_and_wait(mapping); | 3338 | filemap_write_and_wait(mapping); |
3302 | } | 3339 | } |
3303 | 3340 | ||
3304 | if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { | 3341 | if (EXT4_JOURNAL(inode) && |
3342 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { | ||
3305 | /* | 3343 | /* |
3306 | * This is a REALLY heavyweight approach, but the use of | 3344 | * This is a REALLY heavyweight approach, but the use of |
3307 | * bmap on dirty files is expected to be extremely rare: | 3345 | * bmap on dirty files is expected to be extremely rare: |
@@ -3320,7 +3358,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3320 | * everything they get. | 3358 | * everything they get. |
3321 | */ | 3359 | */ |
3322 | 3360 | ||
3323 | EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; | 3361 | ext4_clear_inode_state(inode, EXT4_STATE_JDATA); |
3324 | journal = EXT4_JOURNAL(inode); | 3362 | journal = EXT4_JOURNAL(inode); |
3325 | jbd2_journal_lock_updates(journal); | 3363 | jbd2_journal_lock_updates(journal); |
3326 | err = jbd2_journal_flush(journal); | 3364 | err = jbd2_journal_flush(journal); |
@@ -3345,11 +3383,45 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3345 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3383 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3346 | } | 3384 | } |
3347 | 3385 | ||
3386 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3387 | { | ||
3388 | BUG_ON(!io); | ||
3389 | if (io->page) | ||
3390 | put_page(io->page); | ||
3391 | iput(io->inode); | ||
3392 | kfree(io); | ||
3393 | } | ||
3394 | |||
3395 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | ||
3396 | { | ||
3397 | struct buffer_head *head, *bh; | ||
3398 | unsigned int curr_off = 0; | ||
3399 | |||
3400 | if (!page_has_buffers(page)) | ||
3401 | return; | ||
3402 | head = bh = page_buffers(page); | ||
3403 | do { | ||
3404 | if (offset <= curr_off && test_clear_buffer_uninit(bh) | ||
3405 | && bh->b_private) { | ||
3406 | ext4_free_io_end(bh->b_private); | ||
3407 | bh->b_private = NULL; | ||
3408 | bh->b_end_io = NULL; | ||
3409 | } | ||
3410 | curr_off = curr_off + bh->b_size; | ||
3411 | bh = bh->b_this_page; | ||
3412 | } while (bh != head); | ||
3413 | } | ||
3414 | |||
3348 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 3415 | static void ext4_invalidatepage(struct page *page, unsigned long offset) |
3349 | { | 3416 | { |
3350 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3417 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3351 | 3418 | ||
3352 | /* | 3419 | /* |
3420 | * free any io_end structure allocated for buffers to be discarded | ||
3421 | */ | ||
3422 | if (ext4_should_dioread_nolock(page->mapping->host)) | ||
3423 | ext4_invalidatepage_free_endio(page, offset); | ||
3424 | /* | ||
3353 | * If it's a full truncate we just forget about the pending dirtying | 3425 | * If it's a full truncate we just forget about the pending dirtying |
3354 | */ | 3426 | */ |
3355 | if (offset == 0) | 3427 | if (offset == 0) |
@@ -3420,7 +3492,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
3420 | } | 3492 | } |
3421 | 3493 | ||
3422 | retry: | 3494 | retry: |
3423 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 3495 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3496 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | ||
3497 | inode->i_sb->s_bdev, iov, | ||
3498 | offset, nr_segs, | ||
3499 | ext4_get_block, NULL); | ||
3500 | else | ||
3501 | ret = blockdev_direct_IO(rw, iocb, inode, | ||
3502 | inode->i_sb->s_bdev, iov, | ||
3424 | offset, nr_segs, | 3503 | offset, nr_segs, |
3425 | ext4_get_block, NULL); | 3504 | ext4_get_block, NULL); |
3426 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3505 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3436,6 +3515,9 @@ retry: | |||
3436 | * but cannot extend i_size. Bail out and pretend | 3515 | * but cannot extend i_size. Bail out and pretend |
3437 | * the write failed... */ | 3516 | * the write failed... */ |
3438 | ret = PTR_ERR(handle); | 3517 | ret = PTR_ERR(handle); |
3518 | if (inode->i_nlink) | ||
3519 | ext4_orphan_del(NULL, inode); | ||
3520 | |||
3439 | goto out; | 3521 | goto out; |
3440 | } | 3522 | } |
3441 | if (inode->i_nlink) | 3523 | if (inode->i_nlink) |
@@ -3463,75 +3545,63 @@ out: | |||
3463 | return ret; | 3545 | return ret; |
3464 | } | 3546 | } |
3465 | 3547 | ||
3466 | static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, | 3548 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3467 | struct buffer_head *bh_result, int create) | 3549 | struct buffer_head *bh_result, int create) |
3468 | { | 3550 | { |
3469 | handle_t *handle = NULL; | 3551 | handle_t *handle = ext4_journal_current_handle(); |
3470 | int ret = 0; | 3552 | int ret = 0; |
3471 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 3553 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
3472 | int dio_credits; | 3554 | int dio_credits; |
3555 | int started = 0; | ||
3473 | 3556 | ||
3474 | ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", | 3557 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3475 | inode->i_ino, create); | 3558 | inode->i_ino, create); |
3476 | /* | 3559 | /* |
3477 | * DIO VFS code passes create = 0 flag for write to | 3560 | * ext4_get_block in prepare for a DIO write or buffer write. |
3478 | * the middle of file. It does this to avoid block | 3561 | * We allocate an uinitialized extent if blocks haven't been allocated. |
3479 | * allocation for holes, to prevent expose stale data | 3562 | * The extent will be converted to initialized after IO complete. |
3480 | * out when there is parallel buffered read (which does | ||
3481 | * not hold the i_mutex lock) while direct IO write has | ||
3482 | * not completed. DIO request on holes finally falls back | ||
3483 | * to buffered IO for this reason. | ||
3484 | * | ||
3485 | * For ext4 extent based file, since we support fallocate, | ||
3486 | * new allocated extent as uninitialized, for holes, we | ||
3487 | * could fallocate blocks for holes, thus parallel | ||
3488 | * buffered IO read will zero out the page when read on | ||
3489 | * a hole while parallel DIO write to the hole has not completed. | ||
3490 | * | ||
3491 | * when we come here, we know it's a direct IO write to | ||
3492 | * to the middle of file (<i_size) | ||
3493 | * so it's safe to override the create flag from VFS. | ||
3494 | */ | 3563 | */ |
3495 | create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; | 3564 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; |
3496 | 3565 | ||
3497 | if (max_blocks > DIO_MAX_BLOCKS) | 3566 | if (!handle) { |
3498 | max_blocks = DIO_MAX_BLOCKS; | 3567 | if (max_blocks > DIO_MAX_BLOCKS) |
3499 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3568 | max_blocks = DIO_MAX_BLOCKS; |
3500 | handle = ext4_journal_start(inode, dio_credits); | 3569 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3501 | if (IS_ERR(handle)) { | 3570 | handle = ext4_journal_start(inode, dio_credits); |
3502 | ret = PTR_ERR(handle); | 3571 | if (IS_ERR(handle)) { |
3503 | goto out; | 3572 | ret = PTR_ERR(handle); |
3573 | goto out; | ||
3574 | } | ||
3575 | started = 1; | ||
3504 | } | 3576 | } |
3577 | |||
3505 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 3578 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, |
3506 | create); | 3579 | create); |
3507 | if (ret > 0) { | 3580 | if (ret > 0) { |
3508 | bh_result->b_size = (ret << inode->i_blkbits); | 3581 | bh_result->b_size = (ret << inode->i_blkbits); |
3509 | ret = 0; | 3582 | ret = 0; |
3510 | } | 3583 | } |
3511 | ext4_journal_stop(handle); | 3584 | if (started) |
3585 | ext4_journal_stop(handle); | ||
3512 | out: | 3586 | out: |
3513 | return ret; | 3587 | return ret; |
3514 | } | 3588 | } |
3515 | 3589 | ||
3516 | static void ext4_free_io_end(ext4_io_end_t *io) | 3590 | static void dump_completed_IO(struct inode * inode) |
3517 | { | ||
3518 | BUG_ON(!io); | ||
3519 | iput(io->inode); | ||
3520 | kfree(io); | ||
3521 | } | ||
3522 | static void dump_aio_dio_list(struct inode * inode) | ||
3523 | { | 3591 | { |
3524 | #ifdef EXT4_DEBUG | 3592 | #ifdef EXT4_DEBUG |
3525 | struct list_head *cur, *before, *after; | 3593 | struct list_head *cur, *before, *after; |
3526 | ext4_io_end_t *io, *io0, *io1; | 3594 | ext4_io_end_t *io, *io0, *io1; |
3595 | unsigned long flags; | ||
3527 | 3596 | ||
3528 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3597 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ |
3529 | ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); | 3598 | ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); |
3530 | return; | 3599 | return; |
3531 | } | 3600 | } |
3532 | 3601 | ||
3533 | ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); | 3602 | ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); |
3534 | list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ | 3603 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
3604 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ | ||
3535 | cur = &io->list; | 3605 | cur = &io->list; |
3536 | before = cur->prev; | 3606 | before = cur->prev; |
3537 | io0 = container_of(before, ext4_io_end_t, list); | 3607 | io0 = container_of(before, ext4_io_end_t, list); |
@@ -3541,32 +3611,31 @@ static void dump_aio_dio_list(struct inode * inode) | |||
3541 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", | 3611 | ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", |
3542 | io, inode->i_ino, io0, io1); | 3612 | io, inode->i_ino, io0, io1); |
3543 | } | 3613 | } |
3614 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3544 | #endif | 3615 | #endif |
3545 | } | 3616 | } |
3546 | 3617 | ||
3547 | /* | 3618 | /* |
3548 | * check a range of space and convert unwritten extents to written. | 3619 | * check a range of space and convert unwritten extents to written. |
3549 | */ | 3620 | */ |
3550 | static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | 3621 | static int ext4_end_io_nolock(ext4_io_end_t *io) |
3551 | { | 3622 | { |
3552 | struct inode *inode = io->inode; | 3623 | struct inode *inode = io->inode; |
3553 | loff_t offset = io->offset; | 3624 | loff_t offset = io->offset; |
3554 | size_t size = io->size; | 3625 | ssize_t size = io->size; |
3555 | int ret = 0; | 3626 | int ret = 0; |
3556 | 3627 | ||
3557 | ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," | 3628 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
3558 | "list->prev 0x%p\n", | 3629 | "list->prev 0x%p\n", |
3559 | io, inode->i_ino, io->list.next, io->list.prev); | 3630 | io, inode->i_ino, io->list.next, io->list.prev); |
3560 | 3631 | ||
3561 | if (list_empty(&io->list)) | 3632 | if (list_empty(&io->list)) |
3562 | return ret; | 3633 | return ret; |
3563 | 3634 | ||
3564 | if (io->flag != DIO_AIO_UNWRITTEN) | 3635 | if (io->flag != EXT4_IO_UNWRITTEN) |
3565 | return ret; | 3636 | return ret; |
3566 | 3637 | ||
3567 | if (offset + size <= i_size_read(inode)) | 3638 | ret = ext4_convert_unwritten_extents(inode, offset, size); |
3568 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3569 | |||
3570 | if (ret < 0) { | 3639 | if (ret < 0) { |
3571 | printk(KERN_EMERG "%s: failed to convert unwritten" | 3640 | printk(KERN_EMERG "%s: failed to convert unwritten" |
3572 | "extents to written extents, error is %d" | 3641 | "extents to written extents, error is %d" |
@@ -3579,50 +3648,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) | |||
3579 | io->flag = 0; | 3648 | io->flag = 0; |
3580 | return ret; | 3649 | return ret; |
3581 | } | 3650 | } |
3651 | |||
3582 | /* | 3652 | /* |
3583 | * work on completed aio dio IO, to convert unwritten extents to extents | 3653 | * work on completed aio dio IO, to convert unwritten extents to extents |
3584 | */ | 3654 | */ |
3585 | static void ext4_end_aio_dio_work(struct work_struct *work) | 3655 | static void ext4_end_io_work(struct work_struct *work) |
3586 | { | 3656 | { |
3587 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | 3657 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); |
3588 | struct inode *inode = io->inode; | 3658 | struct inode *inode = io->inode; |
3589 | int ret = 0; | 3659 | struct ext4_inode_info *ei = EXT4_I(inode); |
3660 | unsigned long flags; | ||
3661 | int ret; | ||
3590 | 3662 | ||
3591 | mutex_lock(&inode->i_mutex); | 3663 | mutex_lock(&inode->i_mutex); |
3592 | ret = ext4_end_aio_dio_nolock(io); | 3664 | ret = ext4_end_io_nolock(io); |
3593 | if (ret >= 0) { | 3665 | if (ret < 0) { |
3594 | if (!list_empty(&io->list)) | 3666 | mutex_unlock(&inode->i_mutex); |
3595 | list_del_init(&io->list); | 3667 | return; |
3596 | ext4_free_io_end(io); | ||
3597 | } | 3668 | } |
3669 | |||
3670 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3671 | if (!list_empty(&io->list)) | ||
3672 | list_del_init(&io->list); | ||
3673 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3598 | mutex_unlock(&inode->i_mutex); | 3674 | mutex_unlock(&inode->i_mutex); |
3675 | ext4_free_io_end(io); | ||
3599 | } | 3676 | } |
3677 | |||
3600 | /* | 3678 | /* |
3601 | * This function is called from ext4_sync_file(). | 3679 | * This function is called from ext4_sync_file(). |
3602 | * | 3680 | * |
3603 | * When AIO DIO IO is completed, the work to convert unwritten | 3681 | * When IO is completed, the work to convert unwritten extents to |
3604 | * extents to written is queued on workqueue but may not get immediately | 3682 | * written is queued on workqueue but may not get immediately |
3605 | * scheduled. When fsync is called, we need to ensure the | 3683 | * scheduled. When fsync is called, we need to ensure the |
3606 | * conversion is complete before fsync returns. | 3684 | * conversion is complete before fsync returns. |
3607 | * The inode keeps track of a list of completed AIO from DIO path | 3685 | * The inode keeps track of a list of pending/completed IO that |
3608 | * that might needs to do the conversion. This function walks through | 3686 | * might needs to do the conversion. This function walks through |
3609 | * the list and convert the related unwritten extents to written. | 3687 | * the list and convert the related unwritten extents for completed IO |
3688 | * to written. | ||
3689 | * The function return the number of pending IOs on success. | ||
3610 | */ | 3690 | */ |
3611 | int flush_aio_dio_completed_IO(struct inode *inode) | 3691 | int flush_completed_IO(struct inode *inode) |
3612 | { | 3692 | { |
3613 | ext4_io_end_t *io; | 3693 | ext4_io_end_t *io; |
3694 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3695 | unsigned long flags; | ||
3614 | int ret = 0; | 3696 | int ret = 0; |
3615 | int ret2 = 0; | 3697 | int ret2 = 0; |
3616 | 3698 | ||
3617 | if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) | 3699 | if (list_empty(&ei->i_completed_io_list)) |
3618 | return ret; | 3700 | return ret; |
3619 | 3701 | ||
3620 | dump_aio_dio_list(inode); | 3702 | dump_completed_IO(inode); |
3621 | while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ | 3703 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3622 | io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, | 3704 | while (!list_empty(&ei->i_completed_io_list)){ |
3705 | io = list_entry(ei->i_completed_io_list.next, | ||
3623 | ext4_io_end_t, list); | 3706 | ext4_io_end_t, list); |
3624 | /* | 3707 | /* |
3625 | * Calling ext4_end_aio_dio_nolock() to convert completed | 3708 | * Calling ext4_end_io_nolock() to convert completed |
3626 | * IO to written. | 3709 | * IO to written. |
3627 | * | 3710 | * |
3628 | * When ext4_sync_file() is called, run_queue() may already | 3711 | * When ext4_sync_file() is called, run_queue() may already |
@@ -3635,20 +3718,23 @@ int flush_aio_dio_completed_IO(struct inode *inode) | |||
3635 | * avoid double converting from both fsync and background work | 3718 | * avoid double converting from both fsync and background work |
3636 | * queue work. | 3719 | * queue work. |
3637 | */ | 3720 | */ |
3638 | ret = ext4_end_aio_dio_nolock(io); | 3721 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3722 | ret = ext4_end_io_nolock(io); | ||
3723 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3639 | if (ret < 0) | 3724 | if (ret < 0) |
3640 | ret2 = ret; | 3725 | ret2 = ret; |
3641 | else | 3726 | else |
3642 | list_del_init(&io->list); | 3727 | list_del_init(&io->list); |
3643 | } | 3728 | } |
3729 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3644 | return (ret2 < 0) ? ret2 : 0; | 3730 | return (ret2 < 0) ? ret2 : 0; |
3645 | } | 3731 | } |
3646 | 3732 | ||
3647 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | 3733 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) |
3648 | { | 3734 | { |
3649 | ext4_io_end_t *io = NULL; | 3735 | ext4_io_end_t *io = NULL; |
3650 | 3736 | ||
3651 | io = kmalloc(sizeof(*io), GFP_NOFS); | 3737 | io = kmalloc(sizeof(*io), flags); |
3652 | 3738 | ||
3653 | if (io) { | 3739 | if (io) { |
3654 | igrab(inode); | 3740 | igrab(inode); |
@@ -3656,8 +3742,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) | |||
3656 | io->flag = 0; | 3742 | io->flag = 0; |
3657 | io->offset = 0; | 3743 | io->offset = 0; |
3658 | io->size = 0; | 3744 | io->size = 0; |
3659 | io->error = 0; | 3745 | io->page = NULL; |
3660 | INIT_WORK(&io->work, ext4_end_aio_dio_work); | 3746 | INIT_WORK(&io->work, ext4_end_io_work); |
3661 | INIT_LIST_HEAD(&io->list); | 3747 | INIT_LIST_HEAD(&io->list); |
3662 | } | 3748 | } |
3663 | 3749 | ||
@@ -3669,6 +3755,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3669 | { | 3755 | { |
3670 | ext4_io_end_t *io_end = iocb->private; | 3756 | ext4_io_end_t *io_end = iocb->private; |
3671 | struct workqueue_struct *wq; | 3757 | struct workqueue_struct *wq; |
3758 | unsigned long flags; | ||
3759 | struct ext4_inode_info *ei; | ||
3672 | 3760 | ||
3673 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3761 | /* if not async direct IO or dio with 0 bytes write, just return */ |
3674 | if (!io_end || !size) | 3762 | if (!io_end || !size) |
@@ -3680,7 +3768,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3680 | size); | 3768 | size); |
3681 | 3769 | ||
3682 | /* if not aio dio with unwritten extents, just free io and return */ | 3770 | /* if not aio dio with unwritten extents, just free io and return */ |
3683 | if (io_end->flag != DIO_AIO_UNWRITTEN){ | 3771 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
3684 | ext4_free_io_end(io_end); | 3772 | ext4_free_io_end(io_end); |
3685 | iocb->private = NULL; | 3773 | iocb->private = NULL; |
3686 | return; | 3774 | return; |
@@ -3688,16 +3776,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3688 | 3776 | ||
3689 | io_end->offset = offset; | 3777 | io_end->offset = offset; |
3690 | io_end->size = size; | 3778 | io_end->size = size; |
3779 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
3691 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3780 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3692 | 3781 | ||
3693 | /* queue the work to convert unwritten extents to written */ | 3782 | /* queue the work to convert unwritten extents to written */ |
3694 | queue_work(wq, &io_end->work); | 3783 | queue_work(wq, &io_end->work); |
3695 | 3784 | ||
3696 | /* Add the io_end to per-inode completed aio dio list*/ | 3785 | /* Add the io_end to per-inode completed aio dio list*/ |
3697 | list_add_tail(&io_end->list, | 3786 | ei = EXT4_I(io_end->inode); |
3698 | &EXT4_I(io_end->inode)->i_aio_dio_complete_list); | 3787 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
3788 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | ||
3789 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3699 | iocb->private = NULL; | 3790 | iocb->private = NULL; |
3700 | } | 3791 | } |
3792 | |||
3793 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | ||
3794 | { | ||
3795 | ext4_io_end_t *io_end = bh->b_private; | ||
3796 | struct workqueue_struct *wq; | ||
3797 | struct inode *inode; | ||
3798 | unsigned long flags; | ||
3799 | |||
3800 | if (!test_clear_buffer_uninit(bh) || !io_end) | ||
3801 | goto out; | ||
3802 | |||
3803 | if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { | ||
3804 | printk("sb umounted, discard end_io request for inode %lu\n", | ||
3805 | io_end->inode->i_ino); | ||
3806 | ext4_free_io_end(io_end); | ||
3807 | goto out; | ||
3808 | } | ||
3809 | |||
3810 | io_end->flag = EXT4_IO_UNWRITTEN; | ||
3811 | inode = io_end->inode; | ||
3812 | |||
3813 | /* Add the io_end to per-inode completed io list*/ | ||
3814 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3815 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | ||
3816 | spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); | ||
3817 | |||
3818 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; | ||
3819 | /* queue the work to convert unwritten extents to written */ | ||
3820 | queue_work(wq, &io_end->work); | ||
3821 | out: | ||
3822 | bh->b_private = NULL; | ||
3823 | bh->b_end_io = NULL; | ||
3824 | clear_buffer_uninit(bh); | ||
3825 | end_buffer_async_write(bh, uptodate); | ||
3826 | } | ||
3827 | |||
3828 | static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) | ||
3829 | { | ||
3830 | ext4_io_end_t *io_end; | ||
3831 | struct page *page = bh->b_page; | ||
3832 | loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT; | ||
3833 | size_t size = bh->b_size; | ||
3834 | |||
3835 | retry: | ||
3836 | io_end = ext4_init_io_end(inode, GFP_ATOMIC); | ||
3837 | if (!io_end) { | ||
3838 | if (printk_ratelimit()) | ||
3839 | printk(KERN_WARNING "%s: allocation fail\n", __func__); | ||
3840 | schedule(); | ||
3841 | goto retry; | ||
3842 | } | ||
3843 | io_end->offset = offset; | ||
3844 | io_end->size = size; | ||
3845 | /* | ||
3846 | * We need to hold a reference to the page to make sure it | ||
3847 | * doesn't get evicted before ext4_end_io_work() has a chance | ||
3848 | * to convert the extent from written to unwritten. | ||
3849 | */ | ||
3850 | io_end->page = page; | ||
3851 | get_page(io_end->page); | ||
3852 | |||
3853 | bh->b_private = io_end; | ||
3854 | bh->b_end_io = ext4_end_io_buffer_write; | ||
3855 | return 0; | ||
3856 | } | ||
3857 | |||
3701 | /* | 3858 | /* |
3702 | * For ext4 extent files, ext4 will do direct-io write to holes, | 3859 | * For ext4 extent files, ext4 will do direct-io write to holes, |
3703 | * preallocated extents, and those write extend the file, no need to | 3860 | * preallocated extents, and those write extend the file, no need to |
@@ -3751,7 +3908,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3751 | iocb->private = NULL; | 3908 | iocb->private = NULL; |
3752 | EXT4_I(inode)->cur_aio_dio = NULL; | 3909 | EXT4_I(inode)->cur_aio_dio = NULL; |
3753 | if (!is_sync_kiocb(iocb)) { | 3910 | if (!is_sync_kiocb(iocb)) { |
3754 | iocb->private = ext4_init_io_end(inode); | 3911 | iocb->private = ext4_init_io_end(inode, GFP_NOFS); |
3755 | if (!iocb->private) | 3912 | if (!iocb->private) |
3756 | return -ENOMEM; | 3913 | return -ENOMEM; |
3757 | /* | 3914 | /* |
@@ -3767,7 +3924,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3767 | ret = blockdev_direct_IO(rw, iocb, inode, | 3924 | ret = blockdev_direct_IO(rw, iocb, inode, |
3768 | inode->i_sb->s_bdev, iov, | 3925 | inode->i_sb->s_bdev, iov, |
3769 | offset, nr_segs, | 3926 | offset, nr_segs, |
3770 | ext4_get_block_dio_write, | 3927 | ext4_get_block_write, |
3771 | ext4_end_io_dio); | 3928 | ext4_end_io_dio); |
3772 | if (iocb->private) | 3929 | if (iocb->private) |
3773 | EXT4_I(inode)->cur_aio_dio = NULL; | 3930 | EXT4_I(inode)->cur_aio_dio = NULL; |
@@ -3788,8 +3945,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3788 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3945 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { |
3789 | ext4_free_io_end(iocb->private); | 3946 | ext4_free_io_end(iocb->private); |
3790 | iocb->private = NULL; | 3947 | iocb->private = NULL; |
3791 | } else if (ret > 0 && (EXT4_I(inode)->i_state & | 3948 | } else if (ret > 0 && ext4_test_inode_state(inode, |
3792 | EXT4_STATE_DIO_UNWRITTEN)) { | 3949 | EXT4_STATE_DIO_UNWRITTEN)) { |
3793 | int err; | 3950 | int err; |
3794 | /* | 3951 | /* |
3795 | * for non AIO case, since the IO is already | 3952 | * for non AIO case, since the IO is already |
@@ -3799,7 +3956,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3799 | offset, ret); | 3956 | offset, ret); |
3800 | if (err < 0) | 3957 | if (err < 0) |
3801 | ret = err; | 3958 | ret = err; |
3802 | EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; | 3959 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3803 | } | 3960 | } |
3804 | return ret; | 3961 | return ret; |
3805 | } | 3962 | } |
@@ -4130,18 +4287,27 @@ no_top: | |||
4130 | * We release `count' blocks on disk, but (last - first) may be greater | 4287 | * We release `count' blocks on disk, but (last - first) may be greater |
4131 | * than `count' because there can be holes in there. | 4288 | * than `count' because there can be holes in there. |
4132 | */ | 4289 | */ |
4133 | static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4290 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4134 | struct buffer_head *bh, | 4291 | struct buffer_head *bh, |
4135 | ext4_fsblk_t block_to_free, | 4292 | ext4_fsblk_t block_to_free, |
4136 | unsigned long count, __le32 *first, | 4293 | unsigned long count, __le32 *first, |
4137 | __le32 *last) | 4294 | __le32 *last) |
4138 | { | 4295 | { |
4139 | __le32 *p; | 4296 | __le32 *p; |
4140 | int flags = EXT4_FREE_BLOCKS_FORGET; | 4297 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; |
4141 | 4298 | ||
4142 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 4299 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
4143 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4300 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4144 | 4301 | ||
4302 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | ||
4303 | count)) { | ||
4304 | ext4_error(inode->i_sb, "inode #%lu: " | ||
4305 | "attempt to clear blocks %llu len %lu, invalid", | ||
4306 | inode->i_ino, (unsigned long long) block_to_free, | ||
4307 | count); | ||
4308 | return 1; | ||
4309 | } | ||
4310 | |||
4145 | if (try_to_extend_transaction(handle, inode)) { | 4311 | if (try_to_extend_transaction(handle, inode)) { |
4146 | if (bh) { | 4312 | if (bh) { |
4147 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4313 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
@@ -4160,6 +4326,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4160 | *p = 0; | 4326 | *p = 0; |
4161 | 4327 | ||
4162 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4328 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); |
4329 | return 0; | ||
4163 | } | 4330 | } |
4164 | 4331 | ||
4165 | /** | 4332 | /** |
@@ -4215,9 +4382,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4215 | } else if (nr == block_to_free + count) { | 4382 | } else if (nr == block_to_free + count) { |
4216 | count++; | 4383 | count++; |
4217 | } else { | 4384 | } else { |
4218 | ext4_clear_blocks(handle, inode, this_bh, | 4385 | if (ext4_clear_blocks(handle, inode, this_bh, |
4219 | block_to_free, | 4386 | block_to_free, count, |
4220 | count, block_to_free_p, p); | 4387 | block_to_free_p, p)) |
4388 | break; | ||
4221 | block_to_free = nr; | 4389 | block_to_free = nr; |
4222 | block_to_free_p = p; | 4390 | block_to_free_p = p; |
4223 | count = 1; | 4391 | count = 1; |
@@ -4241,7 +4409,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4241 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4409 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) |
4242 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4410 | ext4_handle_dirty_metadata(handle, inode, this_bh); |
4243 | else | 4411 | else |
4244 | ext4_error(inode->i_sb, __func__, | 4412 | ext4_error(inode->i_sb, |
4245 | "circular indirect block detected, " | 4413 | "circular indirect block detected, " |
4246 | "inode=%lu, block=%llu", | 4414 | "inode=%lu, block=%llu", |
4247 | inode->i_ino, | 4415 | inode->i_ino, |
@@ -4281,6 +4449,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4281 | if (!nr) | 4449 | if (!nr) |
4282 | continue; /* A hole */ | 4450 | continue; /* A hole */ |
4283 | 4451 | ||
4452 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | ||
4453 | nr, 1)) { | ||
4454 | ext4_error(inode->i_sb, | ||
4455 | "indirect mapped block in inode " | ||
4456 | "#%lu invalid (level %d, blk #%lu)", | ||
4457 | inode->i_ino, depth, | ||
4458 | (unsigned long) nr); | ||
4459 | break; | ||
4460 | } | ||
4461 | |||
4284 | /* Go read the buffer for the next level down */ | 4462 | /* Go read the buffer for the next level down */ |
4285 | bh = sb_bread(inode->i_sb, nr); | 4463 | bh = sb_bread(inode->i_sb, nr); |
4286 | 4464 | ||
@@ -4289,7 +4467,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4289 | * (should be rare). | 4467 | * (should be rare). |
4290 | */ | 4468 | */ |
4291 | if (!bh) { | 4469 | if (!bh) { |
4292 | ext4_error(inode->i_sb, "ext4_free_branches", | 4470 | ext4_error(inode->i_sb, |
4293 | "Read failure, inode=%lu, block=%llu", | 4471 | "Read failure, inode=%lu, block=%llu", |
4294 | inode->i_ino, nr); | 4472 | inode->i_ino, nr); |
4295 | continue; | 4473 | continue; |
@@ -4433,8 +4611,10 @@ void ext4_truncate(struct inode *inode) | |||
4433 | if (!ext4_can_truncate(inode)) | 4611 | if (!ext4_can_truncate(inode)) |
4434 | return; | 4612 | return; |
4435 | 4613 | ||
4614 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | ||
4615 | |||
4436 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4616 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4437 | ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; | 4617 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4438 | 4618 | ||
4439 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4619 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { |
4440 | ext4_ext_truncate(inode); | 4620 | ext4_ext_truncate(inode); |
@@ -4604,9 +4784,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4604 | 4784 | ||
4605 | bh = sb_getblk(sb, block); | 4785 | bh = sb_getblk(sb, block); |
4606 | if (!bh) { | 4786 | if (!bh) { |
4607 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " | 4787 | ext4_error(sb, "unable to read inode block - " |
4608 | "inode block - inode=%lu, block=%llu", | 4788 | "inode=%lu, block=%llu", inode->i_ino, block); |
4609 | inode->i_ino, block); | ||
4610 | return -EIO; | 4789 | return -EIO; |
4611 | } | 4790 | } |
4612 | if (!buffer_uptodate(bh)) { | 4791 | if (!buffer_uptodate(bh)) { |
@@ -4704,9 +4883,8 @@ make_io: | |||
4704 | submit_bh(READ_META, bh); | 4883 | submit_bh(READ_META, bh); |
4705 | wait_on_buffer(bh); | 4884 | wait_on_buffer(bh); |
4706 | if (!buffer_uptodate(bh)) { | 4885 | if (!buffer_uptodate(bh)) { |
4707 | ext4_error(sb, __func__, | 4886 | ext4_error(sb, "unable to read inode block - inode=%lu," |
4708 | "unable to read inode block - inode=%lu, " | 4887 | " block=%llu", inode->i_ino, block); |
4709 | "block=%llu", inode->i_ino, block); | ||
4710 | brelse(bh); | 4888 | brelse(bh); |
4711 | return -EIO; | 4889 | return -EIO; |
4712 | } | 4890 | } |
@@ -4720,7 +4898,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) | |||
4720 | { | 4898 | { |
4721 | /* We have all inode data except xattrs in memory here. */ | 4899 | /* We have all inode data except xattrs in memory here. */ |
4722 | return __ext4_get_inode_loc(inode, iloc, | 4900 | return __ext4_get_inode_loc(inode, iloc, |
4723 | !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); | 4901 | !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); |
4724 | } | 4902 | } |
4725 | 4903 | ||
4726 | void ext4_set_inode_flags(struct inode *inode) | 4904 | void ext4_set_inode_flags(struct inode *inode) |
@@ -4814,7 +4992,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4814 | } | 4992 | } |
4815 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 4993 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); |
4816 | 4994 | ||
4817 | ei->i_state = 0; | 4995 | ei->i_state_flags = 0; |
4818 | ei->i_dir_start_lookup = 0; | 4996 | ei->i_dir_start_lookup = 0; |
4819 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); | 4997 | ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); |
4820 | /* We now have enough fields to check if the inode was active or not. | 4998 | /* We now have enough fields to check if the inode was active or not. |
@@ -4897,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4897 | EXT4_GOOD_OLD_INODE_SIZE + | 5075 | EXT4_GOOD_OLD_INODE_SIZE + |
4898 | ei->i_extra_isize; | 5076 | ei->i_extra_isize; |
4899 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 5077 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) |
4900 | ei->i_state |= EXT4_STATE_XATTR; | 5078 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
4901 | } | 5079 | } |
4902 | } else | 5080 | } else |
4903 | ei->i_extra_isize = 0; | 5081 | ei->i_extra_isize = 0; |
@@ -4917,8 +5095,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4917 | ret = 0; | 5095 | ret = 0; |
4918 | if (ei->i_file_acl && | 5096 | if (ei->i_file_acl && |
4919 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 5097 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
4920 | ext4_error(sb, __func__, | 5098 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", |
4921 | "bad extended attribute block %llu in inode #%lu", | ||
4922 | ei->i_file_acl, inode->i_ino); | 5099 | ei->i_file_acl, inode->i_ino); |
4923 | ret = -EIO; | 5100 | ret = -EIO; |
4924 | goto bad_inode; | 5101 | goto bad_inode; |
@@ -4964,8 +5141,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4964 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5141 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
4965 | } else { | 5142 | } else { |
4966 | ret = -EIO; | 5143 | ret = -EIO; |
4967 | ext4_error(inode->i_sb, __func__, | 5144 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", |
4968 | "bogus i_mode (%o) for inode=%lu", | ||
4969 | inode->i_mode, inode->i_ino); | 5145 | inode->i_mode, inode->i_ino); |
4970 | goto bad_inode; | 5146 | goto bad_inode; |
4971 | } | 5147 | } |
@@ -5037,7 +5213,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5037 | 5213 | ||
5038 | /* For fields not not tracking in the in-memory inode, | 5214 | /* For fields not not tracking in the in-memory inode, |
5039 | * initialise them to zero for new inodes. */ | 5215 | * initialise them to zero for new inodes. */ |
5040 | if (ei->i_state & EXT4_STATE_NEW) | 5216 | if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) |
5041 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); | 5217 | memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); |
5042 | 5218 | ||
5043 | ext4_get_inode_flags(ei); | 5219 | ext4_get_inode_flags(ei); |
@@ -5101,7 +5277,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5101 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 5277 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
5102 | sb->s_dirt = 1; | 5278 | sb->s_dirt = 1; |
5103 | ext4_handle_sync(handle); | 5279 | ext4_handle_sync(handle); |
5104 | err = ext4_handle_dirty_metadata(handle, inode, | 5280 | err = ext4_handle_dirty_metadata(handle, NULL, |
5105 | EXT4_SB(sb)->s_sbh); | 5281 | EXT4_SB(sb)->s_sbh); |
5106 | } | 5282 | } |
5107 | } | 5283 | } |
@@ -5130,10 +5306,10 @@ static int ext4_do_update_inode(handle_t *handle, | |||
5130 | } | 5306 | } |
5131 | 5307 | ||
5132 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 5308 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
5133 | rc = ext4_handle_dirty_metadata(handle, inode, bh); | 5309 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); |
5134 | if (!err) | 5310 | if (!err) |
5135 | err = rc; | 5311 | err = rc; |
5136 | ei->i_state &= ~EXT4_STATE_NEW; | 5312 | ext4_clear_inode_state(inode, EXT4_STATE_NEW); |
5137 | 5313 | ||
5138 | ext4_update_inode_fsync_trans(handle, inode, 0); | 5314 | ext4_update_inode_fsync_trans(handle, inode, 0); |
5139 | out_brelse: | 5315 | out_brelse: |
@@ -5177,7 +5353,7 @@ out_brelse: | |||
5177 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 5353 | * `stuff()' is running, and the new i_size will be lost. Plus the inode |
5178 | * will no longer be on the superblock's dirty inode list. | 5354 | * will no longer be on the superblock's dirty inode list. |
5179 | */ | 5355 | */ |
5180 | int ext4_write_inode(struct inode *inode, int wait) | 5356 | int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) |
5181 | { | 5357 | { |
5182 | int err; | 5358 | int err; |
5183 | 5359 | ||
@@ -5191,7 +5367,7 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
5191 | return -EIO; | 5367 | return -EIO; |
5192 | } | 5368 | } |
5193 | 5369 | ||
5194 | if (!wait) | 5370 | if (wbc->sync_mode != WB_SYNC_ALL) |
5195 | return 0; | 5371 | return 0; |
5196 | 5372 | ||
5197 | err = ext4_force_commit(inode->i_sb); | 5373 | err = ext4_force_commit(inode->i_sb); |
@@ -5201,13 +5377,11 @@ int ext4_write_inode(struct inode *inode, int wait) | |||
5201 | err = ext4_get_inode_loc(inode, &iloc); | 5377 | err = ext4_get_inode_loc(inode, &iloc); |
5202 | if (err) | 5378 | if (err) |
5203 | return err; | 5379 | return err; |
5204 | if (wait) | 5380 | if (wbc->sync_mode == WB_SYNC_ALL) |
5205 | sync_dirty_buffer(iloc.bh); | 5381 | sync_dirty_buffer(iloc.bh); |
5206 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5382 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5207 | ext4_error(inode->i_sb, __func__, | 5383 | ext4_error(inode->i_sb, "IO error syncing inode, " |
5208 | "IO error syncing inode, " | 5384 | "inode=%lu, block=%llu", inode->i_ino, |
5209 | "inode=%lu, block=%llu", | ||
5210 | inode->i_ino, | ||
5211 | (unsigned long long)iloc.bh->b_blocknr); | 5385 | (unsigned long long)iloc.bh->b_blocknr); |
5212 | err = -EIO; | 5386 | err = -EIO; |
5213 | } | 5387 | } |
@@ -5249,6 +5423,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5249 | if (error) | 5423 | if (error) |
5250 | return error; | 5424 | return error; |
5251 | 5425 | ||
5426 | if (ia_valid & ATTR_SIZE) | ||
5427 | dquot_initialize(inode); | ||
5252 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 5428 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
5253 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 5429 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
5254 | handle_t *handle; | 5430 | handle_t *handle; |
@@ -5261,7 +5437,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5261 | error = PTR_ERR(handle); | 5437 | error = PTR_ERR(handle); |
5262 | goto err_out; | 5438 | goto err_out; |
5263 | } | 5439 | } |
5264 | error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; | 5440 | error = dquot_transfer(inode, attr); |
5265 | if (error) { | 5441 | if (error) { |
5266 | ext4_journal_stop(handle); | 5442 | ext4_journal_stop(handle); |
5267 | return error; | 5443 | return error; |
@@ -5288,7 +5464,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5288 | } | 5464 | } |
5289 | 5465 | ||
5290 | if (S_ISREG(inode->i_mode) && | 5466 | if (S_ISREG(inode->i_mode) && |
5291 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 5467 | attr->ia_valid & ATTR_SIZE && |
5468 | (attr->ia_size < inode->i_size || | ||
5469 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | ||
5292 | handle_t *handle; | 5470 | handle_t *handle; |
5293 | 5471 | ||
5294 | handle = ext4_journal_start(inode, 3); | 5472 | handle = ext4_journal_start(inode, 3); |
@@ -5319,6 +5497,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5319 | goto err_out; | 5497 | goto err_out; |
5320 | } | 5498 | } |
5321 | } | 5499 | } |
5500 | /* ext4_truncate will clear the flag */ | ||
5501 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | ||
5502 | ext4_truncate(inode); | ||
5322 | } | 5503 | } |
5323 | 5504 | ||
5324 | rc = inode_setattr(inode, attr); | 5505 | rc = inode_setattr(inode, attr); |
@@ -5557,8 +5738,8 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5557 | entry = IFIRST(header); | 5738 | entry = IFIRST(header); |
5558 | 5739 | ||
5559 | /* No extended attributes present */ | 5740 | /* No extended attributes present */ |
5560 | if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || | 5741 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
5561 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { | 5742 | header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { |
5562 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, | 5743 | memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, |
5563 | new_extra_isize); | 5744 | new_extra_isize); |
5564 | EXT4_I(inode)->i_extra_isize = new_extra_isize; | 5745 | EXT4_I(inode)->i_extra_isize = new_extra_isize; |
@@ -5602,7 +5783,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5602 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5783 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5603 | if (ext4_handle_valid(handle) && | 5784 | if (ext4_handle_valid(handle) && |
5604 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5785 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
5605 | !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { | 5786 | !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { |
5606 | /* | 5787 | /* |
5607 | * We need extra buffer credits since we may write into EA block | 5788 | * We need extra buffer credits since we may write into EA block |
5608 | * with this same handle. If journal_extend fails, then it will | 5789 | * with this same handle. If journal_extend fails, then it will |
@@ -5616,10 +5797,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5616 | sbi->s_want_extra_isize, | 5797 | sbi->s_want_extra_isize, |
5617 | iloc, handle); | 5798 | iloc, handle); |
5618 | if (ret) { | 5799 | if (ret) { |
5619 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | 5800 | ext4_set_inode_state(inode, |
5801 | EXT4_STATE_NO_EXPAND); | ||
5620 | if (mnt_count != | 5802 | if (mnt_count != |
5621 | le16_to_cpu(sbi->s_es->s_mnt_count)) { | 5803 | le16_to_cpu(sbi->s_es->s_mnt_count)) { |
5622 | ext4_warning(inode->i_sb, __func__, | 5804 | ext4_warning(inode->i_sb, |
5623 | "Unable to expand inode %lu. Delete" | 5805 | "Unable to expand inode %lu. Delete" |
5624 | " some EAs or run e2fsck.", | 5806 | " some EAs or run e2fsck.", |
5625 | inode->i_ino); | 5807 | inode->i_ino); |
@@ -5641,7 +5823,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5641 | * i_size has been changed by generic_commit_write() and we thus need | 5823 | * i_size has been changed by generic_commit_write() and we thus need |
5642 | * to include the updated inode in the current transaction. | 5824 | * to include the updated inode in the current transaction. |
5643 | * | 5825 | * |
5644 | * Also, vfs_dq_alloc_block() will always dirty the inode when blocks | 5826 | * Also, dquot_alloc_block() will always dirty the inode when blocks |
5645 | * are allocated to the file. | 5827 | * are allocated to the file. |
5646 | * | 5828 | * |
5647 | * If the inode is marked synchronous, we don't honour that here - doing | 5829 | * If the inode is marked synchronous, we don't honour that here - doing |
@@ -5683,7 +5865,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) | |||
5683 | err = jbd2_journal_get_write_access(handle, iloc.bh); | 5865 | err = jbd2_journal_get_write_access(handle, iloc.bh); |
5684 | if (!err) | 5866 | if (!err) |
5685 | err = ext4_handle_dirty_metadata(handle, | 5867 | err = ext4_handle_dirty_metadata(handle, |
5686 | inode, | 5868 | NULL, |
5687 | iloc.bh); | 5869 | iloc.bh); |
5688 | brelse(iloc.bh); | 5870 | brelse(iloc.bh); |
5689 | } | 5871 | } |