diff options
Diffstat (limited to 'fs/ext3/inode.c')
-rw-r--r-- | fs/ext3/inode.c | 229 |
1 files changed, 164 insertions, 65 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3451d23c3ba..2ce3c52db32 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -38,10 +38,12 @@ | |||
38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
39 | #include <linux/fiemap.h> | 39 | #include <linux/fiemap.h> |
40 | #include <linux/namei.h> | 40 | #include <linux/namei.h> |
41 | #include <trace/events/ext3.h> | ||
41 | #include "xattr.h" | 42 | #include "xattr.h" |
42 | #include "acl.h" | 43 | #include "acl.h" |
43 | 44 | ||
44 | static int ext3_writepage_trans_blocks(struct inode *inode); | 45 | static int ext3_writepage_trans_blocks(struct inode *inode); |
46 | static int ext3_block_truncate_page(struct inode *inode, loff_t from); | ||
45 | 47 | ||
46 | /* | 48 | /* |
47 | * Test whether an inode is a fast symlink. | 49 | * Test whether an inode is a fast symlink. |
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, | |||
70 | 72 | ||
71 | might_sleep(); | 73 | might_sleep(); |
72 | 74 | ||
75 | trace_ext3_forget(inode, is_metadata, blocknr); | ||
73 | BUFFER_TRACE(bh, "enter"); | 76 | BUFFER_TRACE(bh, "enter"); |
74 | 77 | ||
75 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " | 78 | jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " |
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode) | |||
194 | */ | 197 | */ |
195 | void ext3_evict_inode (struct inode *inode) | 198 | void ext3_evict_inode (struct inode *inode) |
196 | { | 199 | { |
200 | struct ext3_inode_info *ei = EXT3_I(inode); | ||
197 | struct ext3_block_alloc_info *rsv; | 201 | struct ext3_block_alloc_info *rsv; |
198 | handle_t *handle; | 202 | handle_t *handle; |
199 | int want_delete = 0; | 203 | int want_delete = 0; |
200 | 204 | ||
205 | trace_ext3_evict_inode(inode); | ||
201 | if (!inode->i_nlink && !is_bad_inode(inode)) { | 206 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
202 | dquot_initialize(inode); | 207 | dquot_initialize(inode); |
203 | want_delete = 1; | 208 | want_delete = 1; |
204 | } | 209 | } |
205 | 210 | ||
211 | /* | ||
212 | * When journalling data dirty buffers are tracked only in the journal. | ||
213 | * So although mm thinks everything is clean and ready for reaping the | ||
214 | * inode might still have some pages to write in the running | ||
215 | * transaction or waiting to be checkpointed. Thus calling | ||
216 | * journal_invalidatepage() (via truncate_inode_pages()) to discard | ||
217 | * these buffers can cause data loss. Also even if we did not discard | ||
218 | * these buffers, we would have no way to find them after the inode | ||
219 | * is reaped and thus user could see stale data if he tries to read | ||
220 | * them before the transaction is checkpointed. So be careful and | ||
221 | * force everything to disk here... We use ei->i_datasync_tid to | ||
222 | * store the newest transaction containing inode's data. | ||
223 | * | ||
224 | * Note that directories do not have this problem because they don't | ||
225 | * use page cache. | ||
226 | */ | ||
227 | if (inode->i_nlink && ext3_should_journal_data(inode) && | ||
228 | (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { | ||
229 | tid_t commit_tid = atomic_read(&ei->i_datasync_tid); | ||
230 | journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; | ||
231 | |||
232 | log_start_commit(journal, commit_tid); | ||
233 | log_wait_commit(journal, commit_tid); | ||
234 | filemap_write_and_wait(&inode->i_data); | ||
235 | } | ||
206 | truncate_inode_pages(&inode->i_data, 0); | 236 | truncate_inode_pages(&inode->i_data, 0); |
207 | 237 | ||
208 | ext3_discard_reservation(inode); | 238 | ext3_discard_reservation(inode); |
209 | rsv = EXT3_I(inode)->i_block_alloc_info; | 239 | rsv = ei->i_block_alloc_info; |
210 | EXT3_I(inode)->i_block_alloc_info = NULL; | 240 | ei->i_block_alloc_info = NULL; |
211 | if (unlikely(rsv)) | 241 | if (unlikely(rsv)) |
212 | kfree(rsv); | 242 | kfree(rsv); |
213 | 243 | ||
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode) | |||
231 | if (inode->i_blocks) | 261 | if (inode->i_blocks) |
232 | ext3_truncate(inode); | 262 | ext3_truncate(inode); |
233 | /* | 263 | /* |
234 | * Kill off the orphan record which ext3_truncate created. | 264 | * Kill off the orphan record created when the inode lost the last |
235 | * AKPM: I think this can be inside the above `if'. | 265 | * link. Note that ext3_orphan_del() has to be able to cope with the |
236 | * Note that ext3_orphan_del() has to be able to cope with the | 266 | * deletion of a non-existent orphan - ext3_truncate() could |
237 | * deletion of a non-existent orphan - this is because we don't | 267 | * have removed the record. |
238 | * know if ext3_truncate() actually created an orphan record. | ||
239 | * (Well, we could do this if we need to, but heck - it works) | ||
240 | */ | 268 | */ |
241 | ext3_orphan_del(handle, inode); | 269 | ext3_orphan_del(handle, inode); |
242 | EXT3_I(inode)->i_dtime = get_seconds(); | 270 | ei->i_dtime = get_seconds(); |
243 | 271 | ||
244 | /* | 272 | /* |
245 | * One subtle ordering requirement: if anything has gone wrong | 273 | * One subtle ordering requirement: if anything has gone wrong |
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
842 | ext3_fsblk_t first_block = 0; | 870 | ext3_fsblk_t first_block = 0; |
843 | 871 | ||
844 | 872 | ||
873 | trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); | ||
845 | J_ASSERT(handle != NULL || create == 0); | 874 | J_ASSERT(handle != NULL || create == 0); |
846 | depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); | 875 | depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); |
847 | 876 | ||
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
886 | if (!create || err == -EIO) | 915 | if (!create || err == -EIO) |
887 | goto cleanup; | 916 | goto cleanup; |
888 | 917 | ||
918 | /* | ||
919 | * Block out ext3_truncate while we alter the tree | ||
920 | */ | ||
889 | mutex_lock(&ei->truncate_mutex); | 921 | mutex_lock(&ei->truncate_mutex); |
890 | 922 | ||
891 | /* | 923 | /* |
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
934 | */ | 966 | */ |
935 | count = ext3_blks_to_allocate(partial, indirect_blks, | 967 | count = ext3_blks_to_allocate(partial, indirect_blks, |
936 | maxblocks, blocks_to_boundary); | 968 | maxblocks, blocks_to_boundary); |
937 | /* | ||
938 | * Block out ext3_truncate while we alter the tree | ||
939 | */ | ||
940 | err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, | 969 | err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, |
941 | offsets + (partial - chain), partial); | 970 | offsets + (partial - chain), partial); |
942 | 971 | ||
@@ -970,6 +999,9 @@ cleanup: | |||
970 | } | 999 | } |
971 | BUFFER_TRACE(bh_result, "returned"); | 1000 | BUFFER_TRACE(bh_result, "returned"); |
972 | out: | 1001 | out: |
1002 | trace_ext3_get_blocks_exit(inode, iblock, | ||
1003 | depth ? le32_to_cpu(chain[depth-1].key) : 0, | ||
1004 | count, err); | ||
973 | return err; | 1005 | return err; |
974 | } | 1006 | } |
975 | 1007 | ||
@@ -1102,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, | |||
1102 | return bh; | 1134 | return bh; |
1103 | if (buffer_uptodate(bh)) | 1135 | if (buffer_uptodate(bh)) |
1104 | return bh; | 1136 | return bh; |
1105 | ll_rw_block(READ_META, 1, &bh); | 1137 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
1106 | wait_on_buffer(bh); | 1138 | wait_on_buffer(bh); |
1107 | if (buffer_uptodate(bh)) | 1139 | if (buffer_uptodate(bh)) |
1108 | return bh; | 1140 | return bh; |
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode) | |||
1202 | ext3_truncate(inode); | 1234 | ext3_truncate(inode); |
1203 | } | 1235 | } |
1204 | 1236 | ||
1237 | /* | ||
1238 | * Truncate blocks that were not used by direct IO write. We have to zero out | ||
1239 | * the last file block as well because direct IO might have written to it. | ||
1240 | */ | ||
1241 | static void ext3_truncate_failed_direct_write(struct inode *inode) | ||
1242 | { | ||
1243 | ext3_block_truncate_page(inode, inode->i_size); | ||
1244 | ext3_truncate(inode); | ||
1245 | } | ||
1246 | |||
1205 | static int ext3_write_begin(struct file *file, struct address_space *mapping, | 1247 | static int ext3_write_begin(struct file *file, struct address_space *mapping, |
1206 | loff_t pos, unsigned len, unsigned flags, | 1248 | loff_t pos, unsigned len, unsigned flags, |
1207 | struct page **pagep, void **fsdata) | 1249 | struct page **pagep, void **fsdata) |
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, | |||
1217 | * we allocate blocks but write fails for some reason */ | 1259 | * we allocate blocks but write fails for some reason */ |
1218 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; | 1260 | int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; |
1219 | 1261 | ||
1262 | trace_ext3_write_begin(inode, pos, len, flags); | ||
1263 | |||
1220 | index = pos >> PAGE_CACHE_SHIFT; | 1264 | index = pos >> PAGE_CACHE_SHIFT; |
1221 | from = pos & (PAGE_CACHE_SIZE - 1); | 1265 | from = pos & (PAGE_CACHE_SIZE - 1); |
1222 | to = from + len; | 1266 | to = from + len; |
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file, | |||
1332 | unsigned from, to; | 1376 | unsigned from, to; |
1333 | int ret = 0, ret2; | 1377 | int ret = 0, ret2; |
1334 | 1378 | ||
1379 | trace_ext3_ordered_write_end(inode, pos, len, copied); | ||
1335 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1380 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1336 | 1381 | ||
1337 | from = pos & (PAGE_CACHE_SIZE - 1); | 1382 | from = pos & (PAGE_CACHE_SIZE - 1); |
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file, | |||
1367 | struct inode *inode = file->f_mapping->host; | 1412 | struct inode *inode = file->f_mapping->host; |
1368 | int ret; | 1413 | int ret; |
1369 | 1414 | ||
1415 | trace_ext3_writeback_write_end(inode, pos, len, copied); | ||
1370 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); | 1416 | copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); |
1371 | update_file_sizes(inode, pos, copied); | 1417 | update_file_sizes(inode, pos, copied); |
1372 | /* | 1418 | /* |
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file, | |||
1391 | { | 1437 | { |
1392 | handle_t *handle = ext3_journal_current_handle(); | 1438 | handle_t *handle = ext3_journal_current_handle(); |
1393 | struct inode *inode = mapping->host; | 1439 | struct inode *inode = mapping->host; |
1440 | struct ext3_inode_info *ei = EXT3_I(inode); | ||
1394 | int ret = 0, ret2; | 1441 | int ret = 0, ret2; |
1395 | int partial = 0; | 1442 | int partial = 0; |
1396 | unsigned from, to; | 1443 | unsigned from, to; |
1397 | 1444 | ||
1445 | trace_ext3_journalled_write_end(inode, pos, len, copied); | ||
1398 | from = pos & (PAGE_CACHE_SIZE - 1); | 1446 | from = pos & (PAGE_CACHE_SIZE - 1); |
1399 | to = from + len; | 1447 | to = from + len; |
1400 | 1448 | ||
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file, | |||
1419 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) | 1467 | if (pos + len > inode->i_size && ext3_can_truncate(inode)) |
1420 | ext3_orphan_add(handle, inode); | 1468 | ext3_orphan_add(handle, inode); |
1421 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); | 1469 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); |
1422 | if (inode->i_size > EXT3_I(inode)->i_disksize) { | 1470 | atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); |
1423 | EXT3_I(inode)->i_disksize = inode->i_size; | 1471 | if (inode->i_size > ei->i_disksize) { |
1472 | ei->i_disksize = inode->i_size; | ||
1424 | ret2 = ext3_mark_inode_dirty(handle, inode); | 1473 | ret2 = ext3_mark_inode_dirty(handle, inode); |
1425 | if (!ret) | 1474 | if (!ret) |
1426 | ret = ret2; | 1475 | ret = ret2; |
@@ -1568,7 +1617,13 @@ static int ext3_ordered_writepage(struct page *page, | |||
1568 | int err; | 1617 | int err; |
1569 | 1618 | ||
1570 | J_ASSERT(PageLocked(page)); | 1619 | J_ASSERT(PageLocked(page)); |
1571 | WARN_ON_ONCE(IS_RDONLY(inode)); | 1620 | /* |
1621 | * We don't want to warn for emergency remount. The condition is | ||
1622 | * ordered to avoid dereferencing inode->i_sb in non-error case to | ||
1623 | * avoid slow-downs. | ||
1624 | */ | ||
1625 | WARN_ON_ONCE(IS_RDONLY(inode) && | ||
1626 | !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); | ||
1572 | 1627 | ||
1573 | /* | 1628 | /* |
1574 | * We give up here if we're reentered, because it might be for a | 1629 | * We give up here if we're reentered, because it might be for a |
@@ -1577,6 +1632,7 @@ static int ext3_ordered_writepage(struct page *page, | |||
1577 | if (ext3_journal_current_handle()) | 1632 | if (ext3_journal_current_handle()) |
1578 | goto out_fail; | 1633 | goto out_fail; |
1579 | 1634 | ||
1635 | trace_ext3_ordered_writepage(page); | ||
1580 | if (!page_has_buffers(page)) { | 1636 | if (!page_has_buffers(page)) { |
1581 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 1637 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
1582 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1638 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
@@ -1642,11 +1698,18 @@ static int ext3_writeback_writepage(struct page *page, | |||
1642 | int err; | 1698 | int err; |
1643 | 1699 | ||
1644 | J_ASSERT(PageLocked(page)); | 1700 | J_ASSERT(PageLocked(page)); |
1645 | WARN_ON_ONCE(IS_RDONLY(inode)); | 1701 | /* |
1702 | * We don't want to warn for emergency remount. The condition is | ||
1703 | * ordered to avoid dereferencing inode->i_sb in non-error case to | ||
1704 | * avoid slow-downs. | ||
1705 | */ | ||
1706 | WARN_ON_ONCE(IS_RDONLY(inode) && | ||
1707 | !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); | ||
1646 | 1708 | ||
1647 | if (ext3_journal_current_handle()) | 1709 | if (ext3_journal_current_handle()) |
1648 | goto out_fail; | 1710 | goto out_fail; |
1649 | 1711 | ||
1712 | trace_ext3_writeback_writepage(page); | ||
1650 | if (page_has_buffers(page)) { | 1713 | if (page_has_buffers(page)) { |
1651 | if (!walk_page_buffers(NULL, page_buffers(page), 0, | 1714 | if (!walk_page_buffers(NULL, page_buffers(page), 0, |
1652 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | 1715 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { |
@@ -1684,11 +1747,18 @@ static int ext3_journalled_writepage(struct page *page, | |||
1684 | int err; | 1747 | int err; |
1685 | 1748 | ||
1686 | J_ASSERT(PageLocked(page)); | 1749 | J_ASSERT(PageLocked(page)); |
1687 | WARN_ON_ONCE(IS_RDONLY(inode)); | 1750 | /* |
1751 | * We don't want to warn for emergency remount. The condition is | ||
1752 | * ordered to avoid dereferencing inode->i_sb in non-error case to | ||
1753 | * avoid slow-downs. | ||
1754 | */ | ||
1755 | WARN_ON_ONCE(IS_RDONLY(inode) && | ||
1756 | !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); | ||
1688 | 1757 | ||
1689 | if (ext3_journal_current_handle()) | 1758 | if (ext3_journal_current_handle()) |
1690 | goto no_write; | 1759 | goto no_write; |
1691 | 1760 | ||
1761 | trace_ext3_journalled_writepage(page); | ||
1692 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1762 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1693 | if (IS_ERR(handle)) { | 1763 | if (IS_ERR(handle)) { |
1694 | ret = PTR_ERR(handle); | 1764 | ret = PTR_ERR(handle); |
@@ -1715,6 +1785,8 @@ static int ext3_journalled_writepage(struct page *page, | |||
1715 | if (ret == 0) | 1785 | if (ret == 0) |
1716 | ret = err; | 1786 | ret = err; |
1717 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); | 1787 | ext3_set_inode_state(inode, EXT3_STATE_JDATA); |
1788 | atomic_set(&EXT3_I(inode)->i_datasync_tid, | ||
1789 | handle->h_transaction->t_tid); | ||
1718 | unlock_page(page); | 1790 | unlock_page(page); |
1719 | } else { | 1791 | } else { |
1720 | /* | 1792 | /* |
@@ -1739,6 +1811,7 @@ out_unlock: | |||
1739 | 1811 | ||
1740 | static int ext3_readpage(struct file *file, struct page *page) | 1812 | static int ext3_readpage(struct file *file, struct page *page) |
1741 | { | 1813 | { |
1814 | trace_ext3_readpage(page); | ||
1742 | return mpage_readpage(page, ext3_get_block); | 1815 | return mpage_readpage(page, ext3_get_block); |
1743 | } | 1816 | } |
1744 | 1817 | ||
@@ -1753,6 +1826,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset) | |||
1753 | { | 1826 | { |
1754 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1827 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1755 | 1828 | ||
1829 | trace_ext3_invalidatepage(page, offset); | ||
1830 | |||
1756 | /* | 1831 | /* |
1757 | * If it's a full truncate we just forget about the pending dirtying | 1832 | * If it's a full truncate we just forget about the pending dirtying |
1758 | */ | 1833 | */ |
@@ -1766,6 +1841,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) | |||
1766 | { | 1841 | { |
1767 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1842 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
1768 | 1843 | ||
1844 | trace_ext3_releasepage(page); | ||
1769 | WARN_ON(PageChecked(page)); | 1845 | WARN_ON(PageChecked(page)); |
1770 | if (!page_has_buffers(page)) | 1846 | if (!page_has_buffers(page)) |
1771 | return 0; | 1847 | return 0; |
@@ -1794,6 +1870,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | |||
1794 | size_t count = iov_length(iov, nr_segs); | 1870 | size_t count = iov_length(iov, nr_segs); |
1795 | int retries = 0; | 1871 | int retries = 0; |
1796 | 1872 | ||
1873 | trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
1874 | |||
1797 | if (rw == WRITE) { | 1875 | if (rw == WRITE) { |
1798 | loff_t final_size = offset + count; | 1876 | loff_t final_size = offset + count; |
1799 | 1877 | ||
@@ -1816,9 +1894,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, | |||
1816 | } | 1894 | } |
1817 | 1895 | ||
1818 | retry: | 1896 | retry: |
1819 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1897 | ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, |
1820 | offset, nr_segs, | 1898 | ext3_get_block); |
1821 | ext3_get_block, NULL); | ||
1822 | /* | 1899 | /* |
1823 | * In case of error extending write may have instantiated a few | 1900 | * In case of error extending write may have instantiated a few |
1824 | * blocks outside i_size. Trim these off again. | 1901 | * blocks outside i_size. Trim these off again. |
@@ -1828,7 +1905,7 @@ retry: | |||
1828 | loff_t end = offset + iov_length(iov, nr_segs); | 1905 | loff_t end = offset + iov_length(iov, nr_segs); |
1829 | 1906 | ||
1830 | if (end > isize) | 1907 | if (end > isize) |
1831 | vmtruncate(inode, isize); | 1908 | ext3_truncate_failed_direct_write(inode); |
1832 | } | 1909 | } |
1833 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1910 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
1834 | goto retry; | 1911 | goto retry; |
@@ -1842,7 +1919,7 @@ retry: | |||
1842 | /* This is really bad luck. We've written the data | 1919 | /* This is really bad luck. We've written the data |
1843 | * but cannot extend i_size. Truncate allocated blocks | 1920 | * but cannot extend i_size. Truncate allocated blocks |
1844 | * and pretend the write failed... */ | 1921 | * and pretend the write failed... */ |
1845 | ext3_truncate(inode); | 1922 | ext3_truncate_failed_direct_write(inode); |
1846 | ret = PTR_ERR(handle); | 1923 | ret = PTR_ERR(handle); |
1847 | goto out; | 1924 | goto out; |
1848 | } | 1925 | } |
@@ -1868,6 +1945,8 @@ retry: | |||
1868 | ret = err; | 1945 | ret = err; |
1869 | } | 1946 | } |
1870 | out: | 1947 | out: |
1948 | trace_ext3_direct_IO_exit(inode, offset, | ||
1949 | iov_length(iov, nr_segs), rw, ret); | ||
1871 | return ret; | 1950 | return ret; |
1872 | } | 1951 | } |
1873 | 1952 | ||
@@ -1950,17 +2029,24 @@ void ext3_set_aops(struct inode *inode) | |||
1950 | * This required during truncate. We need to physically zero the tail end | 2029 | * This required during truncate. We need to physically zero the tail end |
1951 | * of that block so it doesn't yield old data if the file is later grown. | 2030 | * of that block so it doesn't yield old data if the file is later grown. |
1952 | */ | 2031 | */ |
1953 | static int ext3_block_truncate_page(handle_t *handle, struct page *page, | 2032 | static int ext3_block_truncate_page(struct inode *inode, loff_t from) |
1954 | struct address_space *mapping, loff_t from) | ||
1955 | { | 2033 | { |
1956 | ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 2034 | ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
1957 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 2035 | unsigned offset = from & (PAGE_CACHE_SIZE - 1); |
1958 | unsigned blocksize, iblock, length, pos; | 2036 | unsigned blocksize, iblock, length, pos; |
1959 | struct inode *inode = mapping->host; | 2037 | struct page *page; |
2038 | handle_t *handle = NULL; | ||
1960 | struct buffer_head *bh; | 2039 | struct buffer_head *bh; |
1961 | int err = 0; | 2040 | int err = 0; |
1962 | 2041 | ||
2042 | /* Truncated on block boundary - nothing to do */ | ||
1963 | blocksize = inode->i_sb->s_blocksize; | 2043 | blocksize = inode->i_sb->s_blocksize; |
2044 | if ((from & (blocksize - 1)) == 0) | ||
2045 | return 0; | ||
2046 | |||
2047 | page = grab_cache_page(inode->i_mapping, index); | ||
2048 | if (!page) | ||
2049 | return -ENOMEM; | ||
1964 | length = blocksize - (offset & (blocksize - 1)); | 2050 | length = blocksize - (offset & (blocksize - 1)); |
1965 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 2051 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
1966 | 2052 | ||
@@ -2005,11 +2091,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
2005 | goto unlock; | 2091 | goto unlock; |
2006 | } | 2092 | } |
2007 | 2093 | ||
2094 | /* data=writeback mode doesn't need transaction to zero-out data */ | ||
2095 | if (!ext3_should_writeback_data(inode)) { | ||
2096 | /* We journal at most one block */ | ||
2097 | handle = ext3_journal_start(inode, 1); | ||
2098 | if (IS_ERR(handle)) { | ||
2099 | clear_highpage(page); | ||
2100 | flush_dcache_page(page); | ||
2101 | err = PTR_ERR(handle); | ||
2102 | goto unlock; | ||
2103 | } | ||
2104 | } | ||
2105 | |||
2008 | if (ext3_should_journal_data(inode)) { | 2106 | if (ext3_should_journal_data(inode)) { |
2009 | BUFFER_TRACE(bh, "get write access"); | 2107 | BUFFER_TRACE(bh, "get write access"); |
2010 | err = ext3_journal_get_write_access(handle, bh); | 2108 | err = ext3_journal_get_write_access(handle, bh); |
2011 | if (err) | 2109 | if (err) |
2012 | goto unlock; | 2110 | goto stop; |
2013 | } | 2111 | } |
2014 | 2112 | ||
2015 | zero_user(page, offset, length); | 2113 | zero_user(page, offset, length); |
@@ -2023,6 +2121,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
2023 | err = ext3_journal_dirty_data(handle, bh); | 2121 | err = ext3_journal_dirty_data(handle, bh); |
2024 | mark_buffer_dirty(bh); | 2122 | mark_buffer_dirty(bh); |
2025 | } | 2123 | } |
2124 | stop: | ||
2125 | if (handle) | ||
2126 | ext3_journal_stop(handle); | ||
2026 | 2127 | ||
2027 | unlock: | 2128 | unlock: |
2028 | unlock_page(page); | 2129 | unlock_page(page); |
@@ -2391,8 +2492,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2391 | 2492 | ||
2392 | int ext3_can_truncate(struct inode *inode) | 2493 | int ext3_can_truncate(struct inode *inode) |
2393 | { | 2494 | { |
2394 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
2395 | return 0; | ||
2396 | if (S_ISREG(inode->i_mode)) | 2495 | if (S_ISREG(inode->i_mode)) |
2397 | return 1; | 2496 | return 1; |
2398 | if (S_ISDIR(inode->i_mode)) | 2497 | if (S_ISDIR(inode->i_mode)) |
@@ -2436,7 +2535,6 @@ void ext3_truncate(struct inode *inode) | |||
2436 | struct ext3_inode_info *ei = EXT3_I(inode); | 2535 | struct ext3_inode_info *ei = EXT3_I(inode); |
2437 | __le32 *i_data = ei->i_data; | 2536 | __le32 *i_data = ei->i_data; |
2438 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); | 2537 | int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); |
2439 | struct address_space *mapping = inode->i_mapping; | ||
2440 | int offsets[4]; | 2538 | int offsets[4]; |
2441 | Indirect chain[4]; | 2539 | Indirect chain[4]; |
2442 | Indirect *partial; | 2540 | Indirect *partial; |
@@ -2444,7 +2542,8 @@ void ext3_truncate(struct inode *inode) | |||
2444 | int n; | 2542 | int n; |
2445 | long last_block; | 2543 | long last_block; |
2446 | unsigned blocksize = inode->i_sb->s_blocksize; | 2544 | unsigned blocksize = inode->i_sb->s_blocksize; |
2447 | struct page *page; | 2545 | |
2546 | trace_ext3_truncate_enter(inode); | ||
2448 | 2547 | ||
2449 | if (!ext3_can_truncate(inode)) | 2548 | if (!ext3_can_truncate(inode)) |
2450 | goto out_notrans; | 2549 | goto out_notrans; |
@@ -2452,37 +2551,12 @@ void ext3_truncate(struct inode *inode) | |||
2452 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) | 2551 | if (inode->i_size == 0 && ext3_should_writeback_data(inode)) |
2453 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); | 2552 | ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); |
2454 | 2553 | ||
2455 | /* | ||
2456 | * We have to lock the EOF page here, because lock_page() nests | ||
2457 | * outside journal_start(). | ||
2458 | */ | ||
2459 | if ((inode->i_size & (blocksize - 1)) == 0) { | ||
2460 | /* Block boundary? Nothing to do */ | ||
2461 | page = NULL; | ||
2462 | } else { | ||
2463 | page = grab_cache_page(mapping, | ||
2464 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
2465 | if (!page) | ||
2466 | goto out_notrans; | ||
2467 | } | ||
2468 | |||
2469 | handle = start_transaction(inode); | 2554 | handle = start_transaction(inode); |
2470 | if (IS_ERR(handle)) { | 2555 | if (IS_ERR(handle)) |
2471 | if (page) { | ||
2472 | clear_highpage(page); | ||
2473 | flush_dcache_page(page); | ||
2474 | unlock_page(page); | ||
2475 | page_cache_release(page); | ||
2476 | } | ||
2477 | goto out_notrans; | 2556 | goto out_notrans; |
2478 | } | ||
2479 | 2557 | ||
2480 | last_block = (inode->i_size + blocksize-1) | 2558 | last_block = (inode->i_size + blocksize-1) |
2481 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); | 2559 | >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); |
2482 | |||
2483 | if (page) | ||
2484 | ext3_block_truncate_page(handle, page, mapping, inode->i_size); | ||
2485 | |||
2486 | n = ext3_block_to_path(inode, last_block, offsets, NULL); | 2560 | n = ext3_block_to_path(inode, last_block, offsets, NULL); |
2487 | if (n == 0) | 2561 | if (n == 0) |
2488 | goto out_stop; /* error */ | 2562 | goto out_stop; /* error */ |
@@ -2597,6 +2671,7 @@ out_stop: | |||
2597 | ext3_orphan_del(handle, inode); | 2671 | ext3_orphan_del(handle, inode); |
2598 | 2672 | ||
2599 | ext3_journal_stop(handle); | 2673 | ext3_journal_stop(handle); |
2674 | trace_ext3_truncate_exit(inode); | ||
2600 | return; | 2675 | return; |
2601 | out_notrans: | 2676 | out_notrans: |
2602 | /* | 2677 | /* |
@@ -2605,6 +2680,7 @@ out_notrans: | |||
2605 | */ | 2680 | */ |
2606 | if (inode->i_nlink) | 2681 | if (inode->i_nlink) |
2607 | ext3_orphan_del(NULL, inode); | 2682 | ext3_orphan_del(NULL, inode); |
2683 | trace_ext3_truncate_exit(inode); | ||
2608 | } | 2684 | } |
2609 | 2685 | ||
2610 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, | 2686 | static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, |
@@ -2746,9 +2822,10 @@ make_io: | |||
2746 | * has in-inode xattrs, or we don't have this inode in memory. | 2822 | * has in-inode xattrs, or we don't have this inode in memory. |
2747 | * Read the block from disk. | 2823 | * Read the block from disk. |
2748 | */ | 2824 | */ |
2825 | trace_ext3_load_inode(inode); | ||
2749 | get_bh(bh); | 2826 | get_bh(bh); |
2750 | bh->b_end_io = end_buffer_read_sync; | 2827 | bh->b_end_io = end_buffer_read_sync; |
2751 | submit_bh(READ_META, bh); | 2828 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
2752 | wait_on_buffer(bh); | 2829 | wait_on_buffer(bh); |
2753 | if (!buffer_uptodate(bh)) { | 2830 | if (!buffer_uptodate(bh)) { |
2754 | ext3_error(inode->i_sb, "ext3_get_inode_loc", | 2831 | ext3_error(inode->i_sb, "ext3_get_inode_loc", |
@@ -3216,6 +3293,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) | |||
3216 | ext3_journal_stop(handle); | 3293 | ext3_journal_stop(handle); |
3217 | } | 3294 | } |
3218 | 3295 | ||
3296 | if (attr->ia_valid & ATTR_SIZE) | ||
3297 | inode_dio_wait(inode); | ||
3298 | |||
3219 | if (S_ISREG(inode->i_mode) && | 3299 | if (S_ISREG(inode->i_mode) && |
3220 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { | 3300 | attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { |
3221 | handle_t *handle; | 3301 | handle_t *handle; |
@@ -3227,18 +3307,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) | |||
3227 | } | 3307 | } |
3228 | 3308 | ||
3229 | error = ext3_orphan_add(handle, inode); | 3309 | error = ext3_orphan_add(handle, inode); |
3310 | if (error) { | ||
3311 | ext3_journal_stop(handle); | ||
3312 | goto err_out; | ||
3313 | } | ||
3230 | EXT3_I(inode)->i_disksize = attr->ia_size; | 3314 | EXT3_I(inode)->i_disksize = attr->ia_size; |
3231 | rc = ext3_mark_inode_dirty(handle, inode); | 3315 | error = ext3_mark_inode_dirty(handle, inode); |
3232 | if (!error) | ||
3233 | error = rc; | ||
3234 | ext3_journal_stop(handle); | 3316 | ext3_journal_stop(handle); |
3317 | if (error) { | ||
3318 | /* Some hard fs error must have happened. Bail out. */ | ||
3319 | ext3_orphan_del(NULL, inode); | ||
3320 | goto err_out; | ||
3321 | } | ||
3322 | rc = ext3_block_truncate_page(inode, attr->ia_size); | ||
3323 | if (rc) { | ||
3324 | /* Cleanup orphan list and exit */ | ||
3325 | handle = ext3_journal_start(inode, 3); | ||
3326 | if (IS_ERR(handle)) { | ||
3327 | ext3_orphan_del(NULL, inode); | ||
3328 | goto err_out; | ||
3329 | } | ||
3330 | ext3_orphan_del(handle, inode); | ||
3331 | ext3_journal_stop(handle); | ||
3332 | goto err_out; | ||
3333 | } | ||
3235 | } | 3334 | } |
3236 | 3335 | ||
3237 | if ((attr->ia_valid & ATTR_SIZE) && | 3336 | if ((attr->ia_valid & ATTR_SIZE) && |
3238 | attr->ia_size != i_size_read(inode)) { | 3337 | attr->ia_size != i_size_read(inode)) { |
3239 | rc = vmtruncate(inode, attr->ia_size); | 3338 | truncate_setsize(inode, attr->ia_size); |
3240 | if (rc) | 3339 | ext3_truncate(inode); |
3241 | goto err_out; | ||
3242 | } | 3340 | } |
3243 | 3341 | ||
3244 | setattr_copy(inode, attr); | 3342 | setattr_copy(inode, attr); |
@@ -3372,6 +3470,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
3372 | int err; | 3470 | int err; |
3373 | 3471 | ||
3374 | might_sleep(); | 3472 | might_sleep(); |
3473 | trace_ext3_mark_inode_dirty(inode, _RET_IP_); | ||
3375 | err = ext3_reserve_inode_write(handle, inode, &iloc); | 3474 | err = ext3_reserve_inode_write(handle, inode, &iloc); |
3376 | if (!err) | 3475 | if (!err) |
3377 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); | 3476 | err = ext3_mark_iloc_dirty(handle, inode, &iloc); |