aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext3/inode.c')
-rw-r--r--fs/ext3/inode.c229
1 files changed, 164 insertions, 65 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23c3ba..2ce3c52db32 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
38#include <linux/bio.h> 38#include <linux/bio.h>
39#include <linux/fiemap.h> 39#include <linux/fiemap.h>
40#include <linux/namei.h> 40#include <linux/namei.h>
41#include <trace/events/ext3.h>
41#include "xattr.h" 42#include "xattr.h"
42#include "acl.h" 43#include "acl.h"
43 44
44static int ext3_writepage_trans_blocks(struct inode *inode); 45static int ext3_writepage_trans_blocks(struct inode *inode);
46static int ext3_block_truncate_page(struct inode *inode, loff_t from);
45 47
46/* 48/*
47 * Test whether an inode is a fast symlink. 49 * Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
70 72
71 might_sleep(); 73 might_sleep();
72 74
75 trace_ext3_forget(inode, is_metadata, blocknr);
73 BUFFER_TRACE(bh, "enter"); 76 BUFFER_TRACE(bh, "enter");
74 77
75 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " 78 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
194 */ 197 */
195void ext3_evict_inode (struct inode *inode) 198void ext3_evict_inode (struct inode *inode)
196{ 199{
200 struct ext3_inode_info *ei = EXT3_I(inode);
197 struct ext3_block_alloc_info *rsv; 201 struct ext3_block_alloc_info *rsv;
198 handle_t *handle; 202 handle_t *handle;
199 int want_delete = 0; 203 int want_delete = 0;
200 204
205 trace_ext3_evict_inode(inode);
201 if (!inode->i_nlink && !is_bad_inode(inode)) { 206 if (!inode->i_nlink && !is_bad_inode(inode)) {
202 dquot_initialize(inode); 207 dquot_initialize(inode);
203 want_delete = 1; 208 want_delete = 1;
204 } 209 }
205 210
211 /*
212 * When journalling data dirty buffers are tracked only in the journal.
213 * So although mm thinks everything is clean and ready for reaping the
214 * inode might still have some pages to write in the running
215 * transaction or waiting to be checkpointed. Thus calling
216 * journal_invalidatepage() (via truncate_inode_pages()) to discard
217 * these buffers can cause data loss. Also even if we did not discard
218 * these buffers, we would have no way to find them after the inode
219 * is reaped and thus user could see stale data if he tries to read
220 * them before the transaction is checkpointed. So be careful and
221 * force everything to disk here... We use ei->i_datasync_tid to
222 * store the newest transaction containing inode's data.
223 *
224 * Note that directories do not have this problem because they don't
225 * use page cache.
226 */
227 if (inode->i_nlink && ext3_should_journal_data(inode) &&
228 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
229 tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
230 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
231
232 log_start_commit(journal, commit_tid);
233 log_wait_commit(journal, commit_tid);
234 filemap_write_and_wait(&inode->i_data);
235 }
206 truncate_inode_pages(&inode->i_data, 0); 236 truncate_inode_pages(&inode->i_data, 0);
207 237
208 ext3_discard_reservation(inode); 238 ext3_discard_reservation(inode);
209 rsv = EXT3_I(inode)->i_block_alloc_info; 239 rsv = ei->i_block_alloc_info;
210 EXT3_I(inode)->i_block_alloc_info = NULL; 240 ei->i_block_alloc_info = NULL;
211 if (unlikely(rsv)) 241 if (unlikely(rsv))
212 kfree(rsv); 242 kfree(rsv);
213 243
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
231 if (inode->i_blocks) 261 if (inode->i_blocks)
232 ext3_truncate(inode); 262 ext3_truncate(inode);
233 /* 263 /*
234 * Kill off the orphan record which ext3_truncate created. 264 * Kill off the orphan record created when the inode lost the last
235 * AKPM: I think this can be inside the above `if'. 265 * link. Note that ext3_orphan_del() has to be able to cope with the
236 * Note that ext3_orphan_del() has to be able to cope with the 266 * deletion of a non-existent orphan - ext3_truncate() could
237 * deletion of a non-existent orphan - this is because we don't 267 * have removed the record.
238 * know if ext3_truncate() actually created an orphan record.
239 * (Well, we could do this if we need to, but heck - it works)
240 */ 268 */
241 ext3_orphan_del(handle, inode); 269 ext3_orphan_del(handle, inode);
242 EXT3_I(inode)->i_dtime = get_seconds(); 270 ei->i_dtime = get_seconds();
243 271
244 /* 272 /*
245 * One subtle ordering requirement: if anything has gone wrong 273 * One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
842 ext3_fsblk_t first_block = 0; 870 ext3_fsblk_t first_block = 0;
843 871
844 872
873 trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
845 J_ASSERT(handle != NULL || create == 0); 874 J_ASSERT(handle != NULL || create == 0);
846 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); 875 depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
847 876
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
886 if (!create || err == -EIO) 915 if (!create || err == -EIO)
887 goto cleanup; 916 goto cleanup;
888 917
918 /*
919 * Block out ext3_truncate while we alter the tree
920 */
889 mutex_lock(&ei->truncate_mutex); 921 mutex_lock(&ei->truncate_mutex);
890 922
891 /* 923 /*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
934 */ 966 */
935 count = ext3_blks_to_allocate(partial, indirect_blks, 967 count = ext3_blks_to_allocate(partial, indirect_blks,
936 maxblocks, blocks_to_boundary); 968 maxblocks, blocks_to_boundary);
937 /*
938 * Block out ext3_truncate while we alter the tree
939 */
940 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, 969 err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
941 offsets + (partial - chain), partial); 970 offsets + (partial - chain), partial);
942 971
@@ -970,6 +999,9 @@ cleanup:
970 } 999 }
971 BUFFER_TRACE(bh_result, "returned"); 1000 BUFFER_TRACE(bh_result, "returned");
972out: 1001out:
1002 trace_ext3_get_blocks_exit(inode, iblock,
1003 depth ? le32_to_cpu(chain[depth-1].key) : 0,
1004 count, err);
973 return err; 1005 return err;
974} 1006}
975 1007
@@ -1102,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
1102 return bh; 1134 return bh;
1103 if (buffer_uptodate(bh)) 1135 if (buffer_uptodate(bh))
1104 return bh; 1136 return bh;
1105 ll_rw_block(READ_META, 1, &bh); 1137 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
1106 wait_on_buffer(bh); 1138 wait_on_buffer(bh);
1107 if (buffer_uptodate(bh)) 1139 if (buffer_uptodate(bh))
1108 return bh; 1140 return bh;
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
1202 ext3_truncate(inode); 1234 ext3_truncate(inode);
1203} 1235}
1204 1236
1237/*
1238 * Truncate blocks that were not used by direct IO write. We have to zero out
1239 * the last file block as well because direct IO might have written to it.
1240 */
1241static void ext3_truncate_failed_direct_write(struct inode *inode)
1242{
1243 ext3_block_truncate_page(inode, inode->i_size);
1244 ext3_truncate(inode);
1245}
1246
1205static int ext3_write_begin(struct file *file, struct address_space *mapping, 1247static int ext3_write_begin(struct file *file, struct address_space *mapping,
1206 loff_t pos, unsigned len, unsigned flags, 1248 loff_t pos, unsigned len, unsigned flags,
1207 struct page **pagep, void **fsdata) 1249 struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
1217 * we allocate blocks but write fails for some reason */ 1259 * we allocate blocks but write fails for some reason */
1218 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; 1260 int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
1219 1261
1262 trace_ext3_write_begin(inode, pos, len, flags);
1263
1220 index = pos >> PAGE_CACHE_SHIFT; 1264 index = pos >> PAGE_CACHE_SHIFT;
1221 from = pos & (PAGE_CACHE_SIZE - 1); 1265 from = pos & (PAGE_CACHE_SIZE - 1);
1222 to = from + len; 1266 to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
1332 unsigned from, to; 1376 unsigned from, to;
1333 int ret = 0, ret2; 1377 int ret = 0, ret2;
1334 1378
1379 trace_ext3_ordered_write_end(inode, pos, len, copied);
1335 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1380 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1336 1381
1337 from = pos & (PAGE_CACHE_SIZE - 1); 1382 from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
1367 struct inode *inode = file->f_mapping->host; 1412 struct inode *inode = file->f_mapping->host;
1368 int ret; 1413 int ret;
1369 1414
1415 trace_ext3_writeback_write_end(inode, pos, len, copied);
1370 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 1416 copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
1371 update_file_sizes(inode, pos, copied); 1417 update_file_sizes(inode, pos, copied);
1372 /* 1418 /*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
1391{ 1437{
1392 handle_t *handle = ext3_journal_current_handle(); 1438 handle_t *handle = ext3_journal_current_handle();
1393 struct inode *inode = mapping->host; 1439 struct inode *inode = mapping->host;
1440 struct ext3_inode_info *ei = EXT3_I(inode);
1394 int ret = 0, ret2; 1441 int ret = 0, ret2;
1395 int partial = 0; 1442 int partial = 0;
1396 unsigned from, to; 1443 unsigned from, to;
1397 1444
1445 trace_ext3_journalled_write_end(inode, pos, len, copied);
1398 from = pos & (PAGE_CACHE_SIZE - 1); 1446 from = pos & (PAGE_CACHE_SIZE - 1);
1399 to = from + len; 1447 to = from + len;
1400 1448
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
1419 if (pos + len > inode->i_size && ext3_can_truncate(inode)) 1467 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1420 ext3_orphan_add(handle, inode); 1468 ext3_orphan_add(handle, inode);
1421 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1469 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1422 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1470 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
1423 EXT3_I(inode)->i_disksize = inode->i_size; 1471 if (inode->i_size > ei->i_disksize) {
1472 ei->i_disksize = inode->i_size;
1424 ret2 = ext3_mark_inode_dirty(handle, inode); 1473 ret2 = ext3_mark_inode_dirty(handle, inode);
1425 if (!ret) 1474 if (!ret)
1426 ret = ret2; 1475 ret = ret2;
@@ -1568,7 +1617,13 @@ static int ext3_ordered_writepage(struct page *page,
1568 int err; 1617 int err;
1569 1618
1570 J_ASSERT(PageLocked(page)); 1619 J_ASSERT(PageLocked(page));
1571 WARN_ON_ONCE(IS_RDONLY(inode)); 1620 /*
1621 * We don't want to warn for emergency remount. The condition is
1622 * ordered to avoid dereferencing inode->i_sb in non-error case to
1623 * avoid slow-downs.
1624 */
1625 WARN_ON_ONCE(IS_RDONLY(inode) &&
1626 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1572 1627
1573 /* 1628 /*
1574 * We give up here if we're reentered, because it might be for a 1629 * We give up here if we're reentered, because it might be for a
@@ -1577,6 +1632,7 @@ static int ext3_ordered_writepage(struct page *page,
1577 if (ext3_journal_current_handle()) 1632 if (ext3_journal_current_handle())
1578 goto out_fail; 1633 goto out_fail;
1579 1634
1635 trace_ext3_ordered_writepage(page);
1580 if (!page_has_buffers(page)) { 1636 if (!page_has_buffers(page)) {
1581 create_empty_buffers(page, inode->i_sb->s_blocksize, 1637 create_empty_buffers(page, inode->i_sb->s_blocksize,
1582 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1638 (1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1642,11 +1698,18 @@ static int ext3_writeback_writepage(struct page *page,
1642 int err; 1698 int err;
1643 1699
1644 J_ASSERT(PageLocked(page)); 1700 J_ASSERT(PageLocked(page));
1645 WARN_ON_ONCE(IS_RDONLY(inode)); 1701 /*
1702 * We don't want to warn for emergency remount. The condition is
1703 * ordered to avoid dereferencing inode->i_sb in non-error case to
1704 * avoid slow-downs.
1705 */
1706 WARN_ON_ONCE(IS_RDONLY(inode) &&
1707 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1646 1708
1647 if (ext3_journal_current_handle()) 1709 if (ext3_journal_current_handle())
1648 goto out_fail; 1710 goto out_fail;
1649 1711
1712 trace_ext3_writeback_writepage(page);
1650 if (page_has_buffers(page)) { 1713 if (page_has_buffers(page)) {
1651 if (!walk_page_buffers(NULL, page_buffers(page), 0, 1714 if (!walk_page_buffers(NULL, page_buffers(page), 0,
1652 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { 1715 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1684,11 +1747,18 @@ static int ext3_journalled_writepage(struct page *page,
1684 int err; 1747 int err;
1685 1748
1686 J_ASSERT(PageLocked(page)); 1749 J_ASSERT(PageLocked(page));
1687 WARN_ON_ONCE(IS_RDONLY(inode)); 1750 /*
1751 * We don't want to warn for emergency remount. The condition is
1752 * ordered to avoid dereferencing inode->i_sb in non-error case to
1753 * avoid slow-downs.
1754 */
1755 WARN_ON_ONCE(IS_RDONLY(inode) &&
1756 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1688 1757
1689 if (ext3_journal_current_handle()) 1758 if (ext3_journal_current_handle())
1690 goto no_write; 1759 goto no_write;
1691 1760
1761 trace_ext3_journalled_writepage(page);
1692 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1762 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1693 if (IS_ERR(handle)) { 1763 if (IS_ERR(handle)) {
1694 ret = PTR_ERR(handle); 1764 ret = PTR_ERR(handle);
@@ -1715,6 +1785,8 @@ static int ext3_journalled_writepage(struct page *page,
1715 if (ret == 0) 1785 if (ret == 0)
1716 ret = err; 1786 ret = err;
1717 ext3_set_inode_state(inode, EXT3_STATE_JDATA); 1787 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
1788 atomic_set(&EXT3_I(inode)->i_datasync_tid,
1789 handle->h_transaction->t_tid);
1718 unlock_page(page); 1790 unlock_page(page);
1719 } else { 1791 } else {
1720 /* 1792 /*
@@ -1739,6 +1811,7 @@ out_unlock:
1739 1811
1740static int ext3_readpage(struct file *file, struct page *page) 1812static int ext3_readpage(struct file *file, struct page *page)
1741{ 1813{
1814 trace_ext3_readpage(page);
1742 return mpage_readpage(page, ext3_get_block); 1815 return mpage_readpage(page, ext3_get_block);
1743} 1816}
1744 1817
@@ -1753,6 +1826,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
1753{ 1826{
1754 journal_t *journal = EXT3_JOURNAL(page->mapping->host); 1827 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1755 1828
1829 trace_ext3_invalidatepage(page, offset);
1830
1756 /* 1831 /*
1757 * If it's a full truncate we just forget about the pending dirtying 1832 * If it's a full truncate we just forget about the pending dirtying
1758 */ 1833 */
@@ -1766,6 +1841,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
1766{ 1841{
1767 journal_t *journal = EXT3_JOURNAL(page->mapping->host); 1842 journal_t *journal = EXT3_JOURNAL(page->mapping->host);
1768 1843
1844 trace_ext3_releasepage(page);
1769 WARN_ON(PageChecked(page)); 1845 WARN_ON(PageChecked(page));
1770 if (!page_has_buffers(page)) 1846 if (!page_has_buffers(page))
1771 return 0; 1847 return 0;
@@ -1794,6 +1870,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1794 size_t count = iov_length(iov, nr_segs); 1870 size_t count = iov_length(iov, nr_segs);
1795 int retries = 0; 1871 int retries = 0;
1796 1872
1873 trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
1874
1797 if (rw == WRITE) { 1875 if (rw == WRITE) {
1798 loff_t final_size = offset + count; 1876 loff_t final_size = offset + count;
1799 1877
@@ -1816,9 +1894,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1816 } 1894 }
1817 1895
1818retry: 1896retry:
1819 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1897 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
1820 offset, nr_segs, 1898 ext3_get_block);
1821 ext3_get_block, NULL);
1822 /* 1899 /*
1823 * In case of error extending write may have instantiated a few 1900 * In case of error extending write may have instantiated a few
1824 * blocks outside i_size. Trim these off again. 1901 * blocks outside i_size. Trim these off again.
@@ -1828,7 +1905,7 @@ retry:
1828 loff_t end = offset + iov_length(iov, nr_segs); 1905 loff_t end = offset + iov_length(iov, nr_segs);
1829 1906
1830 if (end > isize) 1907 if (end > isize)
1831 vmtruncate(inode, isize); 1908 ext3_truncate_failed_direct_write(inode);
1832 } 1909 }
1833 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1910 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1834 goto retry; 1911 goto retry;
@@ -1842,7 +1919,7 @@ retry:
1842 /* This is really bad luck. We've written the data 1919 /* This is really bad luck. We've written the data
1843 * but cannot extend i_size. Truncate allocated blocks 1920 * but cannot extend i_size. Truncate allocated blocks
1844 * and pretend the write failed... */ 1921 * and pretend the write failed... */
1845 ext3_truncate(inode); 1922 ext3_truncate_failed_direct_write(inode);
1846 ret = PTR_ERR(handle); 1923 ret = PTR_ERR(handle);
1847 goto out; 1924 goto out;
1848 } 1925 }
@@ -1868,6 +1945,8 @@ retry:
1868 ret = err; 1945 ret = err;
1869 } 1946 }
1870out: 1947out:
1948 trace_ext3_direct_IO_exit(inode, offset,
1949 iov_length(iov, nr_segs), rw, ret);
1871 return ret; 1950 return ret;
1872} 1951}
1873 1952
@@ -1950,17 +2029,24 @@ void ext3_set_aops(struct inode *inode)
1950 * This required during truncate. We need to physically zero the tail end 2029 * This required during truncate. We need to physically zero the tail end
1951 * of that block so it doesn't yield old data if the file is later grown. 2030 * of that block so it doesn't yield old data if the file is later grown.
1952 */ 2031 */
1953static int ext3_block_truncate_page(handle_t *handle, struct page *page, 2032static int ext3_block_truncate_page(struct inode *inode, loff_t from)
1954 struct address_space *mapping, loff_t from)
1955{ 2033{
1956 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; 2034 ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
1957 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2035 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
1958 unsigned blocksize, iblock, length, pos; 2036 unsigned blocksize, iblock, length, pos;
1959 struct inode *inode = mapping->host; 2037 struct page *page;
2038 handle_t *handle = NULL;
1960 struct buffer_head *bh; 2039 struct buffer_head *bh;
1961 int err = 0; 2040 int err = 0;
1962 2041
2042 /* Truncated on block boundary - nothing to do */
1963 blocksize = inode->i_sb->s_blocksize; 2043 blocksize = inode->i_sb->s_blocksize;
2044 if ((from & (blocksize - 1)) == 0)
2045 return 0;
2046
2047 page = grab_cache_page(inode->i_mapping, index);
2048 if (!page)
2049 return -ENOMEM;
1964 length = blocksize - (offset & (blocksize - 1)); 2050 length = blocksize - (offset & (blocksize - 1));
1965 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 2051 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1966 2052
@@ -2005,11 +2091,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
2005 goto unlock; 2091 goto unlock;
2006 } 2092 }
2007 2093
2094 /* data=writeback mode doesn't need transaction to zero-out data */
2095 if (!ext3_should_writeback_data(inode)) {
2096 /* We journal at most one block */
2097 handle = ext3_journal_start(inode, 1);
2098 if (IS_ERR(handle)) {
2099 clear_highpage(page);
2100 flush_dcache_page(page);
2101 err = PTR_ERR(handle);
2102 goto unlock;
2103 }
2104 }
2105
2008 if (ext3_should_journal_data(inode)) { 2106 if (ext3_should_journal_data(inode)) {
2009 BUFFER_TRACE(bh, "get write access"); 2107 BUFFER_TRACE(bh, "get write access");
2010 err = ext3_journal_get_write_access(handle, bh); 2108 err = ext3_journal_get_write_access(handle, bh);
2011 if (err) 2109 if (err)
2012 goto unlock; 2110 goto stop;
2013 } 2111 }
2014 2112
2015 zero_user(page, offset, length); 2113 zero_user(page, offset, length);
@@ -2023,6 +2121,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
2023 err = ext3_journal_dirty_data(handle, bh); 2121 err = ext3_journal_dirty_data(handle, bh);
2024 mark_buffer_dirty(bh); 2122 mark_buffer_dirty(bh);
2025 } 2123 }
2124stop:
2125 if (handle)
2126 ext3_journal_stop(handle);
2026 2127
2027unlock: 2128unlock:
2028 unlock_page(page); 2129 unlock_page(page);
@@ -2391,8 +2492,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2391 2492
2392int ext3_can_truncate(struct inode *inode) 2493int ext3_can_truncate(struct inode *inode)
2393{ 2494{
2394 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2395 return 0;
2396 if (S_ISREG(inode->i_mode)) 2495 if (S_ISREG(inode->i_mode))
2397 return 1; 2496 return 1;
2398 if (S_ISDIR(inode->i_mode)) 2497 if (S_ISDIR(inode->i_mode))
@@ -2436,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
2436 struct ext3_inode_info *ei = EXT3_I(inode); 2535 struct ext3_inode_info *ei = EXT3_I(inode);
2437 __le32 *i_data = ei->i_data; 2536 __le32 *i_data = ei->i_data;
2438 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); 2537 int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
2439 struct address_space *mapping = inode->i_mapping;
2440 int offsets[4]; 2538 int offsets[4];
2441 Indirect chain[4]; 2539 Indirect chain[4];
2442 Indirect *partial; 2540 Indirect *partial;
@@ -2444,7 +2542,8 @@ void ext3_truncate(struct inode *inode)
2444 int n; 2542 int n;
2445 long last_block; 2543 long last_block;
2446 unsigned blocksize = inode->i_sb->s_blocksize; 2544 unsigned blocksize = inode->i_sb->s_blocksize;
2447 struct page *page; 2545
2546 trace_ext3_truncate_enter(inode);
2448 2547
2449 if (!ext3_can_truncate(inode)) 2548 if (!ext3_can_truncate(inode))
2450 goto out_notrans; 2549 goto out_notrans;
@@ -2452,37 +2551,12 @@ void ext3_truncate(struct inode *inode)
2452 if (inode->i_size == 0 && ext3_should_writeback_data(inode)) 2551 if (inode->i_size == 0 && ext3_should_writeback_data(inode))
2453 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); 2552 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
2454 2553
2455 /*
2456 * We have to lock the EOF page here, because lock_page() nests
2457 * outside journal_start().
2458 */
2459 if ((inode->i_size & (blocksize - 1)) == 0) {
2460 /* Block boundary? Nothing to do */
2461 page = NULL;
2462 } else {
2463 page = grab_cache_page(mapping,
2464 inode->i_size >> PAGE_CACHE_SHIFT);
2465 if (!page)
2466 goto out_notrans;
2467 }
2468
2469 handle = start_transaction(inode); 2554 handle = start_transaction(inode);
2470 if (IS_ERR(handle)) { 2555 if (IS_ERR(handle))
2471 if (page) {
2472 clear_highpage(page);
2473 flush_dcache_page(page);
2474 unlock_page(page);
2475 page_cache_release(page);
2476 }
2477 goto out_notrans; 2556 goto out_notrans;
2478 }
2479 2557
2480 last_block = (inode->i_size + blocksize-1) 2558 last_block = (inode->i_size + blocksize-1)
2481 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); 2559 >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
2482
2483 if (page)
2484 ext3_block_truncate_page(handle, page, mapping, inode->i_size);
2485
2486 n = ext3_block_to_path(inode, last_block, offsets, NULL); 2560 n = ext3_block_to_path(inode, last_block, offsets, NULL);
2487 if (n == 0) 2561 if (n == 0)
2488 goto out_stop; /* error */ 2562 goto out_stop; /* error */
@@ -2597,6 +2671,7 @@ out_stop:
2597 ext3_orphan_del(handle, inode); 2671 ext3_orphan_del(handle, inode);
2598 2672
2599 ext3_journal_stop(handle); 2673 ext3_journal_stop(handle);
2674 trace_ext3_truncate_exit(inode);
2600 return; 2675 return;
2601out_notrans: 2676out_notrans:
2602 /* 2677 /*
@@ -2605,6 +2680,7 @@ out_notrans:
2605 */ 2680 */
2606 if (inode->i_nlink) 2681 if (inode->i_nlink)
2607 ext3_orphan_del(NULL, inode); 2682 ext3_orphan_del(NULL, inode);
2683 trace_ext3_truncate_exit(inode);
2608} 2684}
2609 2685
2610static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2686static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2746,9 +2822,10 @@ make_io:
2746 * has in-inode xattrs, or we don't have this inode in memory. 2822 * has in-inode xattrs, or we don't have this inode in memory.
2747 * Read the block from disk. 2823 * Read the block from disk.
2748 */ 2824 */
2825 trace_ext3_load_inode(inode);
2749 get_bh(bh); 2826 get_bh(bh);
2750 bh->b_end_io = end_buffer_read_sync; 2827 bh->b_end_io = end_buffer_read_sync;
2751 submit_bh(READ_META, bh); 2828 submit_bh(READ | REQ_META | REQ_PRIO, bh);
2752 wait_on_buffer(bh); 2829 wait_on_buffer(bh);
2753 if (!buffer_uptodate(bh)) { 2830 if (!buffer_uptodate(bh)) {
2754 ext3_error(inode->i_sb, "ext3_get_inode_loc", 2831 ext3_error(inode->i_sb, "ext3_get_inode_loc",
@@ -3216,6 +3293,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3216 ext3_journal_stop(handle); 3293 ext3_journal_stop(handle);
3217 } 3294 }
3218 3295
3296 if (attr->ia_valid & ATTR_SIZE)
3297 inode_dio_wait(inode);
3298
3219 if (S_ISREG(inode->i_mode) && 3299 if (S_ISREG(inode->i_mode) &&
3220 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 3300 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
3221 handle_t *handle; 3301 handle_t *handle;
@@ -3227,18 +3307,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3227 } 3307 }
3228 3308
3229 error = ext3_orphan_add(handle, inode); 3309 error = ext3_orphan_add(handle, inode);
3310 if (error) {
3311 ext3_journal_stop(handle);
3312 goto err_out;
3313 }
3230 EXT3_I(inode)->i_disksize = attr->ia_size; 3314 EXT3_I(inode)->i_disksize = attr->ia_size;
3231 rc = ext3_mark_inode_dirty(handle, inode); 3315 error = ext3_mark_inode_dirty(handle, inode);
3232 if (!error)
3233 error = rc;
3234 ext3_journal_stop(handle); 3316 ext3_journal_stop(handle);
3317 if (error) {
3318 /* Some hard fs error must have happened. Bail out. */
3319 ext3_orphan_del(NULL, inode);
3320 goto err_out;
3321 }
3322 rc = ext3_block_truncate_page(inode, attr->ia_size);
3323 if (rc) {
3324 /* Cleanup orphan list and exit */
3325 handle = ext3_journal_start(inode, 3);
3326 if (IS_ERR(handle)) {
3327 ext3_orphan_del(NULL, inode);
3328 goto err_out;
3329 }
3330 ext3_orphan_del(handle, inode);
3331 ext3_journal_stop(handle);
3332 goto err_out;
3333 }
3235 } 3334 }
3236 3335
3237 if ((attr->ia_valid & ATTR_SIZE) && 3336 if ((attr->ia_valid & ATTR_SIZE) &&
3238 attr->ia_size != i_size_read(inode)) { 3337 attr->ia_size != i_size_read(inode)) {
3239 rc = vmtruncate(inode, attr->ia_size); 3338 truncate_setsize(inode, attr->ia_size);
3240 if (rc) 3339 ext3_truncate(inode);
3241 goto err_out;
3242 } 3340 }
3243 3341
3244 setattr_copy(inode, attr); 3342 setattr_copy(inode, attr);
@@ -3372,6 +3470,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
3372 int err; 3470 int err;
3373 3471
3374 might_sleep(); 3472 might_sleep();
3473 trace_ext3_mark_inode_dirty(inode, _RET_IP_);
3375 err = ext3_reserve_inode_write(handle, inode, &iloc); 3474 err = ext3_reserve_inode_write(handle, inode, &iloc);
3376 if (!err) 3475 if (!err)
3377 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 3476 err = ext3_mark_iloc_dirty(handle, inode, &iloc);