aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
authorLachlan McIlroy <lachlan@sgi.com>2007-05-07 23:49:46 -0400
committerTim Shimmin <tes@sgi.com>2007-05-07 23:49:46 -0400
commitba87ea699ebd9dd577bf055ebc4a98200e337542 (patch)
tree713b7d32937372fd7c5b8647f14d0e7262fc7075 /fs/xfs/xfs_inode.c
parent2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1 (diff)
[XFS] Fix to prevent the notorious 'NULL files' problem after a crash.
The problem that has been addressed is that of synchronising updates of the file size with writes that extend a file. Without the fix the update of a file's size, as a result of a write beyond eof, is independent of when the cached data is flushed to disk. Often the file size update would be written to the filesystem log before the data is flushed to disk. When a system crashes between these two events and the filesystem log is replayed on mount the file's size will be set but since the contents never made it to disk the file is full of holes. If some of the cached data was flushed to disk then it may just be a section of the file at the end that has holes. There are existing fixes to help alleviate this problem, particularly in the case where a file has been truncated, that force cached data to be flushed to disk when the file is closed. If the system crashes while the file(s) are still open then this flushing will never occur. The fix that we have implemented is to introduce a second file size, called the in-memory file size, that represents the current file size as viewed by the user. The existing file size, called the on-disk file size, is the one that get's written to the filesystem log and we only update it when it is safe to do so. When we write to a file beyond eof we only update the in- memory file size in the write operation. Later when the I/O operation, that flushes the cached data to disk completes, an I/O completion routine will update the on-disk file size. The on-disk file size will be updated to the maximum offset of the I/O or to the value of the in-memory file size if the I/O includes eof. SGI-PV: 958522 SGI-Modid: xfs-linux-melb:xfs-kern:28322a Signed-off-by: Lachlan McIlroy <lachlan@sgi.com> Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c48
1 files changed, 38 insertions, 10 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7d1ab3967b8e..3ca5d43b8345 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -442,6 +442,7 @@ xfs_iformat(
442 return XFS_ERROR(EFSCORRUPTED); 442 return XFS_ERROR(EFSCORRUPTED);
443 } 443 }
444 ip->i_d.di_size = 0; 444 ip->i_d.di_size = 0;
445 ip->i_size = 0;
445 ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); 446 ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
446 break; 447 break;
447 448
@@ -980,6 +981,7 @@ xfs_iread(
980 } 981 }
981 982
982 ip->i_delayed_blks = 0; 983 ip->i_delayed_blks = 0;
984 ip->i_size = ip->i_d.di_size;
983 985
984 /* 986 /*
985 * Mark the buffer containing the inode as something to keep 987 * Mark the buffer containing the inode as something to keep
@@ -1170,6 +1172,7 @@ xfs_ialloc(
1170 } 1172 }
1171 1173
1172 ip->i_d.di_size = 0; 1174 ip->i_d.di_size = 0;
1175 ip->i_size = 0;
1173 ip->i_d.di_nextents = 0; 1176 ip->i_d.di_nextents = 0;
1174 ASSERT(ip->i_d.di_nblocks == 0); 1177 ASSERT(ip->i_d.di_nblocks == 0);
1175 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); 1178 xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD);
@@ -1340,7 +1343,7 @@ xfs_file_last_byte(
1340 } else { 1343 } else {
1341 last_block = 0; 1344 last_block = 0;
1342 } 1345 }
1343 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_d.di_size); 1346 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
1344 last_block = XFS_FILEOFF_MAX(last_block, size_last_block); 1347 last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
1345 1348
1346 last_byte = XFS_FSB_TO_B(mp, last_block); 1349 last_byte = XFS_FSB_TO_B(mp, last_block);
@@ -1434,7 +1437,7 @@ xfs_itruncate_start(
1434 int error = 0; 1437 int error = 0;
1435 1438
1436 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1439 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
1437 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1440 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1438 ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1441 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1439 (flags == XFS_ITRUNC_MAYBE)); 1442 (flags == XFS_ITRUNC_MAYBE));
1440 1443
@@ -1558,7 +1561,7 @@ xfs_itruncate_finish(
1558 1561
1559 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1562 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
1560 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 1563 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1561 ASSERT((new_size == 0) || (new_size <= ip->i_d.di_size)); 1564 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1562 ASSERT(*tp != NULL); 1565 ASSERT(*tp != NULL);
1563 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1566 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1564 ASSERT(ip->i_transp == *tp); 1567 ASSERT(ip->i_transp == *tp);
@@ -1632,8 +1635,20 @@ xfs_itruncate_finish(
1632 */ 1635 */
1633 if (fork == XFS_DATA_FORK) { 1636 if (fork == XFS_DATA_FORK) {
1634 if (ip->i_d.di_nextents > 0) { 1637 if (ip->i_d.di_nextents > 0) {
1635 ip->i_d.di_size = new_size; 1638 /*
1636 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1639 * If we are not changing the file size then do
1640 * not update the on-disk file size - we may be
1641 * called from xfs_inactive_free_eofblocks(). If we
1642 * update the on-disk file size and then the system
1643 * crashes before the contents of the file are
1644 * flushed to disk then the files may be full of
1645 * holes (ie NULL files bug).
1646 */
1647 if (ip->i_size != new_size) {
1648 ip->i_d.di_size = new_size;
1649 ip->i_size = new_size;
1650 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1651 }
1637 } 1652 }
1638 } else if (sync) { 1653 } else if (sync) {
1639 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); 1654 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
@@ -1769,7 +1784,19 @@ xfs_itruncate_finish(
1769 */ 1784 */
1770 if (fork == XFS_DATA_FORK) { 1785 if (fork == XFS_DATA_FORK) {
1771 xfs_isize_check(mp, ip, new_size); 1786 xfs_isize_check(mp, ip, new_size);
1772 ip->i_d.di_size = new_size; 1787 /*
1788 * If we are not changing the file size then do
1789 * not update the on-disk file size - we may be
1790 * called from xfs_inactive_free_eofblocks(). If we
1791 * update the on-disk file size and then the system
1792 * crashes before the contents of the file are
1793 * flushed to disk then the files may be full of
1794 * holes (ie NULL files bug).
1795 */
1796 if (ip->i_size != new_size) {
1797 ip->i_d.di_size = new_size;
1798 ip->i_size = new_size;
1799 }
1773 } 1800 }
1774 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1801 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1775 ASSERT((new_size != 0) || 1802 ASSERT((new_size != 0) ||
@@ -1802,7 +1829,7 @@ xfs_igrow_start(
1802 1829
1803 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1830 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1804 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1831 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1805 ASSERT(new_size > ip->i_d.di_size); 1832 ASSERT(new_size > ip->i_size);
1806 1833
1807 /* 1834 /*
1808 * Zero any pages that may have been created by 1835 * Zero any pages that may have been created by
@@ -1810,7 +1837,7 @@ xfs_igrow_start(
1810 * and any blocks between the old and new file sizes. 1837 * and any blocks between the old and new file sizes.
1811 */ 1838 */
1812 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size, 1839 error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
1813 ip->i_d.di_size); 1840 ip->i_size);
1814 return error; 1841 return error;
1815} 1842}
1816 1843
@@ -1834,13 +1861,14 @@ xfs_igrow_finish(
1834 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1861 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
1835 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0); 1862 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1836 ASSERT(ip->i_transp == tp); 1863 ASSERT(ip->i_transp == tp);
1837 ASSERT(new_size > ip->i_d.di_size); 1864 ASSERT(new_size > ip->i_size);
1838 1865
1839 /* 1866 /*
1840 * Update the file size. Update the inode change timestamp 1867 * Update the file size. Update the inode change timestamp
1841 * if change_flag set. 1868 * if change_flag set.
1842 */ 1869 */
1843 ip->i_d.di_size = new_size; 1870 ip->i_d.di_size = new_size;
1871 ip->i_size = new_size;
1844 if (change_flag) 1872 if (change_flag)
1845 xfs_ichgtime(ip, XFS_ICHGTIME_CHG); 1873 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1846 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1874 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -2323,7 +2351,7 @@ xfs_ifree(
2323 ASSERT(ip->i_d.di_nlink == 0); 2351 ASSERT(ip->i_d.di_nlink == 0);
2324 ASSERT(ip->i_d.di_nextents == 0); 2352 ASSERT(ip->i_d.di_nextents == 0);
2325 ASSERT(ip->i_d.di_anextents == 0); 2353 ASSERT(ip->i_d.di_anextents == 0);
2326 ASSERT((ip->i_d.di_size == 0) || 2354 ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
2327 ((ip->i_d.di_mode & S_IFMT) != S_IFREG)); 2355 ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
2328 ASSERT(ip->i_d.di_nblocks == 0); 2356 ASSERT(ip->i_d.di_nblocks == 0);
2329 2357