aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_vnodeops.c
diff options
context:
space:
mode:
author: Lachlan McIlroy <lachlan@sgi.com> 2007-05-07 23:49:46 -0400
committer: Tim Shimmin <tes@sgi.com> 2007-05-07 23:49:46 -0400
commit: ba87ea699ebd9dd577bf055ebc4a98200e337542 (patch)
tree: 713b7d32937372fd7c5b8647f14d0e7262fc7075 /fs/xfs/xfs_vnodeops.c
parent: 2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1 (diff)
[XFS] Fix to prevent the notorious 'NULL files' problem after a crash.
The problem that has been addressed is that of synchronising updates of the file size with writes that extend a file. Without the fix, the update of a file's size, as a result of a write beyond eof, is independent of when the cached data is flushed to disk. Often the file size update would be written to the filesystem log before the data is flushed to disk. When a system crashes between these two events and the filesystem log is replayed on mount, the file's size will be set, but since the contents never made it to disk the file is full of holes. If some of the cached data was flushed to disk then it may just be a section of the file at the end that has holes. There are existing fixes to help alleviate this problem, particularly in the case where a file has been truncated, that force cached data to be flushed to disk when the file is closed. If the system crashes while the file(s) are still open then this flushing will never occur. The fix that we have implemented is to introduce a second file size, called the in-memory file size, that represents the current file size as viewed by the user. The existing file size, called the on-disk file size, is the one that gets written to the filesystem log, and we only update it when it is safe to do so. When we write to a file beyond eof we only update the in-memory file size in the write operation. Later, when the I/O operation that flushes the cached data to disk completes, an I/O completion routine will update the on-disk file size. The on-disk file size will be updated to the maximum offset of the I/O or to the value of the in-memory file size if the I/O includes eof. SGI-PV: 958522 SGI-Modid: xfs-linux-melb:xfs-kern:28322a Signed-off-by: Lachlan McIlroy <lachlan@sgi.com> Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r-- fs/xfs/xfs_vnodeops.c 40
1 files changed, 20 insertions, 20 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6e49bd362460..e17be3b647be 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -133,7 +133,7 @@ xfs_getattr(
133 if (!(flags & ATTR_LAZY)) 133 if (!(flags & ATTR_LAZY))
134 xfs_ilock(ip, XFS_ILOCK_SHARED); 134 xfs_ilock(ip, XFS_ILOCK_SHARED);
135 135
136 vap->va_size = ip->i_d.di_size; 136 vap->va_size = XFS_ISIZE(ip);
137 if (vap->va_mask == XFS_AT_SIZE) 137 if (vap->va_mask == XFS_AT_SIZE)
138 goto all_done; 138 goto all_done;
139 139
@@ -496,7 +496,7 @@ xfs_setattr(
496 if (mask & XFS_AT_SIZE) { 496 if (mask & XFS_AT_SIZE) {
497 /* Short circuit the truncate case for zero length files */ 497 /* Short circuit the truncate case for zero length files */
498 if ((vap->va_size == 0) && 498 if ((vap->va_size == 0) &&
499 (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) { 499 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) {
500 xfs_iunlock(ip, XFS_ILOCK_EXCL); 500 xfs_iunlock(ip, XFS_ILOCK_EXCL);
501 lock_flags &= ~XFS_ILOCK_EXCL; 501 lock_flags &= ~XFS_ILOCK_EXCL;
502 if (mask & XFS_AT_CTIME) 502 if (mask & XFS_AT_CTIME)
@@ -614,7 +614,7 @@ xfs_setattr(
614 */ 614 */
615 if (mask & XFS_AT_SIZE) { 615 if (mask & XFS_AT_SIZE) {
616 code = 0; 616 code = 0;
617 if ((vap->va_size > ip->i_d.di_size) && 617 if ((vap->va_size > ip->i_size) &&
618 (flags & ATTR_NOSIZETOK) == 0) { 618 (flags & ATTR_NOSIZETOK) == 0) {
619 code = xfs_igrow_start(ip, vap->va_size, credp); 619 code = xfs_igrow_start(ip, vap->va_size, credp);
620 } 620 }
@@ -654,10 +654,10 @@ xfs_setattr(
654 * Truncate file. Must have write permission and not be a directory. 654 * Truncate file. Must have write permission and not be a directory.
655 */ 655 */
656 if (mask & XFS_AT_SIZE) { 656 if (mask & XFS_AT_SIZE) {
657 if (vap->va_size > ip->i_d.di_size) { 657 if (vap->va_size > ip->i_size) {
658 xfs_igrow_finish(tp, ip, vap->va_size, 658 xfs_igrow_finish(tp, ip, vap->va_size,
659 !(flags & ATTR_DMI)); 659 !(flags & ATTR_DMI));
660 } else if ((vap->va_size <= ip->i_d.di_size) || 660 } else if ((vap->va_size <= ip->i_size) ||
661 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 661 ((vap->va_size == 0) && ip->i_d.di_nextents)) {
662 /* 662 /*
663 * signal a sync transaction unless 663 * signal a sync transaction unless
@@ -1221,7 +1221,7 @@ xfs_inactive_free_eofblocks(
1221 * Figure out if there are any blocks beyond the end 1221 * Figure out if there are any blocks beyond the end
1222 * of the file. If not, then there is nothing to do. 1222 * of the file. If not, then there is nothing to do.
1223 */ 1223 */
1224 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size)); 1224 end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
1225 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1225 last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1226 map_len = last_fsb - end_fsb; 1226 map_len = last_fsb - end_fsb;
1227 if (map_len <= 0) 1227 if (map_len <= 0)
@@ -1258,7 +1258,7 @@ xfs_inactive_free_eofblocks(
1258 */ 1258 */
1259 xfs_ilock(ip, XFS_IOLOCK_EXCL); 1259 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1260 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 1260 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
1261 ip->i_d.di_size); 1261 ip->i_size);
1262 if (error) { 1262 if (error) {
1263 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1263 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1264 return error; 1264 return error;
@@ -1282,7 +1282,7 @@ xfs_inactive_free_eofblocks(
1282 xfs_trans_ihold(tp, ip); 1282 xfs_trans_ihold(tp, ip);
1283 1283
1284 error = xfs_itruncate_finish(&tp, ip, 1284 error = xfs_itruncate_finish(&tp, ip,
1285 ip->i_d.di_size, 1285 ip->i_size,
1286 XFS_DATA_FORK, 1286 XFS_DATA_FORK,
1287 0); 1287 0);
1288 /* 1288 /*
@@ -1568,7 +1568,7 @@ xfs_release(
1568 1568
1569 if (ip->i_d.di_nlink != 0) { 1569 if (ip->i_d.di_nlink != 0) {
1570 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1570 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1571 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1571 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
1572 ip->i_delayed_blks > 0)) && 1572 ip->i_delayed_blks > 0)) &&
1573 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1573 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1574 (!(ip->i_d.di_flags & 1574 (!(ip->i_d.di_flags &
@@ -1629,8 +1629,8 @@ xfs_inactive(
1629 * only one with a reference to the inode. 1629 * only one with a reference to the inode.
1630 */ 1630 */
1631 truncate = ((ip->i_d.di_nlink == 0) && 1631 truncate = ((ip->i_d.di_nlink == 0) &&
1632 ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0) || 1632 ((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
1633 (ip->i_delayed_blks > 0)) && 1633 (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
1634 ((ip->i_d.di_mode & S_IFMT) == S_IFREG)); 1634 ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
1635 1635
1636 mp = ip->i_mount; 1636 mp = ip->i_mount;
@@ -1648,7 +1648,7 @@ xfs_inactive(
1648 1648
1649 if (ip->i_d.di_nlink != 0) { 1649 if (ip->i_d.di_nlink != 0) {
1650 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && 1650 if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1651 ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 || 1651 ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
1652 ip->i_delayed_blks > 0)) && 1652 ip->i_delayed_blks > 0)) &&
1653 (ip->i_df.if_flags & XFS_IFEXTENTS) && 1653 (ip->i_df.if_flags & XFS_IFEXTENTS) &&
1654 (!(ip->i_d.di_flags & 1654 (!(ip->i_d.di_flags &
@@ -4055,14 +4055,14 @@ xfs_alloc_file_space(
4055 allocatesize_fsb = XFS_B_TO_FSB(mp, count); 4055 allocatesize_fsb = XFS_B_TO_FSB(mp, count);
4056 4056
4057 /* Generate a DMAPI event if needed. */ 4057 /* Generate a DMAPI event if needed. */
4058 if (alloc_type != 0 && offset < ip->i_d.di_size && 4058 if (alloc_type != 0 && offset < ip->i_size &&
4059 (attr_flags&ATTR_DMI) == 0 && 4059 (attr_flags&ATTR_DMI) == 0 &&
4060 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4060 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
4061 xfs_off_t end_dmi_offset; 4061 xfs_off_t end_dmi_offset;
4062 4062
4063 end_dmi_offset = offset+len; 4063 end_dmi_offset = offset+len;
4064 if (end_dmi_offset > ip->i_d.di_size) 4064 if (end_dmi_offset > ip->i_size)
4065 end_dmi_offset = ip->i_d.di_size; 4065 end_dmi_offset = ip->i_size;
4066 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), 4066 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
4067 offset, end_dmi_offset - offset, 4067 offset, end_dmi_offset - offset,
4068 0, NULL); 4068 0, NULL);
@@ -4318,11 +4318,11 @@ xfs_free_file_space(
4318 end_dmi_offset = offset + len; 4318 end_dmi_offset = offset + len;
4319 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 4319 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
4320 4320
4321 if (offset < ip->i_d.di_size && 4321 if (offset < ip->i_size &&
4322 (attr_flags & ATTR_DMI) == 0 && 4322 (attr_flags & ATTR_DMI) == 0 &&
4323 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) { 4323 DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
4324 if (end_dmi_offset > ip->i_d.di_size) 4324 if (end_dmi_offset > ip->i_size)
4325 end_dmi_offset = ip->i_d.di_size; 4325 end_dmi_offset = ip->i_size;
4326 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, 4326 error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
4327 offset, end_dmi_offset - offset, 4327 offset, end_dmi_offset - offset,
4328 AT_DELAY_FLAG(attr_flags), NULL); 4328 AT_DELAY_FLAG(attr_flags), NULL);
@@ -4541,7 +4541,7 @@ xfs_change_file_space(
4541 bf->l_start += offset; 4541 bf->l_start += offset;
4542 break; 4542 break;
4543 case 2: /*SEEK_END*/ 4543 case 2: /*SEEK_END*/
4544 bf->l_start += ip->i_d.di_size; 4544 bf->l_start += ip->i_size;
4545 break; 4545 break;
4546 default: 4546 default:
4547 return XFS_ERROR(EINVAL); 4547 return XFS_ERROR(EINVAL);
@@ -4558,7 +4558,7 @@ xfs_change_file_space(
4558 bf->l_whence = 0; 4558 bf->l_whence = 0;
4559 4559
4560 startoffset = bf->l_start; 4560 startoffset = bf->l_start;
4561 fsize = ip->i_d.di_size; 4561 fsize = ip->i_size;
4562 4562
4563 /* 4563 /*
4564 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve 4564 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve