diff options
author | Christoph Hellwig <hch@lst.de> | 2011-07-08 08:34:34 -0400 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2011-07-08 08:34:34 -0400 |
commit | 8f04c47aa9712874af2c8816c2ca2a332cba80e4 (patch) | |
tree | 56f76e7d1443759ed68c6720e7f242950e220f8c | |
parent | 857b9778d86ccba7d7b42c9d8aeecde794ec8a6b (diff) |
xfs: split xfs_itruncate_finish
Split the guts of xfs_itruncate_finish that loop over the existing extents
and call xfs_bunmapi on them into a new helper, xfs_itruncate_extents.
Make xfs_attr_inactive call it directly instead of xfs_itruncate_finish,
which allows us to simplify the latter a lot, by only letting it deal with
the data fork. As a result xfs_itruncate_finish is renamed to
xfs_itruncate_data to make its use case more obvious.
Also remove the sync parameter from xfs_itruncate_data, which has been
unnecessary since the introduction of the busy extent list in 2002, and
completely dead code since 2003 when the XFS_BMAPI_ASYNC parameter was
made a no-op.
I can't actually see why xfs_attr_inactive needs to set the transaction
sync, but let's keep this patch simple and without changes in behaviour.
Also avoid passing a useless argument to xfs_isize_check, and make it
private to xfs_inode.c.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 4 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_syscalls.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_attr.c | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 357 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 24 |
7 files changed, 155 insertions, 277 deletions
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 5a0fcb09fc7e..501e4f630548 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -879,15 +879,7 @@ xfs_setattr_size( | |||
879 | ip->i_size = iattr->ia_size; | 879 | ip->i_size = iattr->ia_size; |
880 | } else if (iattr->ia_size <= ip->i_size || | 880 | } else if (iattr->ia_size <= ip->i_size || |
881 | (iattr->ia_size == 0 && ip->i_d.di_nextents)) { | 881 | (iattr->ia_size == 0 && ip->i_d.di_nextents)) { |
882 | /* | 882 | error = xfs_itruncate_data(&tp, ip, iattr->ia_size); |
883 | * Signal a sync transaction unless we are truncating an | ||
884 | * already unlinked file on a wsync filesystem. | ||
885 | */ | ||
886 | error = xfs_itruncate_finish(&tp, ip, iattr->ia_size, | ||
887 | XFS_DATA_FORK, | ||
888 | ((ip->i_d.di_nlink != 0 || | ||
889 | !(mp->m_flags & XFS_MOUNT_WSYNC)) | ||
890 | ? 1 : 0)); | ||
891 | if (error) | 883 | if (error) |
892 | goto out_trans_abort; | 884 | goto out_trans_abort; |
893 | 885 | ||
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index cac41e423451..4fe53f9f0477 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1055,8 +1055,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, | |||
1055 | DEFINE_EVENT(xfs_itrunc_class, name, \ | 1055 | DEFINE_EVENT(xfs_itrunc_class, name, \ |
1056 | TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ | 1056 | TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ |
1057 | TP_ARGS(ip, new_size)) | 1057 | TP_ARGS(ip, new_size)) |
1058 | DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); | 1058 | DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); |
1059 | DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); | 1059 | DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); |
1060 | 1060 | ||
1061 | TRACE_EVENT(xfs_pagecache_inval, | 1061 | TRACE_EVENT(xfs_pagecache_inval, |
1062 | TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), | 1062 | TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 2dadb15d5ca9..f2dfc74ccf34 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile( | |||
263 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 263 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
264 | xfs_trans_ijoin(tp, ip); | 264 | xfs_trans_ijoin(tp, ip); |
265 | 265 | ||
266 | error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); | 266 | error = xfs_itruncate_data(&tp, ip, 0); |
267 | if (error) { | 267 | if (error) { |
268 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | | 268 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | |
269 | XFS_TRANS_ABORT); | 269 | XFS_TRANS_ABORT); |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 01d2072fb6d4..795d5aac7042 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -822,17 +822,21 @@ xfs_attr_inactive(xfs_inode_t *dp) | |||
822 | error = xfs_attr_root_inactive(&trans, dp); | 822 | error = xfs_attr_root_inactive(&trans, dp); |
823 | if (error) | 823 | if (error) |
824 | goto out; | 824 | goto out; |
825 | |||
825 | /* | 826 | /* |
826 | * signal synchronous inactive transactions unless this | 827 | * Signal synchronous inactive transactions unless this is a |
827 | * is a synchronous mount filesystem in which case we | 828 | * synchronous mount filesystem in which case we know that we're here |
828 | * know that we're here because we've been called out of | 829 | * because we've been called out of xfs_inactive which means that the |
829 | * xfs_inactive which means that the last reference is gone | 830 | * last reference is gone and the unlink transaction has already hit |
830 | * and the unlink transaction has already hit the disk so | 831 | * the disk so async inactive transactions are safe. |
831 | * async inactive transactions are safe. | ||
832 | */ | 832 | */ |
833 | if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, | 833 | if (!(mp->m_flags & XFS_MOUNT_WSYNC)) { |
834 | (!(mp->m_flags & XFS_MOUNT_WSYNC) | 834 | if (dp->i_d.di_anextents > 0) |
835 | ? 1 : 0)))) | 835 | xfs_trans_set_sync(trans); |
836 | } | ||
837 | |||
838 | error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); | ||
839 | if (error) | ||
836 | goto out; | 840 | goto out; |
837 | 841 | ||
838 | /* | 842 | /* |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 82a282ab63dc..aa143b870afb 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -52,7 +52,7 @@ kmem_zone_t *xfs_ifork_zone; | |||
52 | kmem_zone_t *xfs_inode_zone; | 52 | kmem_zone_t *xfs_inode_zone; |
53 | 53 | ||
54 | /* | 54 | /* |
55 | * Used in xfs_itruncate(). This is the maximum number of extents | 55 | * Used in xfs_itruncate_extents(). This is the maximum number of extents |
56 | * freed from a file in a single transaction. | 56 | * freed from a file in a single transaction. |
57 | */ | 57 | */ |
58 | #define XFS_ITRUNC_MAX_EXTENTS 2 | 58 | #define XFS_ITRUNC_MAX_EXTENTS 2 |
@@ -1179,15 +1179,15 @@ xfs_ialloc( | |||
1179 | * at least do it for regular files. | 1179 | * at least do it for regular files. |
1180 | */ | 1180 | */ |
1181 | #ifdef DEBUG | 1181 | #ifdef DEBUG |
1182 | void | 1182 | STATIC void |
1183 | xfs_isize_check( | 1183 | xfs_isize_check( |
1184 | xfs_mount_t *mp, | 1184 | struct xfs_inode *ip, |
1185 | xfs_inode_t *ip, | 1185 | xfs_fsize_t isize) |
1186 | xfs_fsize_t isize) | ||
1187 | { | 1186 | { |
1188 | xfs_fileoff_t map_first; | 1187 | struct xfs_mount *mp = ip->i_mount; |
1189 | int nimaps; | 1188 | xfs_fileoff_t map_first; |
1190 | xfs_bmbt_irec_t imaps[2]; | 1189 | int nimaps; |
1190 | xfs_bmbt_irec_t imaps[2]; | ||
1191 | 1191 | ||
1192 | if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) | 1192 | if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) |
1193 | return; | 1193 | return; |
@@ -1214,11 +1214,14 @@ xfs_isize_check( | |||
1214 | ASSERT(nimaps == 1); | 1214 | ASSERT(nimaps == 1); |
1215 | ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); | 1215 | ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); |
1216 | } | 1216 | } |
1217 | #else /* DEBUG */ | ||
1218 | #define xfs_isize_check(ip, isize) | ||
1217 | #endif /* DEBUG */ | 1219 | #endif /* DEBUG */ |
1218 | 1220 | ||
1219 | /* | 1221 | /* |
1220 | * Free up the underlying blocks past new_size. The new size must be | 1222 | * Free up the underlying blocks past new_size. The new size must be smaller |
1221 | * smaller than the current size. | 1223 | * than the current size. This routine can be used both for the attribute and |
1224 | * data fork, and does not modify the inode size, which is left to the caller. | ||
1222 | * | 1225 | * |
1223 | * The transaction passed to this routine must have made a permanent log | 1226 | * The transaction passed to this routine must have made a permanent log |
1224 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the | 1227 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the |
@@ -1230,31 +1233,6 @@ xfs_isize_check( | |||
1230 | * will be "held" within the returned transaction. This routine does NOT | 1233 | * will be "held" within the returned transaction. This routine does NOT |
1231 | * require any disk space to be reserved for it within the transaction. | 1234 | * require any disk space to be reserved for it within the transaction. |
1232 | * | 1235 | * |
1233 | * The fork parameter must be either XFS_ATTR_FORK or XFS_DATA_FORK, and it | ||
1234 | * indicates the fork which is to be truncated. For the attribute fork we only | ||
1235 | * support truncation to size 0. | ||
1236 | * | ||
1237 | * We use the sync parameter to indicate whether or not the first transaction | ||
1238 | * we perform might have to be synchronous. For the attr fork, it needs to be | ||
1239 | * so if the unlink of the inode is not yet known to be permanent in the log. | ||
1240 | * This keeps us from freeing and reusing the blocks of the attribute fork | ||
1241 | * before the unlink of the inode becomes permanent. | ||
1242 | * | ||
1243 | * For the data fork, we normally have to run synchronously if we're being | ||
1244 | * called out of the inactive path or we're being called out of the create path | ||
1245 | * where we're truncating an existing file. Either way, the truncate needs to | ||
1246 | * be sync so blocks don't reappear in the file with altered data in case of a | ||
1247 | * crash. wsync filesystems can run the first case async because anything that | ||
1248 | * shrinks the inode has to run sync so by the time we're called here from | ||
1249 | * inactive, the inode size is permanently set to 0. | ||
1250 | * | ||
1251 | * Calls from the truncate path always need to be sync unless we're in a wsync | ||
1252 | * filesystem and the file has already been unlinked. | ||
1253 | * | ||
1254 | * The caller is responsible for correctly setting the sync parameter. It gets | ||
1255 | * too hard for us to guess here which path we're being called out of just | ||
1256 | * based on inode state. | ||
1257 | * | ||
1258 | * If we get an error, we must return with the inode locked and linked into the | 1236 | * If we get an error, we must return with the inode locked and linked into the |
1259 | * current transaction. This keeps things simple for the higher level code, | 1237 | * current transaction. This keeps things simple for the higher level code, |
1260 | * because it always knows that the inode is locked and held in the transaction | 1238 | * because it always knows that the inode is locked and held in the transaction |
@@ -1262,124 +1240,31 @@ xfs_isize_check( | |||
1262 | * dirty on error so that transactions can be easily aborted if possible. | 1240 | * dirty on error so that transactions can be easily aborted if possible. |
1263 | */ | 1241 | */ |
1264 | int | 1242 | int |
1265 | xfs_itruncate_finish( | 1243 | xfs_itruncate_extents( |
1266 | xfs_trans_t **tp, | 1244 | struct xfs_trans **tpp, |
1267 | xfs_inode_t *ip, | 1245 | struct xfs_inode *ip, |
1268 | xfs_fsize_t new_size, | 1246 | int whichfork, |
1269 | int fork, | 1247 | xfs_fsize_t new_size) |
1270 | int sync) | ||
1271 | { | 1248 | { |
1272 | xfs_fsblock_t first_block; | 1249 | struct xfs_mount *mp = ip->i_mount; |
1273 | xfs_fileoff_t first_unmap_block; | 1250 | struct xfs_trans *tp = *tpp; |
1274 | xfs_fileoff_t last_block; | 1251 | struct xfs_trans *ntp; |
1275 | xfs_filblks_t unmap_len=0; | 1252 | xfs_bmap_free_t free_list; |
1276 | xfs_mount_t *mp; | 1253 | xfs_fsblock_t first_block; |
1277 | xfs_trans_t *ntp; | 1254 | xfs_fileoff_t first_unmap_block; |
1278 | int done; | 1255 | xfs_fileoff_t last_block; |
1279 | int committed; | 1256 | xfs_filblks_t unmap_len; |
1280 | xfs_bmap_free_t free_list; | 1257 | int committed; |
1281 | int error; | 1258 | int error = 0; |
1259 | int done = 0; | ||
1282 | 1260 | ||
1283 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); | 1261 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); |
1284 | ASSERT((new_size == 0) || (new_size <= ip->i_size)); | 1262 | ASSERT(new_size <= ip->i_size); |
1285 | ASSERT(*tp != NULL); | 1263 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
1286 | ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); | 1264 | ASSERT(ip->i_transp == tp); |
1287 | ASSERT(ip->i_transp == *tp); | ||
1288 | ASSERT(ip->i_itemp != NULL); | 1265 | ASSERT(ip->i_itemp != NULL); |
1289 | ASSERT(ip->i_itemp->ili_lock_flags == 0); | 1266 | ASSERT(ip->i_itemp->ili_lock_flags == 0); |
1290 | 1267 | ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); | |
1291 | |||
1292 | ntp = *tp; | ||
1293 | mp = (ntp)->t_mountp; | ||
1294 | ASSERT(! XFS_NOT_DQATTACHED(mp, ip)); | ||
1295 | |||
1296 | /* | ||
1297 | * We only support truncating the entire attribute fork. | ||
1298 | */ | ||
1299 | if (fork == XFS_ATTR_FORK) { | ||
1300 | new_size = 0LL; | ||
1301 | } | ||
1302 | first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); | ||
1303 | trace_xfs_itruncate_finish_start(ip, new_size); | ||
1304 | |||
1305 | /* | ||
1306 | * The first thing we do is set the size to new_size permanently | ||
1307 | * on disk. This way we don't have to worry about anyone ever | ||
1308 | * being able to look at the data being freed even in the face | ||
1309 | * of a crash. What we're getting around here is the case where | ||
1310 | * we free a block, it is allocated to another file, it is written | ||
1311 | * to, and then we crash. If the new data gets written to the | ||
1312 | * file but the log buffers containing the free and reallocation | ||
1313 | * don't, then we'd end up with garbage in the blocks being freed. | ||
1314 | * As long as we make the new_size permanent before actually | ||
1315 | * freeing any blocks it doesn't matter if they get written to. | ||
1316 | * | ||
1317 | * The callers must signal into us whether or not the size | ||
1318 | * setting here must be synchronous. There are a few cases | ||
1319 | * where it doesn't have to be synchronous. Those cases | ||
1320 | * occur if the file is unlinked and we know the unlink is | ||
1321 | * permanent or if the blocks being truncated are guaranteed | ||
1322 | * to be beyond the inode eof (regardless of the link count) | ||
1323 | * and the eof value is permanent. Both of these cases occur | ||
1324 | * only on wsync-mounted filesystems. In those cases, we're | ||
1325 | * guaranteed that no user will ever see the data in the blocks | ||
1326 | * that are being truncated so the truncate can run async. | ||
1327 | * In the free beyond eof case, the file may wind up with | ||
1328 | * more blocks allocated to it than it needs if we crash | ||
1329 | * and that won't get fixed until the next time the file | ||
1330 | * is re-opened and closed but that's ok as that shouldn't | ||
1331 | * be too many blocks. | ||
1332 | * | ||
1333 | * However, we can't just make all wsync xactions run async | ||
1334 | * because there's one call out of the create path that needs | ||
1335 | * to run sync where it's truncating an existing file to size | ||
1336 | * 0 whose size is > 0. | ||
1337 | * | ||
1338 | * It's probably possible to come up with a test in this | ||
1339 | * routine that would correctly distinguish all the above | ||
1340 | * cases from the values of the function parameters and the | ||
1341 | * inode state but for sanity's sake, I've decided to let the | ||
1342 | * layers above just tell us. It's simpler to correctly figure | ||
1343 | * out in the layer above exactly under what conditions we | ||
1344 | * can run async and I think it's easier for others read and | ||
1345 | * follow the logic in case something has to be changed. | ||
1346 | * cscope is your friend -- rcc. | ||
1347 | * | ||
1348 | * The attribute fork is much simpler. | ||
1349 | * | ||
1350 | * For the attribute fork we allow the caller to tell us whether | ||
1351 | * the unlink of the inode that led to this call is yet permanent | ||
1352 | * in the on disk log. If it is not and we will be freeing extents | ||
1353 | * in this inode then we make the first transaction synchronous | ||
1354 | * to make sure that the unlink is permanent by the time we free | ||
1355 | * the blocks. | ||
1356 | */ | ||
1357 | if (fork == XFS_DATA_FORK) { | ||
1358 | if (ip->i_d.di_nextents > 0) { | ||
1359 | /* | ||
1360 | * If we are not changing the file size then do | ||
1361 | * not update the on-disk file size - we may be | ||
1362 | * called from xfs_inactive_free_eofblocks(). If we | ||
1363 | * update the on-disk file size and then the system | ||
1364 | * crashes before the contents of the file are | ||
1365 | * flushed to disk then the files may be full of | ||
1366 | * holes (ie NULL files bug). | ||
1367 | */ | ||
1368 | if (ip->i_size != new_size) { | ||
1369 | ip->i_d.di_size = new_size; | ||
1370 | ip->i_size = new_size; | ||
1371 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | ||
1372 | } | ||
1373 | } | ||
1374 | } else if (sync) { | ||
1375 | ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC)); | ||
1376 | if (ip->i_d.di_anextents > 0) | ||
1377 | xfs_trans_set_sync(ntp); | ||
1378 | } | ||
1379 | ASSERT(fork == XFS_DATA_FORK || | ||
1380 | (fork == XFS_ATTR_FORK && | ||
1381 | ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) || | ||
1382 | (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC))))); | ||
1383 | 1268 | ||
1384 | /* | 1269 | /* |
1385 | * Since it is possible for space to become allocated beyond | 1270 | * Since it is possible for space to become allocated beyond |
@@ -1390,128 +1275,142 @@ xfs_itruncate_finish( | |||
1390 | * beyond the maximum file size (ie it is the same as last_block), | 1275 | * beyond the maximum file size (ie it is the same as last_block), |
1391 | * then there is nothing to do. | 1276 | * then there is nothing to do. |
1392 | */ | 1277 | */ |
1278 | first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); | ||
1393 | last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); | 1279 | last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); |
1394 | ASSERT(first_unmap_block <= last_block); | 1280 | if (first_unmap_block == last_block) |
1395 | done = 0; | 1281 | return 0; |
1396 | if (last_block == first_unmap_block) { | 1282 | |
1397 | done = 1; | 1283 | ASSERT(first_unmap_block < last_block); |
1398 | } else { | 1284 | unmap_len = last_block - first_unmap_block + 1; |
1399 | unmap_len = last_block - first_unmap_block + 1; | ||
1400 | } | ||
1401 | while (!done) { | 1285 | while (!done) { |
1402 | /* | ||
1403 | * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi() | ||
1404 | * will tell us whether it freed the entire range or | ||
1405 | * not. If this is a synchronous mount (wsync), | ||
1406 | * then we can tell bunmapi to keep all the | ||
1407 | * transactions asynchronous since the unlink | ||
1408 | * transaction that made this inode inactive has | ||
1409 | * already hit the disk. There's no danger of | ||
1410 | * the freed blocks being reused, there being a | ||
1411 | * crash, and the reused blocks suddenly reappearing | ||
1412 | * in this file with garbage in them once recovery | ||
1413 | * runs. | ||
1414 | */ | ||
1415 | xfs_bmap_init(&free_list, &first_block); | 1286 | xfs_bmap_init(&free_list, &first_block); |
1416 | error = xfs_bunmapi(ntp, ip, | 1287 | error = xfs_bunmapi(tp, ip, |
1417 | first_unmap_block, unmap_len, | 1288 | first_unmap_block, unmap_len, |
1418 | xfs_bmapi_aflag(fork), | 1289 | xfs_bmapi_aflag(whichfork), |
1419 | XFS_ITRUNC_MAX_EXTENTS, | 1290 | XFS_ITRUNC_MAX_EXTENTS, |
1420 | &first_block, &free_list, | 1291 | &first_block, &free_list, |
1421 | &done); | 1292 | &done); |
1422 | if (error) { | 1293 | if (error) |
1423 | /* | 1294 | goto out_bmap_cancel; |
1424 | * If the bunmapi call encounters an error, | ||
1425 | * return to the caller where the transaction | ||
1426 | * can be properly aborted. We just need to | ||
1427 | * make sure we're not holding any resources | ||
1428 | * that we were not when we came in. | ||
1429 | */ | ||
1430 | xfs_bmap_cancel(&free_list); | ||
1431 | return error; | ||
1432 | } | ||
1433 | 1295 | ||
1434 | /* | 1296 | /* |
1435 | * Duplicate the transaction that has the permanent | 1297 | * Duplicate the transaction that has the permanent |
1436 | * reservation and commit the old transaction. | 1298 | * reservation and commit the old transaction. |
1437 | */ | 1299 | */ |
1438 | error = xfs_bmap_finish(tp, &free_list, &committed); | 1300 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
1439 | ntp = *tp; | ||
1440 | if (committed) | 1301 | if (committed) |
1441 | xfs_trans_ijoin(ntp, ip); | 1302 | xfs_trans_ijoin(tp, ip); |
1442 | 1303 | if (error) | |
1443 | if (error) { | 1304 | goto out_bmap_cancel; |
1444 | /* | ||
1445 | * If the bmap finish call encounters an error, return | ||
1446 | * to the caller where the transaction can be properly | ||
1447 | * aborted. We just need to make sure we're not | ||
1448 | * holding any resources that we were not when we came | ||
1449 | * in. | ||
1450 | * | ||
1451 | * Aborting from this point might lose some blocks in | ||
1452 | * the file system, but oh well. | ||
1453 | */ | ||
1454 | xfs_bmap_cancel(&free_list); | ||
1455 | return error; | ||
1456 | } | ||
1457 | 1305 | ||
1458 | if (committed) { | 1306 | if (committed) { |
1459 | /* | 1307 | /* |
1460 | * Mark the inode dirty so it will be logged and | 1308 | * Mark the inode dirty so it will be logged and |
1461 | * moved forward in the log as part of every commit. | 1309 | * moved forward in the log as part of every commit. |
1462 | */ | 1310 | */ |
1463 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1311 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1464 | } | 1312 | } |
1465 | 1313 | ||
1466 | ntp = xfs_trans_dup(ntp); | 1314 | ntp = xfs_trans_dup(tp); |
1467 | error = xfs_trans_commit(*tp, 0); | 1315 | error = xfs_trans_commit(tp, 0); |
1468 | *tp = ntp; | 1316 | tp = ntp; |
1469 | 1317 | ||
1470 | xfs_trans_ijoin(ntp, ip); | 1318 | xfs_trans_ijoin(tp, ip); |
1471 | 1319 | ||
1472 | if (error) | 1320 | if (error) |
1473 | return error; | 1321 | goto out; |
1322 | |||
1474 | /* | 1323 | /* |
1475 | * transaction commit worked ok so we can drop the extra ticket | 1324 | * Transaction commit worked ok so we can drop the extra ticket |
1476 | * reference that we gained in xfs_trans_dup() | 1325 | * reference that we gained in xfs_trans_dup() |
1477 | */ | 1326 | */ |
1478 | xfs_log_ticket_put(ntp->t_ticket); | 1327 | xfs_log_ticket_put(tp->t_ticket); |
1479 | error = xfs_trans_reserve(ntp, 0, | 1328 | error = xfs_trans_reserve(tp, 0, |
1480 | XFS_ITRUNCATE_LOG_RES(mp), 0, | 1329 | XFS_ITRUNCATE_LOG_RES(mp), 0, |
1481 | XFS_TRANS_PERM_LOG_RES, | 1330 | XFS_TRANS_PERM_LOG_RES, |
1482 | XFS_ITRUNCATE_LOG_COUNT); | 1331 | XFS_ITRUNCATE_LOG_COUNT); |
1483 | if (error) | 1332 | if (error) |
1484 | return error; | 1333 | goto out; |
1485 | } | 1334 | } |
1335 | |||
1336 | out: | ||
1337 | *tpp = tp; | ||
1338 | return error; | ||
1339 | out_bmap_cancel: | ||
1486 | /* | 1340 | /* |
1487 | * Only update the size in the case of the data fork, but | 1341 | * If the bunmapi call encounters an error, return to the caller where |
1488 | * always re-log the inode so that our permanent transaction | 1342 | * the transaction can be properly aborted. We just need to make sure |
1489 | * can keep on rolling it forward in the log. | 1343 | * we're not holding any resources that we were not when we came in. |
1490 | */ | 1344 | */ |
1491 | if (fork == XFS_DATA_FORK) { | 1345 | xfs_bmap_cancel(&free_list); |
1492 | xfs_isize_check(mp, ip, new_size); | 1346 | goto out; |
1347 | } | ||
1348 | |||
1349 | int | ||
1350 | xfs_itruncate_data( | ||
1351 | struct xfs_trans **tpp, | ||
1352 | struct xfs_inode *ip, | ||
1353 | xfs_fsize_t new_size) | ||
1354 | { | ||
1355 | int error; | ||
1356 | |||
1357 | trace_xfs_itruncate_data_start(ip, new_size); | ||
1358 | |||
1359 | /* | ||
1360 | * The first thing we do is set the size to new_size permanently on | ||
1361 | * disk. This way we don't have to worry about anyone ever being able | ||
1362 | * to look at the data being freed even in the face of a crash. | ||
1363 | * What we're getting around here is the case where we free a block, it | ||
1364 | * is allocated to another file, it is written to, and then we crash. | ||
1365 | * If the new data gets written to the file but the log buffers | ||
1366 | * containing the free and reallocation don't, then we'd end up with | ||
1367 | * garbage in the blocks being freed. As long as we make the new_size | ||
1368 | * permanent before actually freeing any blocks it doesn't matter if | ||
1369 | * they get written to. | ||
1370 | */ | ||
1371 | if (ip->i_d.di_nextents > 0) { | ||
1493 | /* | 1372 | /* |
1494 | * If we are not changing the file size then do | 1373 | * If we are not changing the file size then do not update |
1495 | * not update the on-disk file size - we may be | 1374 | * the on-disk file size - we may be called from |
1496 | * called from xfs_inactive_free_eofblocks(). If we | 1375 | * xfs_inactive_free_eofblocks(). If we update the on-disk |
1497 | * update the on-disk file size and then the system | 1376 | * file size and then the system crashes before the contents |
1498 | * crashes before the contents of the file are | 1377 | * of the file are flushed to disk then the files may be |
1499 | * flushed to disk then the files may be full of | 1378 | * full of holes (ie NULL files bug). |
1500 | * holes (ie NULL files bug). | ||
1501 | */ | 1379 | */ |
1502 | if (ip->i_size != new_size) { | 1380 | if (ip->i_size != new_size) { |
1503 | ip->i_d.di_size = new_size; | 1381 | ip->i_d.di_size = new_size; |
1504 | ip->i_size = new_size; | 1382 | ip->i_size = new_size; |
1383 | xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); | ||
1505 | } | 1384 | } |
1506 | } | 1385 | } |
1507 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1386 | |
1508 | ASSERT((new_size != 0) || | 1387 | error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size); |
1509 | (fork == XFS_ATTR_FORK) || | 1388 | if (error) |
1510 | (ip->i_delayed_blks == 0)); | 1389 | return error; |
1511 | ASSERT((new_size != 0) || | 1390 | |
1512 | (fork == XFS_ATTR_FORK) || | 1391 | /* |
1513 | (ip->i_d.di_nextents == 0)); | 1392 | * If we are not changing the file size then do not update the on-disk |
1514 | trace_xfs_itruncate_finish_end(ip, new_size); | 1393 | * file size - we may be called from xfs_inactive_free_eofblocks(). |
1394 | * If we update the on-disk file size and then the system crashes | ||
1395 | * before the contents of the file are flushed to disk then the files | ||
1396 | * may be full of holes (ie NULL files bug). | ||
1397 | */ | ||
1398 | xfs_isize_check(ip, new_size); | ||
1399 | if (ip->i_size != new_size) { | ||
1400 | ip->i_d.di_size = new_size; | ||
1401 | ip->i_size = new_size; | ||
1402 | } | ||
1403 | |||
1404 | ASSERT(new_size != 0 || ip->i_delayed_blks == 0); | ||
1405 | ASSERT(new_size != 0 || ip->i_d.di_nextents == 0); | ||
1406 | |||
1407 | /* | ||
1408 | * Always re-log the inode so that our permanent transaction can keep | ||
1409 | * on rolling it forward in the log. | ||
1410 | */ | ||
1411 | xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE); | ||
1412 | |||
1413 | trace_xfs_itruncate_data_end(ip, new_size); | ||
1515 | return 0; | 1414 | return 0; |
1516 | } | 1415 | } |
1517 | 1416 | ||
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 6efd471c8724..6495578efe05 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -491,8 +491,10 @@ uint xfs_ip2xflags(struct xfs_inode *); | |||
491 | uint xfs_dic2xflags(struct xfs_dinode *); | 491 | uint xfs_dic2xflags(struct xfs_dinode *); |
492 | int xfs_ifree(struct xfs_trans *, xfs_inode_t *, | 492 | int xfs_ifree(struct xfs_trans *, xfs_inode_t *, |
493 | struct xfs_bmap_free *); | 493 | struct xfs_bmap_free *); |
494 | int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, | 494 | int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, |
495 | xfs_fsize_t, int, int); | 495 | int, xfs_fsize_t); |
496 | int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *, | ||
497 | xfs_fsize_t); | ||
496 | int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | 498 | int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); |
497 | 499 | ||
498 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 500 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
@@ -568,13 +570,6 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); | |||
568 | 570 | ||
569 | #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) | 571 | #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) |
570 | 572 | ||
571 | #ifdef DEBUG | ||
572 | void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, | ||
573 | xfs_fsize_t); | ||
574 | #else /* DEBUG */ | ||
575 | #define xfs_isize_check(mp, ip, isize) | ||
576 | #endif /* DEBUG */ | ||
577 | |||
578 | #if defined(DEBUG) | 573 | #if defined(DEBUG) |
579 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | 574 | void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); |
580 | #else | 575 | #else |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 45b8ac662aee..11242c482771 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -220,15 +220,12 @@ xfs_free_eofblocks( | |||
220 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 220 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
221 | xfs_trans_ijoin(tp, ip); | 221 | xfs_trans_ijoin(tp, ip); |
222 | 222 | ||
223 | error = xfs_itruncate_finish(&tp, ip, | 223 | error = xfs_itruncate_data(&tp, ip, ip->i_size); |
224 | ip->i_size, | ||
225 | XFS_DATA_FORK, | ||
226 | 0); | ||
227 | /* | ||
228 | * If we get an error at this point we | ||
229 | * simply don't bother truncating the file. | ||
230 | */ | ||
231 | if (error) { | 224 | if (error) { |
225 | /* | ||
226 | * If we get an error at this point we simply don't | ||
227 | * bother truncating the file. | ||
228 | */ | ||
232 | xfs_trans_cancel(tp, | 229 | xfs_trans_cancel(tp, |
233 | (XFS_TRANS_RELEASE_LOG_RES | | 230 | (XFS_TRANS_RELEASE_LOG_RES | |
234 | XFS_TRANS_ABORT)); | 231 | XFS_TRANS_ABORT)); |
@@ -665,16 +662,7 @@ xfs_inactive( | |||
665 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 662 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
666 | xfs_trans_ijoin(tp, ip); | 663 | xfs_trans_ijoin(tp, ip); |
667 | 664 | ||
668 | /* | 665 | error = xfs_itruncate_data(&tp, ip, 0); |
669 | * normally, we have to run xfs_itruncate_finish sync. | ||
670 | * But if filesystem is wsync and we're in the inactive | ||
671 | * path, then we know that nlink == 0, and that the | ||
672 | * xaction that made nlink == 0 is permanently committed | ||
673 | * since xfs_remove runs as a synchronous transaction. | ||
674 | */ | ||
675 | error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, | ||
676 | (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); | ||
677 | |||
678 | if (error) { | 666 | if (error) { |
679 | xfs_trans_cancel(tp, | 667 | xfs_trans_cancel(tp, |
680 | XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | 668 | XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); |