diff options
author | Lukas Czerner <lczerner@redhat.com> | 2014-07-15 06:03:38 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2014-07-15 06:03:38 -0400 |
commit | 4f579ae7de560e5f449587a6c3f02594d53d4d51 (patch) | |
tree | 4880232d5b88692ae01779bbd9d495a531ad378d /fs | |
parent | 71d4f7d032149b935a26eb3ff85c6c837f3714e1 (diff) |
ext4: fix punch hole on files with indirect mapping
Currently punch hole code on files with direct/indirect mapping has some
problems which may lead to a data loss. For example (from Jan Kara):
fallocate -n -p 10240000 4096
will punch the range 10240000 - 12632064 instead of the range 10240000 -
10244096.
Also the code is a bit weird: it does not use the infrastructure provided
by indirect.c, but rather implements its own approach.
This patch fixes the issues and also makes the operation run 4 times
faster in my testing (punching out a 60GB file). It uses an approach
similar to the one used in ext4_ind_truncate(), which takes advantage of
the ext4_free_branches() function.
Also rename ext4_free_hole_blocks() to something more sensible, like
the equivalent we have for extent-mapped files. Call it
ext4_ind_remove_space().
This has been tested mostly with fsx and some xfstests which test
punch hole but do not require unwritten extents, which are not
supported with direct/indirect mapping. No problems showed up even with
1024k block size.
CC: stable@vger.kernel.org
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 4 | ||||
-rw-r--r-- | fs/ext4/indirect.c | 281 | ||||
-rw-r--r-- | fs/ext4/inode.c | 2 |
3 files changed, 205 insertions, 82 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d35c78c96184..5535ed2be8c7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2143,8 +2143,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
2143 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | 2143 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); |
2144 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); | 2144 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); |
2145 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); | 2145 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); |
2146 | extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, | 2146 | extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode, |
2147 | ext4_lblk_t first, ext4_lblk_t stop); | 2147 | ext4_lblk_t start, ext4_lblk_t end); |
2148 | 2148 | ||
2149 | /* ioctl.c */ | 2149 | /* ioctl.c */ |
2150 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 2150 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index fd69da194826..e75f840000a0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -1295,97 +1295,220 @@ do_indirects: | |||
1295 | } | 1295 | } |
1296 | } | 1296 | } |
1297 | 1297 | ||
1298 | static int free_hole_blocks(handle_t *handle, struct inode *inode, | 1298 | /** |
1299 | struct buffer_head *parent_bh, __le32 *i_data, | 1299 | * ext4_ind_remove_space - remove space from the range |
1300 | int level, ext4_lblk_t first, | 1300 | * @handle: JBD handle for this transaction |
1301 | ext4_lblk_t count, int max) | 1301 | * @inode: inode we are dealing with |
1302 | * @start: First block to remove | ||
1303 | * @end: One block after the last block to remove (exclusive) | ||
1304 | * | ||
1305 | * Free the blocks in the defined range (end is exclusive endpoint of | ||
1306 | * range). This is used by ext4_punch_hole(). | ||
1307 | */ | ||
1308 | int ext4_ind_remove_space(handle_t *handle, struct inode *inode, | ||
1309 | ext4_lblk_t start, ext4_lblk_t end) | ||
1302 | { | 1310 | { |
1303 | struct buffer_head *bh = NULL; | 1311 | struct ext4_inode_info *ei = EXT4_I(inode); |
1312 | __le32 *i_data = ei->i_data; | ||
1304 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 1313 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); |
1305 | int ret = 0; | 1314 | ext4_lblk_t offsets[4], offsets2[4]; |
1306 | int i, inc; | 1315 | Indirect chain[4], chain2[4]; |
1307 | ext4_lblk_t offset; | 1316 | Indirect *partial, *partial2; |
1308 | __le32 blk; | 1317 | ext4_lblk_t max_block; |
1309 | 1318 | __le32 nr = 0, nr2 = 0; | |
1310 | inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); | 1319 | int n = 0, n2 = 0; |
1311 | for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { | 1320 | unsigned blocksize = inode->i_sb->s_blocksize; |
1312 | if (offset >= count + first) | ||
1313 | break; | ||
1314 | if (*i_data == 0 || (offset + inc) <= first) | ||
1315 | continue; | ||
1316 | blk = *i_data; | ||
1317 | if (level > 0) { | ||
1318 | ext4_lblk_t first2; | ||
1319 | ext4_lblk_t count2; | ||
1320 | 1321 | ||
1321 | bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); | 1322 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) |
1322 | if (!bh) { | 1323 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
1323 | EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), | 1324 | if (end >= max_block) |
1324 | "Read failure"); | 1325 | end = max_block; |
1325 | return -EIO; | 1326 | if ((start >= end) || (start > max_block)) |
1326 | } | 1327 | return 0; |
1327 | if (first > offset) { | 1328 | |
1328 | first2 = first - offset; | 1329 | n = ext4_block_to_path(inode, start, offsets, NULL); |
1329 | count2 = count; | 1330 | n2 = ext4_block_to_path(inode, end, offsets2, NULL); |
1331 | |||
1332 | BUG_ON(n > n2); | ||
1333 | |||
1334 | if ((n == 1) && (n == n2)) { | ||
1335 | /* We're punching only within direct block range */ | ||
1336 | ext4_free_data(handle, inode, NULL, i_data + offsets[0], | ||
1337 | i_data + offsets2[0]); | ||
1338 | return 0; | ||
1339 | } else if (n2 > n) { | ||
1340 | /* | ||
1341 | * Start and end are on a different levels so we're going to | ||
1342 | * free partial block at start, and partial block at end of | ||
1343 | * the range. If there are some levels in between then | ||
1344 | * do_indirects label will take care of that. | ||
1345 | */ | ||
1346 | |||
1347 | if (n == 1) { | ||
1348 | /* | ||
1349 | * Start is at the direct block level, free | ||
1350 | * everything to the end of the level. | ||
1351 | */ | ||
1352 | ext4_free_data(handle, inode, NULL, i_data + offsets[0], | ||
1353 | i_data + EXT4_NDIR_BLOCKS); | ||
1354 | goto end_range; | ||
1355 | } | ||
1356 | |||
1357 | |||
1358 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); | ||
1359 | if (nr) { | ||
1360 | if (partial == chain) { | ||
1361 | /* Shared branch grows from the inode */ | ||
1362 | ext4_free_branches(handle, inode, NULL, | ||
1363 | &nr, &nr+1, (chain+n-1) - partial); | ||
1364 | *partial->p = 0; | ||
1330 | } else { | 1365 | } else { |
1331 | first2 = 0; | 1366 | /* Shared branch grows from an indirect block */ |
1332 | count2 = count - (offset - first); | 1367 | BUFFER_TRACE(partial->bh, "get_write_access"); |
1368 | ext4_free_branches(handle, inode, partial->bh, | ||
1369 | partial->p, | ||
1370 | partial->p+1, (chain+n-1) - partial); | ||
1333 | } | 1371 | } |
1334 | ret = free_hole_blocks(handle, inode, bh, | 1372 | } |
1335 | (__le32 *)bh->b_data, level - 1, | 1373 | |
1336 | first2, count2, | 1374 | /* |
1337 | inode->i_sb->s_blocksize >> 2); | 1375 | * Clear the ends of indirect blocks on the shared branch |
1338 | if (ret) { | 1376 | * at the start of the range |
1339 | brelse(bh); | 1377 | */ |
1340 | goto err; | 1378 | while (partial > chain) { |
1379 | ext4_free_branches(handle, inode, partial->bh, | ||
1380 | partial->p + 1, | ||
1381 | (__le32 *)partial->bh->b_data+addr_per_block, | ||
1382 | (chain+n-1) - partial); | ||
1383 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1384 | brelse(partial->bh); | ||
1385 | partial--; | ||
1386 | } | ||
1387 | |||
1388 | end_range: | ||
1389 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); | ||
1390 | if (nr2) { | ||
1391 | if (partial2 == chain2) { | ||
1392 | /* | ||
1393 | * Remember, end is exclusive so here we're at | ||
1394 | * the start of the next level we're not going | ||
1395 | * to free. Everything was covered by the start | ||
1396 | * of the range. | ||
1397 | */ | ||
1398 | return 0; | ||
1399 | } else { | ||
1400 | /* Shared branch grows from an indirect block */ | ||
1401 | partial2--; | ||
1341 | } | 1402 | } |
1403 | } else { | ||
1404 | /* | ||
1405 | * ext4_find_shared returns Indirect structure which | ||
1406 | * points to the last element which should not be | ||
1407 | * removed by truncate. But this is end of the range | ||
1408 | * in punch_hole so we need to point to the next element | ||
1409 | */ | ||
1410 | partial2->p++; | ||
1342 | } | 1411 | } |
1343 | if (level == 0 || | 1412 | |
1344 | (bh && all_zeroes((__le32 *)bh->b_data, | 1413 | /* |
1345 | (__le32 *)bh->b_data + addr_per_block))) { | 1414 | * Clear the ends of indirect blocks on the shared branch |
1346 | ext4_free_data(handle, inode, parent_bh, | 1415 | * at the end of the range |
1347 | i_data, i_data + 1); | 1416 | */ |
1417 | while (partial2 > chain2) { | ||
1418 | ext4_free_branches(handle, inode, partial2->bh, | ||
1419 | (__le32 *)partial2->bh->b_data, | ||
1420 | partial2->p, | ||
1421 | (chain2+n2-1) - partial2); | ||
1422 | BUFFER_TRACE(partial2->bh, "call brelse"); | ||
1423 | brelse(partial2->bh); | ||
1424 | partial2--; | ||
1348 | } | 1425 | } |
1349 | brelse(bh); | 1426 | goto do_indirects; |
1350 | bh = NULL; | ||
1351 | } | 1427 | } |
1352 | 1428 | ||
1353 | err: | 1429 | /* Punch happened within the same level (n == n2) */ |
1354 | return ret; | 1430 | partial = ext4_find_shared(inode, n, offsets, chain, &nr); |
1355 | } | 1431 | partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); |
1356 | 1432 | /* | |
1357 | int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, | 1433 | * ext4_find_shared returns Indirect structure which |
1358 | ext4_lblk_t first, ext4_lblk_t stop) | 1434 | * points to the last element which should not be |
1359 | { | 1435 | * removed by truncate. But this is end of the range |
1360 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 1436 | * in punch_hole so we need to point to the next element |
1361 | int level, ret = 0; | 1437 | */ |
1362 | int num = EXT4_NDIR_BLOCKS; | 1438 | partial2->p++; |
1363 | ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; | 1439 | while ((partial > chain) || (partial2 > chain2)) { |
1364 | __le32 *i_data = EXT4_I(inode)->i_data; | 1440 | /* We're at the same block, so we're almost finished */ |
1365 | 1441 | if ((partial->bh && partial2->bh) && | |
1366 | count = stop - first; | 1442 | (partial->bh->b_blocknr == partial2->bh->b_blocknr)) { |
1367 | for (level = 0; level < 4; level++, max *= addr_per_block) { | 1443 | if ((partial > chain) && (partial2 > chain2)) { |
1368 | if (first < max) { | 1444 | ext4_free_branches(handle, inode, partial->bh, |
1369 | ret = free_hole_blocks(handle, inode, NULL, i_data, | 1445 | partial->p + 1, |
1370 | level, first, count, num); | 1446 | partial2->p, |
1371 | if (ret) | 1447 | (chain+n-1) - partial); |
1372 | goto err; | 1448 | BUFFER_TRACE(partial->bh, "call brelse"); |
1373 | if (count > max - first) | 1449 | brelse(partial->bh); |
1374 | count -= max - first; | 1450 | BUFFER_TRACE(partial2->bh, "call brelse"); |
1375 | else | 1451 | brelse(partial2->bh); |
1376 | break; | 1452 | } |
1377 | first = 0; | 1453 | return 0; |
1378 | } else { | ||
1379 | first -= max; | ||
1380 | } | 1454 | } |
1381 | i_data += num; | 1455 | /* |
1382 | if (level == 0) { | 1456 | * Clear the ends of indirect blocks on the shared branch |
1383 | num = 1; | 1457 | * at the start of the range |
1384 | max = 1; | 1458 | */ |
1459 | if (partial > chain) { | ||
1460 | ext4_free_branches(handle, inode, partial->bh, | ||
1461 | partial->p + 1, | ||
1462 | (__le32 *)partial->bh->b_data+addr_per_block, | ||
1463 | (chain+n-1) - partial); | ||
1464 | BUFFER_TRACE(partial->bh, "call brelse"); | ||
1465 | brelse(partial->bh); | ||
1466 | partial--; | ||
1467 | } | ||
1468 | /* | ||
1469 | * Clear the ends of indirect blocks on the shared branch | ||
1470 | * at the end of the range | ||
1471 | */ | ||
1472 | if (partial2 > chain2) { | ||
1473 | ext4_free_branches(handle, inode, partial2->bh, | ||
1474 | (__le32 *)partial2->bh->b_data, | ||
1475 | partial2->p, | ||
1476 | (chain2+n-1) - partial2); | ||
1477 | BUFFER_TRACE(partial2->bh, "call brelse"); | ||
1478 | brelse(partial2->bh); | ||
1479 | partial2--; | ||
1385 | } | 1480 | } |
1386 | } | 1481 | } |
1387 | 1482 | ||
1388 | err: | 1483 | do_indirects: |
1389 | return ret; | 1484 | /* Kill the remaining (whole) subtrees */ |
1485 | switch (offsets[0]) { | ||
1486 | default: | ||
1487 | if (++n >= n2) | ||
1488 | return 0; | ||
1489 | nr = i_data[EXT4_IND_BLOCK]; | ||
1490 | if (nr) { | ||
1491 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); | ||
1492 | i_data[EXT4_IND_BLOCK] = 0; | ||
1493 | } | ||
1494 | case EXT4_IND_BLOCK: | ||
1495 | if (++n >= n2) | ||
1496 | return 0; | ||
1497 | nr = i_data[EXT4_DIND_BLOCK]; | ||
1498 | if (nr) { | ||
1499 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); | ||
1500 | i_data[EXT4_DIND_BLOCK] = 0; | ||
1501 | } | ||
1502 | case EXT4_DIND_BLOCK: | ||
1503 | if (++n >= n2) | ||
1504 | return 0; | ||
1505 | nr = i_data[EXT4_TIND_BLOCK]; | ||
1506 | if (nr) { | ||
1507 | ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); | ||
1508 | i_data[EXT4_TIND_BLOCK] = 0; | ||
1509 | } | ||
1510 | case EXT4_TIND_BLOCK: | ||
1511 | ; | ||
1512 | } | ||
1513 | return 0; | ||
1390 | } | 1514 | } |
1391 | |||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 027ee8c40470..367a60c07cf0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3506,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3506 | ret = ext4_ext_remove_space(inode, first_block, | 3506 | ret = ext4_ext_remove_space(inode, first_block, |
3507 | stop_block - 1); | 3507 | stop_block - 1); |
3508 | else | 3508 | else |
3509 | ret = ext4_free_hole_blocks(handle, inode, first_block, | 3509 | ret = ext4_ind_remove_space(handle, inode, first_block, |
3510 | stop_block); | 3510 | stop_block); |
3511 | 3511 | ||
3512 | up_write(&EXT4_I(inode)->i_data_sem); | 3512 | up_write(&EXT4_I(inode)->i_data_sem); |