aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2014-07-15 06:03:38 -0400
committerTheodore Ts'o <tytso@mit.edu>2014-07-15 06:03:38 -0400
commit4f579ae7de560e5f449587a6c3f02594d53d4d51 (patch)
tree4880232d5b88692ae01779bbd9d495a531ad378d /fs
parent71d4f7d032149b935a26eb3ff85c6c837f3714e1 (diff)
ext4: fix punch hole on files with indirect mapping
Currently the punch hole code for files with direct/indirect mapping has some problems which may lead to data loss. For example (from Jan Kara): fallocate -n -p 10240000 4096 will punch the range 10240000 - 12632064 instead of the range 10240000 - 10244096. Also the code is a bit weird: it does not use the infrastructure provided by indirect.c, but rather creates its own way of doing things. This patch fixes the issues and also makes the operation run 4 times faster in my testing (punching out a 60GB file). It uses an approach similar to the one used in ext4_ind_truncate(), which takes advantage of the ext4_free_branches() function. Also rename ext4_free_hole_blocks() to something more sensible, like the equivalent we have for extent mapped files. Call it ext4_ind_remove_space(). This has been tested mostly with fsx and some xfstests which test punch hole but do not require unwritten extents, which are not supported with direct/indirect mapping. No problems showed up even with 1024k block size. CC: stable@vger.kernel.org Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/indirect.c281
-rw-r--r--fs/ext4/inode.c2
3 files changed, 205 insertions, 82 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d35c78c96184..5535ed2be8c7 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2143,8 +2143,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
2143extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2143extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2144extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2144extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2145extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2145extern void ext4_ind_truncate(handle_t *, struct inode *inode);
2146extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 2146extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
2147 ext4_lblk_t first, ext4_lblk_t stop); 2147 ext4_lblk_t start, ext4_lblk_t end);
2148 2148
2149/* ioctl.c */ 2149/* ioctl.c */
2150extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 2150extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index fd69da194826..e75f840000a0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1295,97 +1295,220 @@ do_indirects:
1295 } 1295 }
1296} 1296}
1297 1297
1298static int free_hole_blocks(handle_t *handle, struct inode *inode, 1298/**
1299 struct buffer_head *parent_bh, __le32 *i_data, 1299 * ext4_ind_remove_space - remove space from the range
1300 int level, ext4_lblk_t first, 1300 * @handle: JBD handle for this transaction
1301 ext4_lblk_t count, int max) 1301 * @inode: inode we are dealing with
1302 * @start: First block to remove
1303 * @end: One block after the last block to remove (exclusive)
1304 *
1305 * Free the blocks in the defined range (end is exclusive endpoint of
1306 * range). This is used by ext4_punch_hole().
1307 */
1308int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
1309 ext4_lblk_t start, ext4_lblk_t end)
1302{ 1310{
1303 struct buffer_head *bh = NULL; 1311 struct ext4_inode_info *ei = EXT4_I(inode);
1312 __le32 *i_data = ei->i_data;
1304 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1313 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1305 int ret = 0; 1314 ext4_lblk_t offsets[4], offsets2[4];
1306 int i, inc; 1315 Indirect chain[4], chain2[4];
1307 ext4_lblk_t offset; 1316 Indirect *partial, *partial2;
1308 __le32 blk; 1317 ext4_lblk_t max_block;
1309 1318 __le32 nr = 0, nr2 = 0;
1310 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); 1319 int n = 0, n2 = 0;
1311 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { 1320 unsigned blocksize = inode->i_sb->s_blocksize;
1312 if (offset >= count + first)
1313 break;
1314 if (*i_data == 0 || (offset + inc) <= first)
1315 continue;
1316 blk = *i_data;
1317 if (level > 0) {
1318 ext4_lblk_t first2;
1319 ext4_lblk_t count2;
1320 1321
1321 bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); 1322 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1322 if (!bh) { 1323 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1323 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), 1324 if (end >= max_block)
1324 "Read failure"); 1325 end = max_block;
1325 return -EIO; 1326 if ((start >= end) || (start > max_block))
1326 } 1327 return 0;
1327 if (first > offset) { 1328
1328 first2 = first - offset; 1329 n = ext4_block_to_path(inode, start, offsets, NULL);
1329 count2 = count; 1330 n2 = ext4_block_to_path(inode, end, offsets2, NULL);
1331
1332 BUG_ON(n > n2);
1333
1334 if ((n == 1) && (n == n2)) {
1335 /* We're punching only within direct block range */
1336 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1337 i_data + offsets2[0]);
1338 return 0;
1339 } else if (n2 > n) {
1340 /*
1341 * Start and end are on a different levels so we're going to
1342 * free partial block at start, and partial block at end of
1343 * the range. If there are some levels in between then
1344 * do_indirects label will take care of that.
1345 */
1346
1347 if (n == 1) {
1348 /*
1349 * Start is at the direct block level, free
1350 * everything to the end of the level.
1351 */
1352 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1353 i_data + EXT4_NDIR_BLOCKS);
1354 goto end_range;
1355 }
1356
1357
1358 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1359 if (nr) {
1360 if (partial == chain) {
1361 /* Shared branch grows from the inode */
1362 ext4_free_branches(handle, inode, NULL,
1363 &nr, &nr+1, (chain+n-1) - partial);
1364 *partial->p = 0;
1330 } else { 1365 } else {
1331 first2 = 0; 1366 /* Shared branch grows from an indirect block */
1332 count2 = count - (offset - first); 1367 BUFFER_TRACE(partial->bh, "get_write_access");
1368 ext4_free_branches(handle, inode, partial->bh,
1369 partial->p,
1370 partial->p+1, (chain+n-1) - partial);
1333 } 1371 }
1334 ret = free_hole_blocks(handle, inode, bh, 1372 }
1335 (__le32 *)bh->b_data, level - 1, 1373
1336 first2, count2, 1374 /*
1337 inode->i_sb->s_blocksize >> 2); 1375 * Clear the ends of indirect blocks on the shared branch
1338 if (ret) { 1376 * at the start of the range
1339 brelse(bh); 1377 */
1340 goto err; 1378 while (partial > chain) {
1379 ext4_free_branches(handle, inode, partial->bh,
1380 partial->p + 1,
1381 (__le32 *)partial->bh->b_data+addr_per_block,
1382 (chain+n-1) - partial);
1383 BUFFER_TRACE(partial->bh, "call brelse");
1384 brelse(partial->bh);
1385 partial--;
1386 }
1387
1388end_range:
1389 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1390 if (nr2) {
1391 if (partial2 == chain2) {
1392 /*
1393 * Remember, end is exclusive so here we're at
1394 * the start of the next level we're not going
1395 * to free. Everything was covered by the start
1396 * of the range.
1397 */
1398 return 0;
1399 } else {
1400 /* Shared branch grows from an indirect block */
1401 partial2--;
1341 } 1402 }
1403 } else {
1404 /*
1405 * ext4_find_shared returns Indirect structure which
1406 * points to the last element which should not be
1407 * removed by truncate. But this is end of the range
1408 * in punch_hole so we need to point to the next element
1409 */
1410 partial2->p++;
1342 } 1411 }
1343 if (level == 0 || 1412
1344 (bh && all_zeroes((__le32 *)bh->b_data, 1413 /*
1345 (__le32 *)bh->b_data + addr_per_block))) { 1414 * Clear the ends of indirect blocks on the shared branch
1346 ext4_free_data(handle, inode, parent_bh, 1415 * at the end of the range
1347 i_data, i_data + 1); 1416 */
1417 while (partial2 > chain2) {
1418 ext4_free_branches(handle, inode, partial2->bh,
1419 (__le32 *)partial2->bh->b_data,
1420 partial2->p,
1421 (chain2+n2-1) - partial2);
1422 BUFFER_TRACE(partial2->bh, "call brelse");
1423 brelse(partial2->bh);
1424 partial2--;
1348 } 1425 }
1349 brelse(bh); 1426 goto do_indirects;
1350 bh = NULL;
1351 } 1427 }
1352 1428
1353err: 1429 /* Punch happened within the same level (n == n2) */
1354 return ret; 1430 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1355} 1431 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1356 1432 /*
1357int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 1433 * ext4_find_shared returns Indirect structure which
1358 ext4_lblk_t first, ext4_lblk_t stop) 1434 * points to the last element which should not be
1359{ 1435 * removed by truncate. But this is end of the range
1360 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1436 * in punch_hole so we need to point to the next element
1361 int level, ret = 0; 1437 */
1362 int num = EXT4_NDIR_BLOCKS; 1438 partial2->p++;
1363 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; 1439 while ((partial > chain) || (partial2 > chain2)) {
1364 __le32 *i_data = EXT4_I(inode)->i_data; 1440 /* We're at the same block, so we're almost finished */
1365 1441 if ((partial->bh && partial2->bh) &&
1366 count = stop - first; 1442 (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
1367 for (level = 0; level < 4; level++, max *= addr_per_block) { 1443 if ((partial > chain) && (partial2 > chain2)) {
1368 if (first < max) { 1444 ext4_free_branches(handle, inode, partial->bh,
1369 ret = free_hole_blocks(handle, inode, NULL, i_data, 1445 partial->p + 1,
1370 level, first, count, num); 1446 partial2->p,
1371 if (ret) 1447 (chain+n-1) - partial);
1372 goto err; 1448 BUFFER_TRACE(partial->bh, "call brelse");
1373 if (count > max - first) 1449 brelse(partial->bh);
1374 count -= max - first; 1450 BUFFER_TRACE(partial2->bh, "call brelse");
1375 else 1451 brelse(partial2->bh);
1376 break; 1452 }
1377 first = 0; 1453 return 0;
1378 } else {
1379 first -= max;
1380 } 1454 }
1381 i_data += num; 1455 /*
1382 if (level == 0) { 1456 * Clear the ends of indirect blocks on the shared branch
1383 num = 1; 1457 * at the start of the range
1384 max = 1; 1458 */
1459 if (partial > chain) {
1460 ext4_free_branches(handle, inode, partial->bh,
1461 partial->p + 1,
1462 (__le32 *)partial->bh->b_data+addr_per_block,
1463 (chain+n-1) - partial);
1464 BUFFER_TRACE(partial->bh, "call brelse");
1465 brelse(partial->bh);
1466 partial--;
1467 }
1468 /*
1469 * Clear the ends of indirect blocks on the shared branch
1470 * at the end of the range
1471 */
1472 if (partial2 > chain2) {
1473 ext4_free_branches(handle, inode, partial2->bh,
1474 (__le32 *)partial2->bh->b_data,
1475 partial2->p,
1476 (chain2+n-1) - partial2);
1477 BUFFER_TRACE(partial2->bh, "call brelse");
1478 brelse(partial2->bh);
1479 partial2--;
1385 } 1480 }
1386 } 1481 }
1387 1482
1388err: 1483do_indirects:
1389 return ret; 1484 /* Kill the remaining (whole) subtrees */
1485 switch (offsets[0]) {
1486 default:
1487 if (++n >= n2)
1488 return 0;
1489 nr = i_data[EXT4_IND_BLOCK];
1490 if (nr) {
1491 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1492 i_data[EXT4_IND_BLOCK] = 0;
1493 }
1494 case EXT4_IND_BLOCK:
1495 if (++n >= n2)
1496 return 0;
1497 nr = i_data[EXT4_DIND_BLOCK];
1498 if (nr) {
1499 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1500 i_data[EXT4_DIND_BLOCK] = 0;
1501 }
1502 case EXT4_DIND_BLOCK:
1503 if (++n >= n2)
1504 return 0;
1505 nr = i_data[EXT4_TIND_BLOCK];
1506 if (nr) {
1507 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1508 i_data[EXT4_TIND_BLOCK] = 0;
1509 }
1510 case EXT4_TIND_BLOCK:
1511 ;
1512 }
1513 return 0;
1390} 1514}
1391
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 027ee8c40470..367a60c07cf0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3506,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3506 ret = ext4_ext_remove_space(inode, first_block, 3506 ret = ext4_ext_remove_space(inode, first_block,
3507 stop_block - 1); 3507 stop_block - 1);
3508 else 3508 else
3509 ret = ext4_free_hole_blocks(handle, inode, first_block, 3509 ret = ext4_ind_remove_space(handle, inode, first_block,
3510 stop_block); 3510 stop_block);
3511 3511
3512 up_write(&EXT4_I(inode)->i_data_sem); 3512 up_write(&EXT4_I(inode)->i_data_sem);