aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorTristan Ye <tristan.ye@oracle.com>2010-05-11 05:54:45 -0400
committerJoel Becker <joel.becker@oracle.com>2010-05-18 15:31:05 -0400
commitc1631d4a484fbb498e35d661f1aebd64c86b66bf (patch)
tree06e951cfebbd616dbaa0017d1c030a3f6e0d8d88 /fs/ocfs2
parentee149a7c6cbaee0e3a1a7d9e9f92711228ef5236 (diff)
Ocfs2: Optimize punching-hole code.
This patch simplifies the logic of handling existing holes and skipping extent blocks and removes some confusing comments. The patch survived the fill_verify_holes testcase in ocfs2-test. It also passed my manual sanity check and stress tests with enormous extent records. Currently punching a hole on a file with 3+ extent tree depth was really a performance disaster. It can even take several hours, though we may not hit this in real life with such a huge extent number. One simple way to improve the performance is quite straightforward. From the logic of truncate, we can punch the hole from hole_end to hole_start, which reduces the overhead of btree operations in a significant way, such as tree rotation and moving. Following is the testing result when punching hole from 0 to file end in bytes, on a 1G file, 1G file consists of 256k extent records, each record cover 4k data(just one cluster, clustersize is 4k): =========================================================================== * Original punching-hole mechanism: =========================================================================== I waited 1 hour for its completion, unfortunately it's still ongoing. =========================================================================== * Patched punching-hode mechanism: =========================================================================== real 0m2.518s user 0m0.000s sys 0m2.445s That means we've gained up to 1000 times improvement on performance in this case, whee! It's fairly cool. and it looks like that performance gain will be raising when extent records grow. The patch was based on my former 2 patches, which were about truncating codes optimization and fixup to handle CoW on punching hole. Signed-off-by: Tristan Ye <tristan.ye@oracle.com> Acked-by: Mark Fasheh <mfasheh@suse.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/file.c164
1 files changed, 140 insertions, 24 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3346e5b199d..9c1047c2e44 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1418,18 +1418,90 @@ out:
1418 return ret; 1418 return ret;
1419} 1419}
1420 1420
1421static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
1422{
1423 int i;
1424 struct ocfs2_extent_rec *rec = NULL;
1425
1426 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1427
1428 rec = &el->l_recs[i];
1429
1430 if (le32_to_cpu(rec->e_cpos) < pos)
1431 break;
1432 }
1433
1434 return i;
1435}
1436
1437/*
1438 * Helper to calculate the punching pos and length in one run, we handle the
1439 * following three cases in order:
1440 *
1441 * - remove the entire record
1442 * - remove a partial record
1443 * - no record needs to be removed (hole-punching completed)
1444*/
1445static void ocfs2_calc_trunc_pos(struct inode *inode,
1446 struct ocfs2_extent_list *el,
1447 struct ocfs2_extent_rec *rec,
1448 u32 trunc_start, u32 *trunc_cpos,
1449 u32 *trunc_len, u32 *trunc_end,
1450 u64 *blkno, int *done)
1451{
1452 int ret = 0;
1453 u32 coff, range;
1454
1455 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1456
1457 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1458 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1459 /*
1460 * Skip holes if any.
1461 */
1462 if (range < *trunc_end)
1463 *trunc_end = range;
1464 *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
1465 *blkno = le64_to_cpu(rec->e_blkno);
1466 *trunc_end = le32_to_cpu(rec->e_cpos);
1467 } else if (range > trunc_start) {
1468 *trunc_cpos = trunc_start;
1469 *trunc_len = *trunc_end - trunc_start;
1470 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1471 *blkno = le64_to_cpu(rec->e_blkno) +
1472 ocfs2_clusters_to_blocks(inode->i_sb, coff);
1473 *trunc_end = trunc_start;
1474 } else {
1475 /*
1476 * It may have two following possibilities:
1477 *
1478 * - last record has been removed
1479 * - trunc_start was within a hole
1480 *
1481 * both two cases mean the completion of hole punching.
1482 */
1483 ret = 1;
1484 }
1485
1486 *done = ret;
1487}
1488
1421static int ocfs2_remove_inode_range(struct inode *inode, 1489static int ocfs2_remove_inode_range(struct inode *inode,
1422 struct buffer_head *di_bh, u64 byte_start, 1490 struct buffer_head *di_bh, u64 byte_start,
1423 u64 byte_len) 1491 u64 byte_len)
1424{ 1492{
1425 int ret = 0, flags = 0; 1493 int ret = 0, flags = 0, done = 0, i;
1426 u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; 1494 u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
1495 u32 cluster_in_el;
1427 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1496 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1428 struct ocfs2_cached_dealloc_ctxt dealloc; 1497 struct ocfs2_cached_dealloc_ctxt dealloc;
1429 struct address_space *mapping = inode->i_mapping; 1498 struct address_space *mapping = inode->i_mapping;
1430 struct ocfs2_extent_tree et; 1499 struct ocfs2_extent_tree et;
1500 struct ocfs2_path *path = NULL;
1501 struct ocfs2_extent_list *el = NULL;
1502 struct ocfs2_extent_rec *rec = NULL;
1431 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1503 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1432 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); 1504 u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
1433 1505
1434 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 1506 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1435 ocfs2_init_dealloc_ctxt(&dealloc); 1507 ocfs2_init_dealloc_ctxt(&dealloc);
@@ -1477,16 +1549,13 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1477 } 1549 }
1478 1550
1479 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1551 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1480 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1552 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
1481 if (trunc_len >= trunc_start) 1553 cluster_in_el = trunc_end;
1482 trunc_len -= trunc_start;
1483 else
1484 trunc_len = 0;
1485 1554
1486 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", 1555 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
1487 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1556 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1488 (unsigned long long)byte_start, 1557 (unsigned long long)byte_start,
1489 (unsigned long long)byte_len, trunc_start, trunc_len); 1558 (unsigned long long)byte_len, trunc_start, trunc_end);
1490 1559
1491 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); 1560 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1492 if (ret) { 1561 if (ret) {
@@ -1494,32 +1563,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1494 goto out; 1563 goto out;
1495 } 1564 }
1496 1565
1497 cpos = trunc_start; 1566 path = ocfs2_new_path_from_et(&et);
1498 while (trunc_len) { 1567 if (!path) {
1499 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, 1568 ret = -ENOMEM;
1500 &alloc_size, &flags); 1569 mlog_errno(ret);
1570 goto out;
1571 }
1572
1573 while (trunc_end > trunc_start) {
1574
1575 ret = ocfs2_find_path(INODE_CACHE(inode), path,
1576 cluster_in_el);
1501 if (ret) { 1577 if (ret) {
1502 mlog_errno(ret); 1578 mlog_errno(ret);
1503 goto out; 1579 goto out;
1504 } 1580 }
1505 1581
1506 if (alloc_size > trunc_len) 1582 el = path_leaf_el(path);
1507 alloc_size = trunc_len; 1583
1584 i = ocfs2_find_rec(el, trunc_end);
1585 /*
1586 * Need to go to previous extent block.
1587 */
1588 if (i < 0) {
1589 if (path->p_tree_depth == 0)
1590 break;
1508 1591
1509 /* Only do work for non-holes */ 1592 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
1510 if (phys_cpos != 0) { 1593 path,
1511 ret = ocfs2_remove_btree_range(inode, &et, cpos, 1594 &cluster_in_el);
1512 phys_cpos, alloc_size,
1513 flags, &dealloc,
1514 refcount_loc);
1515 if (ret) { 1595 if (ret) {
1516 mlog_errno(ret); 1596 mlog_errno(ret);
1517 goto out; 1597 goto out;
1518 } 1598 }
1599
1600 /*
1601 * We've reached the leftmost extent block,
1602 * it's safe to leave.
1603 */
1604 if (cluster_in_el == 0)
1605 break;
1606
1607 /*
1608 * The 'pos' searched for previous extent block is
1609 * always one cluster less than actual trunc_end.
1610 */
1611 trunc_end = cluster_in_el + 1;
1612
1613 ocfs2_reinit_path(path, 1);
1614
1615 continue;
1616
1617 } else
1618 rec = &el->l_recs[i];
1619
1620 ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
1621 &trunc_len, &trunc_end, &blkno, &done);
1622 if (done)
1623 break;
1624
1625 flags = rec->e_flags;
1626 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
1627
1628 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
1629 phys_cpos, trunc_len, flags,
1630 &dealloc, refcount_loc);
1631 if (ret < 0) {
1632 mlog_errno(ret);
1633 goto out;
1519 } 1634 }
1520 1635
1521 cpos += alloc_size; 1636 cluster_in_el = trunc_end;
1522 trunc_len -= alloc_size; 1637
1638 ocfs2_reinit_path(path, 1);
1523 } 1639 }
1524 1640
1525 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); 1641 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);