diff options
author | Zheng Liu <wenqing.lz@taobao.com> | 2013-01-28 09:21:37 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-01-28 09:21:37 -0500 |
commit | 8bad6fc813a3a5300f51369c39d315679fd88c72 (patch) | |
tree | c455b1f18cc19627a9ab1055d57b72042aeae0a0 /fs/ext4/indirect.c | |
parent | 03dafb5f59bd31b3f590329e95434203f0ca6661 (diff) |
ext4: add punching hole support for non-extent-mapped files
This patch adds punch hole support for indirect-mapped (non-extent) files.
It is almost the same as ext4_ext_punch_hole. First, we invalidate all
pages that lie within the hole, and then we try to deallocate all of the
blocks inside the hole.
A recursive function is used to handle deallocation of blocks. It
iterates over the entries in the inode's i_data array or in an
indirect block, and tries to free the block referenced by each of them.
After applying this patch, xfstest #255 will not pass on file systems
without extents, because indirect-mapped files do not support unwritten
extents.
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/indirect.c')
-rw-r--r-- | fs/ext4/indirect.c | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 8d83d1e508e4..bdd20231e66c 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -1518,3 +1518,243 @@ out_stop: | |||
1518 | trace_ext4_truncate_exit(inode); | 1518 | trace_ext4_truncate_exit(inode); |
1519 | } | 1519 | } |
1520 | 1520 | ||
1521 | static int free_hole_blocks(handle_t *handle, struct inode *inode, | ||
1522 | struct buffer_head *parent_bh, __le32 *i_data, | ||
1523 | int level, ext4_lblk_t first, | ||
1524 | ext4_lblk_t count, int max) | ||
1525 | { | ||
1526 | struct buffer_head *bh = NULL; | ||
1527 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
1528 | int ret = 0; | ||
1529 | int i, inc; | ||
1530 | ext4_lblk_t offset; | ||
1531 | __le32 blk; | ||
1532 | |||
1533 | inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); | ||
1534 | for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { | ||
1535 | if (offset >= count + first) | ||
1536 | break; | ||
1537 | if (*i_data == 0 || (offset + inc) <= first) | ||
1538 | continue; | ||
1539 | blk = *i_data; | ||
1540 | if (level > 0) { | ||
1541 | ext4_lblk_t first2; | ||
1542 | bh = sb_bread(inode->i_sb, blk); | ||
1543 | if (!bh) { | ||
1544 | EXT4_ERROR_INODE_BLOCK(inode, blk, | ||
1545 | "Read failure"); | ||
1546 | return -EIO; | ||
1547 | } | ||
1548 | first2 = (first > offset) ? first - offset : 0; | ||
1549 | ret = free_hole_blocks(handle, inode, bh, | ||
1550 | (__le32 *)bh->b_data, level - 1, | ||
1551 | first2, count - offset, | ||
1552 | inode->i_sb->s_blocksize >> 2); | ||
1553 | if (ret) { | ||
1554 | brelse(bh); | ||
1555 | goto err; | ||
1556 | } | ||
1557 | } | ||
1558 | if (level == 0 || | ||
1559 | (bh && all_zeroes((__le32 *)bh->b_data, | ||
1560 | (__le32 *)bh->b_data + addr_per_block))) { | ||
1561 | ext4_free_data(handle, inode, parent_bh, &blk, &blk+1); | ||
1562 | *i_data = 0; | ||
1563 | } | ||
1564 | brelse(bh); | ||
1565 | bh = NULL; | ||
1566 | } | ||
1567 | |||
1568 | err: | ||
1569 | return ret; | ||
1570 | } | ||
1571 | |||
1572 | static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, | ||
1573 | ext4_lblk_t first, ext4_lblk_t stop) | ||
1574 | { | ||
1575 | int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
1576 | int level, ret = 0; | ||
1577 | int num = EXT4_NDIR_BLOCKS; | ||
1578 | ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; | ||
1579 | __le32 *i_data = EXT4_I(inode)->i_data; | ||
1580 | |||
1581 | count = stop - first; | ||
1582 | for (level = 0; level < 4; level++, max *= addr_per_block) { | ||
1583 | if (first < max) { | ||
1584 | ret = free_hole_blocks(handle, inode, NULL, i_data, | ||
1585 | level, first, count, num); | ||
1586 | if (ret) | ||
1587 | goto err; | ||
1588 | if (count > max - first) | ||
1589 | count -= max - first; | ||
1590 | else | ||
1591 | break; | ||
1592 | first = 0; | ||
1593 | } else { | ||
1594 | first -= max; | ||
1595 | } | ||
1596 | i_data += num; | ||
1597 | if (level == 0) { | ||
1598 | num = 1; | ||
1599 | max = 1; | ||
1600 | } | ||
1601 | } | ||
1602 | |||
1603 | err: | ||
1604 | return ret; | ||
1605 | } | ||
1606 | |||
1607 | int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
1608 | { | ||
1609 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1610 | struct super_block *sb = inode->i_sb; | ||
1611 | ext4_lblk_t first_block, stop_block; | ||
1612 | struct address_space *mapping = inode->i_mapping; | ||
1613 | handle_t *handle = NULL; | ||
1614 | loff_t first_page, last_page, page_len; | ||
1615 | loff_t first_page_offset, last_page_offset; | ||
1616 | int err = 0; | ||
1617 | |||
1618 | /* | ||
1619 | * Write out all dirty pages to avoid race conditions | ||
1620 | * Then release them. | ||
1621 | */ | ||
1622 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
1623 | err = filemap_write_and_wait_range(mapping, | ||
1624 | offset, offset + length - 1); | ||
1625 | if (err) | ||
1626 | return err; | ||
1627 | } | ||
1628 | |||
1629 | mutex_lock(&inode->i_mutex); | ||
1630 | /* It's not possible punch hole on append only file */ | ||
1631 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
1632 | err = -EPERM; | ||
1633 | goto out_mutex; | ||
1634 | } | ||
1635 | if (IS_SWAPFILE(inode)) { | ||
1636 | err = -ETXTBSY; | ||
1637 | goto out_mutex; | ||
1638 | } | ||
1639 | |||
1640 | /* No need to punch hole beyond i_size */ | ||
1641 | if (offset >= inode->i_size) | ||
1642 | goto out_mutex; | ||
1643 | |||
1644 | /* | ||
1645 | * If the hole extents beyond i_size, set the hole | ||
1646 | * to end after the page that contains i_size | ||
1647 | */ | ||
1648 | if (offset + length > inode->i_size) { | ||
1649 | length = inode->i_size + | ||
1650 | PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) - | ||
1651 | offset; | ||
1652 | } | ||
1653 | |||
1654 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
1655 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | ||
1656 | |||
1657 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | ||
1658 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | ||
1659 | |||
1660 | /* Now release the pages */ | ||
1661 | if (last_page_offset > first_page_offset) { | ||
1662 | truncate_pagecache_range(inode, first_page_offset, | ||
1663 | last_page_offset - 1); | ||
1664 | } | ||
1665 | |||
1666 | /* Wait all existing dio works, newcomers will block on i_mutex */ | ||
1667 | inode_dio_wait(inode); | ||
1668 | |||
1669 | handle = start_transaction(inode); | ||
1670 | if (IS_ERR(handle)) | ||
1671 | goto out_mutex; | ||
1672 | |||
1673 | /* | ||
1674 | * Now we need to zero out the non-page-aligned data in the | ||
1675 | * pages at the start and tail of the hole, and unmap the buffer | ||
1676 | * heads for the block aligned regions of the page that were | ||
1677 | * completely zerod. | ||
1678 | */ | ||
1679 | if (first_page > last_page) { | ||
1680 | /* | ||
1681 | * If the file space being truncated is contained within a page | ||
1682 | * just zero out and unmap the middle of that page | ||
1683 | */ | ||
1684 | err = ext4_discard_partial_page_buffers(handle, | ||
1685 | mapping, offset, length, 0); | ||
1686 | if (err) | ||
1687 | goto out; | ||
1688 | } else { | ||
1689 | /* | ||
1690 | * Zero out and unmap the paritial page that contains | ||
1691 | * the start of the hole | ||
1692 | */ | ||
1693 | page_len = first_page_offset - offset; | ||
1694 | if (page_len > 0) { | ||
1695 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
1696 | offset, page_len, 0); | ||
1697 | if (err) | ||
1698 | goto out; | ||
1699 | } | ||
1700 | |||
1701 | /* | ||
1702 | * Zero out and unmap the partial page that contains | ||
1703 | * the end of the hole | ||
1704 | */ | ||
1705 | page_len = offset + length - last_page_offset; | ||
1706 | if (page_len > 0) { | ||
1707 | err = ext4_discard_partial_page_buffers(handle, mapping, | ||
1708 | last_page_offset, page_len, 0); | ||
1709 | if (err) | ||
1710 | goto out; | ||
1711 | } | ||
1712 | } | ||
1713 | |||
1714 | /* | ||
1715 | * If i_size contained in the last page, we need to | ||
1716 | * unmap and zero the paritial page after i_size | ||
1717 | */ | ||
1718 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
1719 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
1720 | page_len = PAGE_CACHE_SIZE - | ||
1721 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
1722 | if (page_len > 0) { | ||
1723 | err = ext4_discard_partial_page_buffers(handle, | ||
1724 | mapping, inode->i_size, page_len, 0); | ||
1725 | if (err) | ||
1726 | goto out; | ||
1727 | } | ||
1728 | } | ||
1729 | |||
1730 | first_block = (offset + sb->s_blocksize - 1) >> | ||
1731 | EXT4_BLOCK_SIZE_BITS(sb); | ||
1732 | stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
1733 | |||
1734 | if (first_block >= stop_block) | ||
1735 | goto out; | ||
1736 | |||
1737 | down_write(&EXT4_I(inode)->i_data_sem); | ||
1738 | ext4_discard_preallocations(inode); | ||
1739 | |||
1740 | err = ext4_es_remove_extent(inode, first_block, | ||
1741 | stop_block - first_block); | ||
1742 | err = ext4_free_hole_blocks(handle, inode, first_block, stop_block); | ||
1743 | |||
1744 | ext4_discard_preallocations(inode); | ||
1745 | |||
1746 | if (IS_SYNC(inode)) | ||
1747 | ext4_handle_sync(handle); | ||
1748 | |||
1749 | up_write(&EXT4_I(inode)->i_data_sem); | ||
1750 | |||
1751 | out: | ||
1752 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
1753 | ext4_mark_inode_dirty(handle, inode); | ||
1754 | ext4_journal_stop(handle); | ||
1755 | |||
1756 | out_mutex: | ||
1757 | mutex_unlock(&inode->i_mutex); | ||
1758 | |||
1759 | return err; | ||
1760 | } | ||