diff options
author | Josef Bacik <jbacik@fusionio.com> | 2012-08-29 14:27:18 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@fusionio.com> | 2012-10-01 15:19:07 -0400 |
commit | 2aaa66558172b017f36bf38ae69372813dedee9d (patch) | |
tree | d6150a615935d36166cd06d69e3252451e62e724 /fs/btrfs/file.c | |
parent | 2671485d395c07fca104c972785898d7c52fc942 (diff) |
Btrfs: add hole punching
This patch adds hole punching via fallocate. Thanks,
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 332 |
1 files changed, 328 insertions, 4 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58598c249951..57026a6e9c94 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "tree-log.h" | 39 | #include "tree-log.h" |
40 | #include "locking.h" | 40 | #include "locking.h" |
41 | #include "compat.h" | 41 | #include "compat.h" |
42 | #include "volumes.h" | ||
42 | 43 | ||
43 | /* | 44 | /* |
44 | * when auto defrag is enabled we | 45 | * when auto defrag is enabled we |
@@ -584,7 +585,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
584 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | 585 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
585 | struct btrfs_root *root, struct inode *inode, | 586 | struct btrfs_root *root, struct inode *inode, |
586 | struct btrfs_path *path, u64 start, u64 end, | 587 | struct btrfs_path *path, u64 start, u64 end, |
587 | int drop_cache) | 588 | u64 *drop_end, int drop_cache) |
588 | { | 589 | { |
589 | struct extent_buffer *leaf; | 590 | struct extent_buffer *leaf; |
590 | struct btrfs_file_extent_item *fi; | 591 | struct btrfs_file_extent_item *fi; |
@@ -822,6 +823,8 @@ next_slot: | |||
822 | btrfs_abort_transaction(trans, root, ret); | 823 | btrfs_abort_transaction(trans, root, ret); |
823 | } | 824 | } |
824 | 825 | ||
826 | if (drop_end) | ||
827 | *drop_end = min(end, extent_end); | ||
825 | btrfs_release_path(path); | 828 | btrfs_release_path(path); |
826 | return ret; | 829 | return ret; |
827 | } | 830 | } |
@@ -836,7 +839,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
836 | path = btrfs_alloc_path(); | 839 | path = btrfs_alloc_path(); |
837 | if (!path) | 840 | if (!path) |
838 | return -ENOMEM; | 841 | return -ENOMEM; |
839 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, | 842 | ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, |
840 | drop_cache); | 843 | drop_cache); |
841 | btrfs_free_path(path); | 844 | btrfs_free_path(path); |
842 | return ret; | 845 | return ret; |
@@ -1645,6 +1648,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1645 | return 0; | 1648 | return 0; |
1646 | } | 1649 | } |
1647 | 1650 | ||
1651 | static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, | ||
1652 | int slot, u64 start, u64 end) | ||
1653 | { | ||
1654 | struct btrfs_file_extent_item *fi; | ||
1655 | struct btrfs_key key; | ||
1656 | |||
1657 | if (slot < 0 || slot >= btrfs_header_nritems(leaf)) | ||
1658 | return 0; | ||
1659 | |||
1660 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
1661 | if (key.objectid != btrfs_ino(inode) || | ||
1662 | key.type != BTRFS_EXTENT_DATA_KEY) | ||
1663 | return 0; | ||
1664 | |||
1665 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
1666 | |||
1667 | if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) | ||
1668 | return 0; | ||
1669 | |||
1670 | if (btrfs_file_extent_disk_bytenr(leaf, fi)) | ||
1671 | return 0; | ||
1672 | |||
1673 | if (key.offset == end) | ||
1674 | return 1; | ||
1675 | if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start) | ||
1676 | return 1; | ||
1677 | return 0; | ||
1678 | } | ||
1679 | |||
1680 | static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, | ||
1681 | struct btrfs_path *path, u64 offset, u64 end) | ||
1682 | { | ||
1683 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1684 | struct extent_buffer *leaf; | ||
1685 | struct btrfs_file_extent_item *fi; | ||
1686 | struct extent_map *hole_em; | ||
1687 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
1688 | struct btrfs_key key; | ||
1689 | int ret; | ||
1690 | |||
1691 | key.objectid = btrfs_ino(inode); | ||
1692 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
1693 | key.offset = offset; | ||
1694 | |||
1695 | |||
1696 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | ||
1697 | if (ret < 0) | ||
1698 | return ret; | ||
1699 | BUG_ON(!ret); | ||
1700 | |||
1701 | leaf = path->nodes[0]; | ||
1702 | if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { | ||
1703 | u64 num_bytes; | ||
1704 | |||
1705 | path->slots[0]--; | ||
1706 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
1707 | struct btrfs_file_extent_item); | ||
1708 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + | ||
1709 | end - offset; | ||
1710 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
1711 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
1712 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
1713 | btrfs_mark_buffer_dirty(leaf); | ||
1714 | goto out; | ||
1715 | } | ||
1716 | |||
1717 | if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { | ||
1718 | u64 num_bytes; | ||
1719 | |||
1720 | path->slots[0]++; | ||
1721 | key.offset = offset; | ||
1722 | btrfs_set_item_key_safe(trans, root, path, &key); | ||
1723 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
1724 | struct btrfs_file_extent_item); | ||
1725 | num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - | ||
1726 | offset; | ||
1727 | btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); | ||
1728 | btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); | ||
1729 | btrfs_set_file_extent_offset(leaf, fi, 0); | ||
1730 | btrfs_mark_buffer_dirty(leaf); | ||
1731 | goto out; | ||
1732 | } | ||
1733 | btrfs_release_path(path); | ||
1734 | |||
1735 | ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, | ||
1736 | 0, 0, end - offset, 0, end - offset, | ||
1737 | 0, 0, 0); | ||
1738 | if (ret) | ||
1739 | return ret; | ||
1740 | |||
1741 | out: | ||
1742 | btrfs_release_path(path); | ||
1743 | |||
1744 | hole_em = alloc_extent_map(); | ||
1745 | if (!hole_em) { | ||
1746 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
1747 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
1748 | &BTRFS_I(inode)->runtime_flags); | ||
1749 | } else { | ||
1750 | hole_em->start = offset; | ||
1751 | hole_em->len = end - offset; | ||
1752 | hole_em->orig_start = offset; | ||
1753 | |||
1754 | hole_em->block_start = EXTENT_MAP_HOLE; | ||
1755 | hole_em->block_len = 0; | ||
1756 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
1757 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | ||
1758 | hole_em->generation = trans->transid; | ||
1759 | |||
1760 | do { | ||
1761 | btrfs_drop_extent_cache(inode, offset, end - 1, 0); | ||
1762 | write_lock(&em_tree->lock); | ||
1763 | ret = add_extent_mapping(em_tree, hole_em); | ||
1764 | if (!ret) | ||
1765 | list_move(&hole_em->list, | ||
1766 | &em_tree->modified_extents); | ||
1767 | write_unlock(&em_tree->lock); | ||
1768 | } while (ret == -EEXIST); | ||
1769 | free_extent_map(hole_em); | ||
1770 | if (ret) | ||
1771 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
1772 | &BTRFS_I(inode)->runtime_flags); | ||
1773 | } | ||
1774 | |||
1775 | return 0; | ||
1776 | } | ||
1777 | |||
1778 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | ||
1779 | { | ||
1780 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1781 | struct extent_state *cached_state = NULL; | ||
1782 | struct btrfs_path *path; | ||
1783 | struct btrfs_block_rsv *rsv; | ||
1784 | struct btrfs_trans_handle *trans; | ||
1785 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1786 | u64 lockstart = (offset + mask) & ~mask; | ||
1787 | u64 lockend = ((offset + len) & ~mask) - 1; | ||
1788 | u64 cur_offset = lockstart; | ||
1789 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
1790 | u64 drop_end; | ||
1791 | unsigned long nr; | ||
1792 | int ret = 0; | ||
1793 | int err = 0; | ||
1794 | bool same_page = (offset >> PAGE_CACHE_SHIFT) == | ||
1795 | ((offset + len) >> PAGE_CACHE_SHIFT); | ||
1796 | |||
1797 | btrfs_wait_ordered_range(inode, offset, len); | ||
1798 | |||
1799 | mutex_lock(&inode->i_mutex); | ||
1800 | if (offset >= inode->i_size) { | ||
1801 | mutex_unlock(&inode->i_mutex); | ||
1802 | return 0; | ||
1803 | } | ||
1804 | |||
1805 | /* | ||
1806 | * Only do this if we are in the same page and we aren't doing the | ||
1807 | * entire page. | ||
1808 | */ | ||
1809 | if (same_page && len < PAGE_CACHE_SIZE) { | ||
1810 | ret = btrfs_truncate_page(inode, offset, len, 0); | ||
1811 | mutex_unlock(&inode->i_mutex); | ||
1812 | return ret; | ||
1813 | } | ||
1814 | |||
1815 | /* zero back part of the first page */ | ||
1816 | ret = btrfs_truncate_page(inode, offset, 0, 0); | ||
1817 | if (ret) { | ||
1818 | mutex_unlock(&inode->i_mutex); | ||
1819 | return ret; | ||
1820 | } | ||
1821 | |||
1822 | /* zero the front end of the last page */ | ||
1823 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | ||
1824 | if (ret) { | ||
1825 | mutex_unlock(&inode->i_mutex); | ||
1826 | return ret; | ||
1827 | } | ||
1828 | |||
1829 | if (lockend < lockstart) { | ||
1830 | mutex_unlock(&inode->i_mutex); | ||
1831 | return 0; | ||
1832 | } | ||
1833 | |||
1834 | while (1) { | ||
1835 | struct btrfs_ordered_extent *ordered; | ||
1836 | |||
1837 | truncate_pagecache_range(inode, lockstart, lockend); | ||
1838 | |||
1839 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1840 | 0, &cached_state); | ||
1841 | ordered = btrfs_lookup_first_ordered_extent(inode, lockend); | ||
1842 | |||
1843 | /* | ||
1844 | * We need to make sure we have no ordered extents in this range | ||
1845 | * and nobody raced in and read a page in this range, if we did | ||
1846 | * we need to try again. | ||
1847 | */ | ||
1848 | if ((!ordered || | ||
1849 | (ordered->file_offset + ordered->len < lockstart || | ||
1850 | ordered->file_offset > lockend)) && | ||
1851 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
1852 | lockend, EXTENT_UPTODATE, 0, | ||
1853 | cached_state)) { | ||
1854 | if (ordered) | ||
1855 | btrfs_put_ordered_extent(ordered); | ||
1856 | break; | ||
1857 | } | ||
1858 | if (ordered) | ||
1859 | btrfs_put_ordered_extent(ordered); | ||
1860 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, | ||
1861 | lockend, &cached_state, GFP_NOFS); | ||
1862 | btrfs_wait_ordered_range(inode, lockstart, | ||
1863 | lockend - lockstart + 1); | ||
1864 | } | ||
1865 | |||
1866 | path = btrfs_alloc_path(); | ||
1867 | if (!path) { | ||
1868 | ret = -ENOMEM; | ||
1869 | goto out; | ||
1870 | } | ||
1871 | |||
1872 | rsv = btrfs_alloc_block_rsv(root); | ||
1873 | if (!rsv) { | ||
1874 | ret = -ENOMEM; | ||
1875 | goto out_free; | ||
1876 | } | ||
1877 | rsv->size = btrfs_calc_trunc_metadata_size(root, 1); | ||
1878 | rsv->failfast = 1; | ||
1879 | |||
1880 | /* | ||
1881 | * 1 - update the inode | ||
1882 | * 1 - removing the extents in the range | ||
1883 | * 1 - adding the hole extent | ||
1884 | */ | ||
1885 | trans = btrfs_start_transaction(root, 3); | ||
1886 | if (IS_ERR(trans)) { | ||
1887 | err = PTR_ERR(trans); | ||
1888 | goto out_free; | ||
1889 | } | ||
1890 | |||
1891 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, | ||
1892 | min_size); | ||
1893 | BUG_ON(ret); | ||
1894 | trans->block_rsv = rsv; | ||
1895 | |||
1896 | while (cur_offset < lockend) { | ||
1897 | ret = __btrfs_drop_extents(trans, root, inode, path, | ||
1898 | cur_offset, lockend + 1, | ||
1899 | &drop_end, 1); | ||
1900 | if (ret != -ENOSPC) | ||
1901 | break; | ||
1902 | |||
1903 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
1904 | |||
1905 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
1906 | if (ret) { | ||
1907 | err = ret; | ||
1908 | break; | ||
1909 | } | ||
1910 | |||
1911 | cur_offset = drop_end; | ||
1912 | |||
1913 | ret = btrfs_update_inode(trans, root, inode); | ||
1914 | if (ret) { | ||
1915 | err = ret; | ||
1916 | break; | ||
1917 | } | ||
1918 | |||
1919 | nr = trans->blocks_used; | ||
1920 | btrfs_end_transaction(trans, root); | ||
1921 | btrfs_btree_balance_dirty(root, nr); | ||
1922 | |||
1923 | trans = btrfs_start_transaction(root, 3); | ||
1924 | if (IS_ERR(trans)) { | ||
1925 | ret = PTR_ERR(trans); | ||
1926 | trans = NULL; | ||
1927 | break; | ||
1928 | } | ||
1929 | |||
1930 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, | ||
1931 | rsv, min_size); | ||
1932 | BUG_ON(ret); /* shouldn't happen */ | ||
1933 | trans->block_rsv = rsv; | ||
1934 | } | ||
1935 | |||
1936 | if (ret) { | ||
1937 | err = ret; | ||
1938 | goto out_trans; | ||
1939 | } | ||
1940 | |||
1941 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
1942 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | ||
1943 | if (ret) { | ||
1944 | err = ret; | ||
1945 | goto out_trans; | ||
1946 | } | ||
1947 | |||
1948 | out_trans: | ||
1949 | if (!trans) | ||
1950 | goto out_free; | ||
1951 | |||
1952 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
1953 | ret = btrfs_update_inode(trans, root, inode); | ||
1954 | nr = trans->blocks_used; | ||
1955 | btrfs_end_transaction(trans, root); | ||
1956 | btrfs_btree_balance_dirty(root, nr); | ||
1957 | out_free: | ||
1958 | btrfs_free_path(path); | ||
1959 | btrfs_free_block_rsv(root, rsv); | ||
1960 | out: | ||
1961 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
1962 | &cached_state, GFP_NOFS); | ||
1963 | mutex_unlock(&inode->i_mutex); | ||
1964 | if (ret && !err) | ||
1965 | err = ret; | ||
1966 | return err; | ||
1967 | } | ||
1968 | |||
1648 | static long btrfs_fallocate(struct file *file, int mode, | 1969 | static long btrfs_fallocate(struct file *file, int mode, |
1649 | loff_t offset, loff_t len) | 1970 | loff_t offset, loff_t len) |
1650 | { | 1971 | { |
@@ -1663,10 +1984,13 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
1663 | alloc_start = offset & ~mask; | 1984 | alloc_start = offset & ~mask; |
1664 | alloc_end = (offset + len + mask) & ~mask; | 1985 | alloc_end = (offset + len + mask) & ~mask; |
1665 | 1986 | ||
1666 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 1987 | /* Make sure we aren't being given some crap mode */ |
1667 | if (mode & ~FALLOC_FL_KEEP_SIZE) | 1988 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
1668 | return -EOPNOTSUPP; | 1989 | return -EOPNOTSUPP; |
1669 | 1990 | ||
1991 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
1992 | return btrfs_punch_hole(inode, offset, len); | ||
1993 | |||
1670 | /* | 1994 | /* |
1671 | * Make sure we have enough space before we do the | 1995 | * Make sure we have enough space before we do the |
1672 | * allocation. | 1996 | * allocation. |