aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2012-08-29 14:27:18 -0400
committerChris Mason <chris.mason@fusionio.com>2012-10-01 15:19:07 -0400
commit2aaa66558172b017f36bf38ae69372813dedee9d (patch)
treed6150a615935d36166cd06d69e3252451e62e724 /fs/btrfs/file.c
parent2671485d395c07fca104c972785898d7c52fc942 (diff)
Btrfs: add hole punching
This patch adds hole punching via fallocate. Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c332
1 files changed, 328 insertions, 4 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 58598c249951..57026a6e9c94 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,6 +39,7 @@
39#include "tree-log.h" 39#include "tree-log.h"
40#include "locking.h" 40#include "locking.h"
41#include "compat.h" 41#include "compat.h"
42#include "volumes.h"
42 43
43/* 44/*
44 * when auto defrag is enabled we 45 * when auto defrag is enabled we
@@ -584,7 +585,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
584int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 585int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
585 struct btrfs_root *root, struct inode *inode, 586 struct btrfs_root *root, struct inode *inode,
586 struct btrfs_path *path, u64 start, u64 end, 587 struct btrfs_path *path, u64 start, u64 end,
587 int drop_cache) 588 u64 *drop_end, int drop_cache)
588{ 589{
589 struct extent_buffer *leaf; 590 struct extent_buffer *leaf;
590 struct btrfs_file_extent_item *fi; 591 struct btrfs_file_extent_item *fi;
@@ -822,6 +823,8 @@ next_slot:
822 btrfs_abort_transaction(trans, root, ret); 823 btrfs_abort_transaction(trans, root, ret);
823 } 824 }
824 825
826 if (drop_end)
827 *drop_end = min(end, extent_end);
825 btrfs_release_path(path); 828 btrfs_release_path(path);
826 return ret; 829 return ret;
827} 830}
@@ -836,7 +839,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
836 path = btrfs_alloc_path(); 839 path = btrfs_alloc_path();
837 if (!path) 840 if (!path)
838 return -ENOMEM; 841 return -ENOMEM;
839 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, 842 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
840 drop_cache); 843 drop_cache);
841 btrfs_free_path(path); 844 btrfs_free_path(path);
842 return ret; 845 return ret;
@@ -1645,6 +1648,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
1645 return 0; 1648 return 0;
1646} 1649}
1647 1650
1651static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
1652 int slot, u64 start, u64 end)
1653{
1654 struct btrfs_file_extent_item *fi;
1655 struct btrfs_key key;
1656
1657 if (slot < 0 || slot >= btrfs_header_nritems(leaf))
1658 return 0;
1659
1660 btrfs_item_key_to_cpu(leaf, &key, slot);
1661 if (key.objectid != btrfs_ino(inode) ||
1662 key.type != BTRFS_EXTENT_DATA_KEY)
1663 return 0;
1664
1665 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
1666
1667 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
1668 return 0;
1669
1670 if (btrfs_file_extent_disk_bytenr(leaf, fi))
1671 return 0;
1672
1673 if (key.offset == end)
1674 return 1;
1675 if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
1676 return 1;
1677 return 0;
1678}
1679
1680static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
1681 struct btrfs_path *path, u64 offset, u64 end)
1682{
1683 struct btrfs_root *root = BTRFS_I(inode)->root;
1684 struct extent_buffer *leaf;
1685 struct btrfs_file_extent_item *fi;
1686 struct extent_map *hole_em;
1687 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1688 struct btrfs_key key;
1689 int ret;
1690
1691 key.objectid = btrfs_ino(inode);
1692 key.type = BTRFS_EXTENT_DATA_KEY;
1693 key.offset = offset;
1694
1695
1696 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1697 if (ret < 0)
1698 return ret;
1699 BUG_ON(!ret);
1700
1701 leaf = path->nodes[0];
1702 if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
1703 u64 num_bytes;
1704
1705 path->slots[0]--;
1706 fi = btrfs_item_ptr(leaf, path->slots[0],
1707 struct btrfs_file_extent_item);
1708 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
1709 end - offset;
1710 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
1711 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
1712 btrfs_set_file_extent_offset(leaf, fi, 0);
1713 btrfs_mark_buffer_dirty(leaf);
1714 goto out;
1715 }
1716
1717 if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) {
1718 u64 num_bytes;
1719
1720 path->slots[0]++;
1721 key.offset = offset;
1722 btrfs_set_item_key_safe(trans, root, path, &key);
1723 fi = btrfs_item_ptr(leaf, path->slots[0],
1724 struct btrfs_file_extent_item);
1725 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
1726 offset;
1727 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
1728 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
1729 btrfs_set_file_extent_offset(leaf, fi, 0);
1730 btrfs_mark_buffer_dirty(leaf);
1731 goto out;
1732 }
1733 btrfs_release_path(path);
1734
1735 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
1736 0, 0, end - offset, 0, end - offset,
1737 0, 0, 0);
1738 if (ret)
1739 return ret;
1740
1741out:
1742 btrfs_release_path(path);
1743
1744 hole_em = alloc_extent_map();
1745 if (!hole_em) {
1746 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
1747 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1748 &BTRFS_I(inode)->runtime_flags);
1749 } else {
1750 hole_em->start = offset;
1751 hole_em->len = end - offset;
1752 hole_em->orig_start = offset;
1753
1754 hole_em->block_start = EXTENT_MAP_HOLE;
1755 hole_em->block_len = 0;
1756 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
1757 hole_em->compress_type = BTRFS_COMPRESS_NONE;
1758 hole_em->generation = trans->transid;
1759
1760 do {
1761 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
1762 write_lock(&em_tree->lock);
1763 ret = add_extent_mapping(em_tree, hole_em);
1764 if (!ret)
1765 list_move(&hole_em->list,
1766 &em_tree->modified_extents);
1767 write_unlock(&em_tree->lock);
1768 } while (ret == -EEXIST);
1769 free_extent_map(hole_em);
1770 if (ret)
1771 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1772 &BTRFS_I(inode)->runtime_flags);
1773 }
1774
1775 return 0;
1776}
1777
1778static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
1779{
1780 struct btrfs_root *root = BTRFS_I(inode)->root;
1781 struct extent_state *cached_state = NULL;
1782 struct btrfs_path *path;
1783 struct btrfs_block_rsv *rsv;
1784 struct btrfs_trans_handle *trans;
1785 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
1786 u64 lockstart = (offset + mask) & ~mask;
1787 u64 lockend = ((offset + len) & ~mask) - 1;
1788 u64 cur_offset = lockstart;
1789 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
1790 u64 drop_end;
1791 unsigned long nr;
1792 int ret = 0;
1793 int err = 0;
1794 bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
1795 ((offset + len) >> PAGE_CACHE_SHIFT);
1796
1797 btrfs_wait_ordered_range(inode, offset, len);
1798
1799 mutex_lock(&inode->i_mutex);
1800 if (offset >= inode->i_size) {
1801 mutex_unlock(&inode->i_mutex);
1802 return 0;
1803 }
1804
1805 /*
1806 * Only do this if we are in the same page and we aren't doing the
1807 * entire page.
1808 */
1809 if (same_page && len < PAGE_CACHE_SIZE) {
1810 ret = btrfs_truncate_page(inode, offset, len, 0);
1811 mutex_unlock(&inode->i_mutex);
1812 return ret;
1813 }
1814
1815 /* zero back part of the first page */
1816 ret = btrfs_truncate_page(inode, offset, 0, 0);
1817 if (ret) {
1818 mutex_unlock(&inode->i_mutex);
1819 return ret;
1820 }
1821
1822 /* zero the front end of the last page */
1823 ret = btrfs_truncate_page(inode, offset + len, 0, 1);
1824 if (ret) {
1825 mutex_unlock(&inode->i_mutex);
1826 return ret;
1827 }
1828
1829 if (lockend < lockstart) {
1830 mutex_unlock(&inode->i_mutex);
1831 return 0;
1832 }
1833
1834 while (1) {
1835 struct btrfs_ordered_extent *ordered;
1836
1837 truncate_pagecache_range(inode, lockstart, lockend);
1838
1839 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1840 0, &cached_state);
1841 ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
1842
1843 /*
1844 * We need to make sure we have no ordered extents in this range
1845 * and nobody raced in and read a page in this range, if we did
1846 * we need to try again.
1847 */
1848 if ((!ordered ||
1849 (ordered->file_offset + ordered->len < lockstart ||
1850 ordered->file_offset > lockend)) &&
1851 !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
1852 lockend, EXTENT_UPTODATE, 0,
1853 cached_state)) {
1854 if (ordered)
1855 btrfs_put_ordered_extent(ordered);
1856 break;
1857 }
1858 if (ordered)
1859 btrfs_put_ordered_extent(ordered);
1860 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
1861 lockend, &cached_state, GFP_NOFS);
1862 btrfs_wait_ordered_range(inode, lockstart,
1863 lockend - lockstart + 1);
1864 }
1865
1866 path = btrfs_alloc_path();
1867 if (!path) {
1868 ret = -ENOMEM;
1869 goto out;
1870 }
1871
1872 rsv = btrfs_alloc_block_rsv(root);
1873 if (!rsv) {
1874 ret = -ENOMEM;
1875 goto out_free;
1876 }
1877 rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
1878 rsv->failfast = 1;
1879
1880 /*
1881 * 1 - update the inode
1882 * 1 - removing the extents in the range
1883 * 1 - adding the hole extent
1884 */
1885 trans = btrfs_start_transaction(root, 3);
1886 if (IS_ERR(trans)) {
1887 err = PTR_ERR(trans);
1888 goto out_free;
1889 }
1890
1891 ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
1892 min_size);
1893 BUG_ON(ret);
1894 trans->block_rsv = rsv;
1895
1896 while (cur_offset < lockend) {
1897 ret = __btrfs_drop_extents(trans, root, inode, path,
1898 cur_offset, lockend + 1,
1899 &drop_end, 1);
1900 if (ret != -ENOSPC)
1901 break;
1902
1903 trans->block_rsv = &root->fs_info->trans_block_rsv;
1904
1905 ret = fill_holes(trans, inode, path, cur_offset, drop_end);
1906 if (ret) {
1907 err = ret;
1908 break;
1909 }
1910
1911 cur_offset = drop_end;
1912
1913 ret = btrfs_update_inode(trans, root, inode);
1914 if (ret) {
1915 err = ret;
1916 break;
1917 }
1918
1919 nr = trans->blocks_used;
1920 btrfs_end_transaction(trans, root);
1921 btrfs_btree_balance_dirty(root, nr);
1922
1923 trans = btrfs_start_transaction(root, 3);
1924 if (IS_ERR(trans)) {
1925 ret = PTR_ERR(trans);
1926 trans = NULL;
1927 break;
1928 }
1929
1930 ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
1931 rsv, min_size);
1932 BUG_ON(ret); /* shouldn't happen */
1933 trans->block_rsv = rsv;
1934 }
1935
1936 if (ret) {
1937 err = ret;
1938 goto out_trans;
1939 }
1940
1941 trans->block_rsv = &root->fs_info->trans_block_rsv;
1942 ret = fill_holes(trans, inode, path, cur_offset, drop_end);
1943 if (ret) {
1944 err = ret;
1945 goto out_trans;
1946 }
1947
1948out_trans:
1949 if (!trans)
1950 goto out_free;
1951
1952 trans->block_rsv = &root->fs_info->trans_block_rsv;
1953 ret = btrfs_update_inode(trans, root, inode);
1954 nr = trans->blocks_used;
1955 btrfs_end_transaction(trans, root);
1956 btrfs_btree_balance_dirty(root, nr);
1957out_free:
1958 btrfs_free_path(path);
1959 btrfs_free_block_rsv(root, rsv);
1960out:
1961 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1962 &cached_state, GFP_NOFS);
1963 mutex_unlock(&inode->i_mutex);
1964 if (ret && !err)
1965 err = ret;
1966 return err;
1967}
1968
1648static long btrfs_fallocate(struct file *file, int mode, 1969static long btrfs_fallocate(struct file *file, int mode,
1649 loff_t offset, loff_t len) 1970 loff_t offset, loff_t len)
1650{ 1971{
@@ -1663,10 +1984,13 @@ static long btrfs_fallocate(struct file *file, int mode,
1663 alloc_start = offset & ~mask; 1984 alloc_start = offset & ~mask;
1664 alloc_end = (offset + len + mask) & ~mask; 1985 alloc_end = (offset + len + mask) & ~mask;
1665 1986
1666 /* We only support the FALLOC_FL_KEEP_SIZE mode */ 1987 /* Make sure we aren't being give some crap mode */
1667 if (mode & ~FALLOC_FL_KEEP_SIZE) 1988 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
1668 return -EOPNOTSUPP; 1989 return -EOPNOTSUPP;
1669 1990
1991 if (mode & FALLOC_FL_PUNCH_HOLE)
1992 return btrfs_punch_hole(inode, offset, len);
1993
1670 /* 1994 /*
1671 * Make sure we have enough space before we do the 1995 * Make sure we have enough space before we do the
1672 * allocation. 1996 * allocation.