aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-11-06 23:03:41 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-11-06 23:03:41 -0500
commit 6a6662ced4153f6dbcfc40d7225c3cc45416039c (patch)
tree 77ad5d577333f02cd854e44827a407dd0388d4eb /fs/btrfs/inode.c
parent 32aaeffbd4a7457bf2f7448b33b5946ff2a960eb (diff)
parent 7c7e82a77fe3d89ae50824aa7c897454675eb4c4 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (114 commits)
  Btrfs: check for a null fs root when writing to the backup root log
  Btrfs: fix race during transaction joins
  Btrfs: fix a potential btrfs_bio leak on scrub fixups
  Btrfs: rename btrfs_bio multi -> bbio for consistency
  Btrfs: stop leaking btrfs_bios on readahead
  Btrfs: stop the readahead threads on failed mount
  Btrfs: fix extent_buffer leak in the metadata IO error handling
  Btrfs: fix the new inspection ioctls for 32 bit compat
  Btrfs: fix delayed insertion reservation
  Btrfs: ClearPageError during writepage and clean_tree_block
  Btrfs: be smarter about committing the transaction in reserve_metadata_bytes
  Btrfs: make a delayed_block_rsv for the delayed item insertion
  Btrfs: add a log of past tree roots
  btrfs: separate superblock items out of fs_info
  Btrfs: use the global reserve when truncating the free space cache inode
  Btrfs: release metadata from global reserve if we have to fallback for unlink
  Btrfs: make sure to flush queued bios if write_cache_pages waits
  Btrfs: fix extent pinning bugs in the tree log
  Btrfs: make sure btrfs_remove_free_space doesn't leak EAGAIN
  Btrfs: don't wait as long for more batches during SSD log commit
  ...
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 457
1 files changed, 147 insertions, 310 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 75686a61bd45..966ddcc4c63d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -45,10 +45,10 @@
45#include "btrfs_inode.h" 45#include "btrfs_inode.h"
46#include "ioctl.h" 46#include "ioctl.h"
47#include "print-tree.h" 47#include "print-tree.h"
48#include "volumes.h"
49#include "ordered-data.h" 48#include "ordered-data.h"
50#include "xattr.h" 49#include "xattr.h"
51#include "tree-log.h" 50#include "tree-log.h"
51#include "volumes.h"
52#include "compression.h" 52#include "compression.h"
53#include "locking.h" 53#include "locking.h"
54#include "free-space-cache.h" 54#include "free-space-cache.h"
@@ -393,7 +393,10 @@ again:
393 (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { 393 (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
394 WARN_ON(pages); 394 WARN_ON(pages);
395 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 395 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
396 BUG_ON(!pages); 396 if (!pages) {
397 /* just bail out to the uncompressed code */
398 goto cont;
399 }
397 400
398 if (BTRFS_I(inode)->force_compress) 401 if (BTRFS_I(inode)->force_compress)
399 compress_type = BTRFS_I(inode)->force_compress; 402 compress_type = BTRFS_I(inode)->force_compress;
@@ -424,6 +427,7 @@ again:
424 will_compress = 1; 427 will_compress = 1;
425 } 428 }
426 } 429 }
430cont:
427 if (start == 0) { 431 if (start == 0) {
428 trans = btrfs_join_transaction(root); 432 trans = btrfs_join_transaction(root);
429 BUG_ON(IS_ERR(trans)); 433 BUG_ON(IS_ERR(trans));
@@ -820,7 +824,7 @@ static noinline int cow_file_range(struct inode *inode,
820 } 824 }
821 825
822 BUG_ON(disk_num_bytes > 826 BUG_ON(disk_num_bytes >
823 btrfs_super_total_bytes(&root->fs_info->super_copy)); 827 btrfs_super_total_bytes(root->fs_info->super_copy));
824 828
825 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); 829 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
826 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 830 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
@@ -1792,12 +1796,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1792 } 1796 }
1793 ret = 0; 1797 ret = 0;
1794out: 1798out:
1795 if (nolock) { 1799 if (root != root->fs_info->tree_root)
1796 if (trans)
1797 btrfs_end_transaction_nolock(trans, root);
1798 } else {
1799 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1800 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1800 if (trans) 1801 if (trans) {
1802 if (nolock)
1803 btrfs_end_transaction_nolock(trans, root);
1804 else
1801 btrfs_end_transaction(trans, root); 1805 btrfs_end_transaction(trans, root);
1802 } 1806 }
1803 1807
@@ -1819,153 +1823,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1819} 1823}
1820 1824
1821/* 1825/*
1822 * When IO fails, either with EIO or csum verification fails, we
1823 * try other mirrors that might have a good copy of the data. This
1824 * io_failure_record is used to record state as we go through all the
1825 * mirrors. If another mirror has good data, the page is set up to date
1826 * and things continue. If a good mirror can't be found, the original
1827 * bio end_io callback is called to indicate things have failed.
1828 */
1829struct io_failure_record {
1830 struct page *page;
1831 u64 start;
1832 u64 len;
1833 u64 logical;
1834 unsigned long bio_flags;
1835 int last_mirror;
1836};
1837
1838static int btrfs_io_failed_hook(struct bio *failed_bio,
1839 struct page *page, u64 start, u64 end,
1840 struct extent_state *state)
1841{
1842 struct io_failure_record *failrec = NULL;
1843 u64 private;
1844 struct extent_map *em;
1845 struct inode *inode = page->mapping->host;
1846 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
1847 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1848 struct bio *bio;
1849 int num_copies;
1850 int ret;
1851 int rw;
1852 u64 logical;
1853
1854 ret = get_state_private(failure_tree, start, &private);
1855 if (ret) {
1856 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
1857 if (!failrec)
1858 return -ENOMEM;
1859 failrec->start = start;
1860 failrec->len = end - start + 1;
1861 failrec->last_mirror = 0;
1862 failrec->bio_flags = 0;
1863
1864 read_lock(&em_tree->lock);
1865 em = lookup_extent_mapping(em_tree, start, failrec->len);
1866 if (em->start > start || em->start + em->len < start) {
1867 free_extent_map(em);
1868 em = NULL;
1869 }
1870 read_unlock(&em_tree->lock);
1871
1872 if (IS_ERR_OR_NULL(em)) {
1873 kfree(failrec);
1874 return -EIO;
1875 }
1876 logical = start - em->start;
1877 logical = em->block_start + logical;
1878 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
1879 logical = em->block_start;
1880 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
1881 extent_set_compress_type(&failrec->bio_flags,
1882 em->compress_type);
1883 }
1884 failrec->logical = logical;
1885 free_extent_map(em);
1886 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
1887 EXTENT_DIRTY, GFP_NOFS);
1888 set_state_private(failure_tree, start,
1889 (u64)(unsigned long)failrec);
1890 } else {
1891 failrec = (struct io_failure_record *)(unsigned long)private;
1892 }
1893 num_copies = btrfs_num_copies(
1894 &BTRFS_I(inode)->root->fs_info->mapping_tree,
1895 failrec->logical, failrec->len);
1896 failrec->last_mirror++;
1897 if (!state) {
1898 spin_lock(&BTRFS_I(inode)->io_tree.lock);
1899 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
1900 failrec->start,
1901 EXTENT_LOCKED);
1902 if (state && state->start != failrec->start)
1903 state = NULL;
1904 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
1905 }
1906 if (!state || failrec->last_mirror > num_copies) {
1907 set_state_private(failure_tree, failrec->start, 0);
1908 clear_extent_bits(failure_tree, failrec->start,
1909 failrec->start + failrec->len - 1,
1910 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
1911 kfree(failrec);
1912 return -EIO;
1913 }
1914 bio = bio_alloc(GFP_NOFS, 1);
1915 bio->bi_private = state;
1916 bio->bi_end_io = failed_bio->bi_end_io;
1917 bio->bi_sector = failrec->logical >> 9;
1918 bio->bi_bdev = failed_bio->bi_bdev;
1919 bio->bi_size = 0;
1920
1921 bio_add_page(bio, page, failrec->len, start - page_offset(page));
1922 if (failed_bio->bi_rw & REQ_WRITE)
1923 rw = WRITE;
1924 else
1925 rw = READ;
1926
1927 ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1928 failrec->last_mirror,
1929 failrec->bio_flags, 0);
1930 return ret;
1931}
1932
1933/*
1934 * each time an IO finishes, we do a fast check in the IO failure tree
1935 * to see if we need to process or clean up an io_failure_record
1936 */
1937static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1938{
1939 u64 private;
1940 u64 private_failure;
1941 struct io_failure_record *failure;
1942 int ret;
1943
1944 private = 0;
1945 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1946 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1947 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1948 start, &private_failure);
1949 if (ret == 0) {
1950 failure = (struct io_failure_record *)(unsigned long)
1951 private_failure;
1952 set_state_private(&BTRFS_I(inode)->io_failure_tree,
1953 failure->start, 0);
1954 clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
1955 failure->start,
1956 failure->start + failure->len - 1,
1957 EXTENT_DIRTY | EXTENT_LOCKED,
1958 GFP_NOFS);
1959 kfree(failure);
1960 }
1961 }
1962 return 0;
1963}
1964
1965/*
1966 * when reads are done, we need to check csums to verify the data is correct 1826 * when reads are done, we need to check csums to verify the data is correct
1967 * if there's a match, we allow the bio to finish. If not, we go through 1827 * if there's a match, we allow the bio to finish. If not, the code in
1968 * the io_failure_record routines to find good copies 1828 * extent_io.c will try to find good copies for us.
1969 */ 1829 */
1970static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 1830static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1971 struct extent_state *state) 1831 struct extent_state *state)
@@ -2011,10 +1871,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2011 1871
2012 kunmap_atomic(kaddr, KM_USER0); 1872 kunmap_atomic(kaddr, KM_USER0);
2013good: 1873good:
2014 /* if the io failure tree for this inode is non-empty,
2015 * check to see if we've recovered from a failed IO
2016 */
2017 btrfs_clean_io_failures(inode, start);
2018 return 0; 1874 return 0;
2019 1875
2020zeroit: 1876zeroit:
@@ -2079,89 +1935,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
2079 up_read(&root->fs_info->cleanup_work_sem); 1935 up_read(&root->fs_info->cleanup_work_sem);
2080} 1936}
2081 1937
2082/*
2083 * calculate extra metadata reservation when snapshotting a subvolume
2084 * contains orphan files.
2085 */
2086void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
2087 struct btrfs_pending_snapshot *pending,
2088 u64 *bytes_to_reserve)
2089{
2090 struct btrfs_root *root;
2091 struct btrfs_block_rsv *block_rsv;
2092 u64 num_bytes;
2093 int index;
2094
2095 root = pending->root;
2096 if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
2097 return;
2098
2099 block_rsv = root->orphan_block_rsv;
2100
2101 /* orphan block reservation for the snapshot */
2102 num_bytes = block_rsv->size;
2103
2104 /*
2105 * after the snapshot is created, COWing tree blocks may use more
2106 * space than it frees. So we should make sure there is enough
2107 * reserved space.
2108 */
2109 index = trans->transid & 0x1;
2110 if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
2111 num_bytes += block_rsv->size -
2112 (block_rsv->reserved + block_rsv->freed[index]);
2113 }
2114
2115 *bytes_to_reserve += num_bytes;
2116}
2117
2118void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
2119 struct btrfs_pending_snapshot *pending)
2120{
2121 struct btrfs_root *root = pending->root;
2122 struct btrfs_root *snap = pending->snap;
2123 struct btrfs_block_rsv *block_rsv;
2124 u64 num_bytes;
2125 int index;
2126 int ret;
2127
2128 if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
2129 return;
2130
2131 /* refill source subvolume's orphan block reservation */
2132 block_rsv = root->orphan_block_rsv;
2133 index = trans->transid & 0x1;
2134 if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
2135 num_bytes = block_rsv->size -
2136 (block_rsv->reserved + block_rsv->freed[index]);
2137 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
2138 root->orphan_block_rsv,
2139 num_bytes);
2140 BUG_ON(ret);
2141 }
2142
2143 /* setup orphan block reservation for the snapshot */
2144 block_rsv = btrfs_alloc_block_rsv(snap);
2145 BUG_ON(!block_rsv);
2146
2147 btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
2148 snap->orphan_block_rsv = block_rsv;
2149
2150 num_bytes = root->orphan_block_rsv->size;
2151 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
2152 block_rsv, num_bytes);
2153 BUG_ON(ret);
2154
2155#if 0
2156 /* insert orphan item for the snapshot */
2157 WARN_ON(!root->orphan_item_inserted);
2158 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2159 snap->root_key.objectid);
2160 BUG_ON(ret);
2161 snap->orphan_item_inserted = 1;
2162#endif
2163}
2164
2165enum btrfs_orphan_cleanup_state { 1938enum btrfs_orphan_cleanup_state {
2166 ORPHAN_CLEANUP_STARTED = 1, 1939 ORPHAN_CLEANUP_STARTED = 1,
2167 ORPHAN_CLEANUP_DONE = 2, 1940 ORPHAN_CLEANUP_DONE = 2,
@@ -2247,9 +2020,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2247 } 2020 }
2248 spin_unlock(&root->orphan_lock); 2021 spin_unlock(&root->orphan_lock);
2249 2022
2250 if (block_rsv)
2251 btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
2252
2253 /* grab metadata reservation from transaction handle */ 2023 /* grab metadata reservation from transaction handle */
2254 if (reserve) { 2024 if (reserve) {
2255 ret = btrfs_orphan_reserve_metadata(trans, inode); 2025 ret = btrfs_orphan_reserve_metadata(trans, inode);
@@ -2316,6 +2086,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2316 struct btrfs_key key, found_key; 2086 struct btrfs_key key, found_key;
2317 struct btrfs_trans_handle *trans; 2087 struct btrfs_trans_handle *trans;
2318 struct inode *inode; 2088 struct inode *inode;
2089 u64 last_objectid = 0;
2319 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2090 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2320 2091
2321 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) 2092 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
@@ -2367,41 +2138,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2367 * crossing root thing. we store the inode number in the 2138 * crossing root thing. we store the inode number in the
2368 * offset of the orphan item. 2139 * offset of the orphan item.
2369 */ 2140 */
2141
2142 if (found_key.offset == last_objectid) {
2143 printk(KERN_ERR "btrfs: Error removing orphan entry, "
2144 "stopping orphan cleanup\n");
2145 ret = -EINVAL;
2146 goto out;
2147 }
2148
2149 last_objectid = found_key.offset;
2150
2370 found_key.objectid = found_key.offset; 2151 found_key.objectid = found_key.offset;
2371 found_key.type = BTRFS_INODE_ITEM_KEY; 2152 found_key.type = BTRFS_INODE_ITEM_KEY;
2372 found_key.offset = 0; 2153 found_key.offset = 0;
2373 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); 2154 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2374 if (IS_ERR(inode)) { 2155 ret = PTR_RET(inode);
2375 ret = PTR_ERR(inode); 2156 if (ret && ret != -ESTALE)
2376 goto out; 2157 goto out;
2377 }
2378 2158
2379 /* 2159 /*
2380 * add this inode to the orphan list so btrfs_orphan_del does 2160 * Inode is already gone but the orphan item is still there,
2381 * the proper thing when we hit it 2161 * kill the orphan item.
2382 */ 2162 */
2383 spin_lock(&root->orphan_lock); 2163 if (ret == -ESTALE) {
2384 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2164 trans = btrfs_start_transaction(root, 1);
2385 spin_unlock(&root->orphan_lock);
2386
2387 /*
2388 * if this is a bad inode, means we actually succeeded in
2389 * removing the inode, but not the orphan record, which means
2390 * we need to manually delete the orphan since iput will just
2391 * do a destroy_inode
2392 */
2393 if (is_bad_inode(inode)) {
2394 trans = btrfs_start_transaction(root, 0);
2395 if (IS_ERR(trans)) { 2165 if (IS_ERR(trans)) {
2396 ret = PTR_ERR(trans); 2166 ret = PTR_ERR(trans);
2397 goto out; 2167 goto out;
2398 } 2168 }
2399 btrfs_orphan_del(trans, inode); 2169 ret = btrfs_del_orphan_item(trans, root,
2170 found_key.objectid);
2171 BUG_ON(ret);
2400 btrfs_end_transaction(trans, root); 2172 btrfs_end_transaction(trans, root);
2401 iput(inode);
2402 continue; 2173 continue;
2403 } 2174 }
2404 2175
2176 /*
2177 * add this inode to the orphan list so btrfs_orphan_del does
2178 * the proper thing when we hit it
2179 */
2180 spin_lock(&root->orphan_lock);
2181 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
2182 spin_unlock(&root->orphan_lock);
2183
2405 /* if we have links, this was a truncate, lets do that */ 2184 /* if we have links, this was a truncate, lets do that */
2406 if (inode->i_nlink) { 2185 if (inode->i_nlink) {
2407 if (!S_ISREG(inode->i_mode)) { 2186 if (!S_ISREG(inode->i_mode)) {
@@ -2835,7 +2614,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2835 u64 ino = btrfs_ino(inode); 2614 u64 ino = btrfs_ino(inode);
2836 u64 dir_ino = btrfs_ino(dir); 2615 u64 dir_ino = btrfs_ino(dir);
2837 2616
2838 trans = btrfs_start_transaction(root, 10); 2617 /*
2618 * 1 for the possible orphan item
2619 * 1 for the dir item
2620 * 1 for the dir index
2621 * 1 for the inode ref
2622 * 1 for the inode ref in the tree log
2623 * 2 for the dir entries in the log
2624 * 1 for the inode
2625 */
2626 trans = btrfs_start_transaction(root, 8);
2839 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 2627 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2840 return trans; 2628 return trans;
2841 2629
@@ -2858,7 +2646,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2858 return ERR_PTR(-ENOMEM); 2646 return ERR_PTR(-ENOMEM);
2859 } 2647 }
2860 2648
2861 trans = btrfs_start_transaction(root, 0); 2649 /* 1 for the orphan item */
2650 trans = btrfs_start_transaction(root, 1);
2862 if (IS_ERR(trans)) { 2651 if (IS_ERR(trans)) {
2863 btrfs_free_path(path); 2652 btrfs_free_path(path);
2864 root->fs_info->enospc_unlink = 0; 2653 root->fs_info->enospc_unlink = 0;
@@ -2963,6 +2752,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2963 err = 0; 2752 err = 0;
2964out: 2753out:
2965 btrfs_free_path(path); 2754 btrfs_free_path(path);
2755 /* Migrate the orphan reservation over */
2756 if (!err)
2757 err = btrfs_block_rsv_migrate(trans->block_rsv,
2758 &root->fs_info->global_block_rsv,
2759 trans->bytes_reserved);
2760
2966 if (err) { 2761 if (err) {
2967 btrfs_end_transaction(trans, root); 2762 btrfs_end_transaction(trans, root);
2968 root->fs_info->enospc_unlink = 0; 2763 root->fs_info->enospc_unlink = 0;
@@ -2977,6 +2772,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans,
2977 struct btrfs_root *root) 2772 struct btrfs_root *root)
2978{ 2773{
2979 if (trans->block_rsv == &root->fs_info->global_block_rsv) { 2774 if (trans->block_rsv == &root->fs_info->global_block_rsv) {
2775 btrfs_block_rsv_release(root, trans->block_rsv,
2776 trans->bytes_reserved);
2777 trans->block_rsv = &root->fs_info->trans_block_rsv;
2980 BUG_ON(!root->fs_info->enospc_unlink); 2778 BUG_ON(!root->fs_info->enospc_unlink);
2981 root->fs_info->enospc_unlink = 0; 2779 root->fs_info->enospc_unlink = 0;
2982 } 2780 }
@@ -3368,6 +3166,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3368 pgoff_t index = from >> PAGE_CACHE_SHIFT; 3166 pgoff_t index = from >> PAGE_CACHE_SHIFT;
3369 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3167 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3370 struct page *page; 3168 struct page *page;
3169 gfp_t mask = btrfs_alloc_write_mask(mapping);
3371 int ret = 0; 3170 int ret = 0;
3372 u64 page_start; 3171 u64 page_start;
3373 u64 page_end; 3172 u64 page_end;
@@ -3380,7 +3179,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3380 3179
3381 ret = -ENOMEM; 3180 ret = -ENOMEM;
3382again: 3181again:
3383 page = find_or_create_page(mapping, index, GFP_NOFS); 3182 page = find_or_create_page(mapping, index, mask);
3384 if (!page) { 3183 if (!page) {
3385 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 3184 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3386 goto out; 3185 goto out;
@@ -3613,6 +3412,8 @@ void btrfs_evict_inode(struct inode *inode)
3613{ 3412{
3614 struct btrfs_trans_handle *trans; 3413 struct btrfs_trans_handle *trans;
3615 struct btrfs_root *root = BTRFS_I(inode)->root; 3414 struct btrfs_root *root = BTRFS_I(inode)->root;
3415 struct btrfs_block_rsv *rsv, *global_rsv;
3416 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
3616 unsigned long nr; 3417 unsigned long nr;
3617 int ret; 3418 int ret;
3618 3419
@@ -3640,22 +3441,55 @@ void btrfs_evict_inode(struct inode *inode)
3640 goto no_delete; 3441 goto no_delete;
3641 } 3442 }
3642 3443
3444 rsv = btrfs_alloc_block_rsv(root);
3445 if (!rsv) {
3446 btrfs_orphan_del(NULL, inode);
3447 goto no_delete;
3448 }
3449 rsv->size = min_size;
3450 global_rsv = &root->fs_info->global_block_rsv;
3451
3643 btrfs_i_size_write(inode, 0); 3452 btrfs_i_size_write(inode, 0);
3644 3453
3454 /*
3455 * This is a bit simpler than btrfs_truncate since
3456 *
3457 * 1) We've already reserved our space for our orphan item in the
3458 * unlink.
3459 * 2) We're going to delete the inode item, so we don't need to update
3460 * it at all.
3461 *
3462 * So we just need to reserve some slack space in case we add bytes when
3463 * doing the truncate.
3464 */
3645 while (1) { 3465 while (1) {
3646 trans = btrfs_join_transaction(root); 3466 ret = btrfs_block_rsv_refill(root, rsv, min_size);
3647 BUG_ON(IS_ERR(trans)); 3467
3648 trans->block_rsv = root->orphan_block_rsv; 3468 /*
3469 * Try and steal from the global reserve since we will
3470 * likely not use this space anyway, we want to try as
3471 * hard as possible to get this to work.
3472 */
3473 if (ret)
3474 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
3649 3475
3650 ret = btrfs_block_rsv_check(trans, root,
3651 root->orphan_block_rsv, 0, 5);
3652 if (ret) { 3476 if (ret) {
3653 BUG_ON(ret != -EAGAIN); 3477 printk(KERN_WARNING "Could not get space for a "
3654 ret = btrfs_commit_transaction(trans, root); 3478 "delete, will truncate on mount %d\n", ret);
3655 BUG_ON(ret); 3479 btrfs_orphan_del(NULL, inode);
3656 continue; 3480 btrfs_free_block_rsv(root, rsv);
3481 goto no_delete;
3657 } 3482 }
3658 3483
3484 trans = btrfs_start_transaction(root, 0);
3485 if (IS_ERR(trans)) {
3486 btrfs_orphan_del(NULL, inode);
3487 btrfs_free_block_rsv(root, rsv);
3488 goto no_delete;
3489 }
3490
3491 trans->block_rsv = rsv;
3492
3659 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); 3493 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3660 if (ret != -EAGAIN) 3494 if (ret != -EAGAIN)
3661 break; 3495 break;
@@ -3664,14 +3498,17 @@ void btrfs_evict_inode(struct inode *inode)
3664 btrfs_end_transaction(trans, root); 3498 btrfs_end_transaction(trans, root);
3665 trans = NULL; 3499 trans = NULL;
3666 btrfs_btree_balance_dirty(root, nr); 3500 btrfs_btree_balance_dirty(root, nr);
3667
3668 } 3501 }
3669 3502
3503 btrfs_free_block_rsv(root, rsv);
3504
3670 if (ret == 0) { 3505 if (ret == 0) {
3506 trans->block_rsv = root->orphan_block_rsv;
3671 ret = btrfs_orphan_del(trans, inode); 3507 ret = btrfs_orphan_del(trans, inode);
3672 BUG_ON(ret); 3508 BUG_ON(ret);
3673 } 3509 }
3674 3510
3511 trans->block_rsv = &root->fs_info->trans_block_rsv;
3675 if (!(root == root->fs_info->tree_root || 3512 if (!(root == root->fs_info->tree_root ||
3676 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) 3513 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
3677 btrfs_return_ino(root, btrfs_ino(inode)); 3514 btrfs_return_ino(root, btrfs_ino(inode));
@@ -5795,8 +5632,7 @@ again:
5795 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { 5632 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5796 ret = btrfs_ordered_update_i_size(inode, 0, ordered); 5633 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5797 if (!ret) 5634 if (!ret)
5798 ret = btrfs_update_inode(trans, root, inode); 5635 err = btrfs_update_inode(trans, root, inode);
5799 err = ret;
5800 goto out; 5636 goto out;
5801 } 5637 }
5802 5638
@@ -6289,7 +6125,7 @@ int btrfs_readpage(struct file *file, struct page *page)
6289{ 6125{
6290 struct extent_io_tree *tree; 6126 struct extent_io_tree *tree;
6291 tree = &BTRFS_I(page->mapping->host)->io_tree; 6127 tree = &BTRFS_I(page->mapping->host)->io_tree;
6292 return extent_read_full_page(tree, page, btrfs_get_extent); 6128 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
6293} 6129}
6294 6130
6295static int btrfs_writepage(struct page *page, struct writeback_control *wbc) 6131static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -6541,6 +6377,7 @@ static int btrfs_truncate(struct inode *inode)
6541 struct btrfs_trans_handle *trans; 6377 struct btrfs_trans_handle *trans;
6542 unsigned long nr; 6378 unsigned long nr;
6543 u64 mask = root->sectorsize - 1; 6379 u64 mask = root->sectorsize - 1;
6380 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
6544 6381
6545 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); 6382 ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
6546 if (ret) 6383 if (ret)
@@ -6588,19 +6425,23 @@ static int btrfs_truncate(struct inode *inode)
6588 rsv = btrfs_alloc_block_rsv(root); 6425 rsv = btrfs_alloc_block_rsv(root);
6589 if (!rsv) 6426 if (!rsv)
6590 return -ENOMEM; 6427 return -ENOMEM;
6591 btrfs_add_durable_block_rsv(root->fs_info, rsv); 6428 rsv->size = min_size;
6592 6429
6430 /*
6431 * 1 for the truncate slack space
6432 * 1 for the orphan item we're going to add
6433 * 1 for the orphan item deletion
6434 * 1 for updating the inode.
6435 */
6593 trans = btrfs_start_transaction(root, 4); 6436 trans = btrfs_start_transaction(root, 4);
6594 if (IS_ERR(trans)) { 6437 if (IS_ERR(trans)) {
6595 err = PTR_ERR(trans); 6438 err = PTR_ERR(trans);
6596 goto out; 6439 goto out;
6597 } 6440 }
6598 6441
6599 /* 6442 /* Migrate the slack space for the truncate to our reserve */
6600 * Reserve space for the truncate process. Truncate should be adding 6443 ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
6601 * space, but if there are snapshots it may end up using space. 6444 min_size);
6602 */
6603 ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
6604 BUG_ON(ret); 6445 BUG_ON(ret);
6605 6446
6606 ret = btrfs_orphan_add(trans, inode); 6447 ret = btrfs_orphan_add(trans, inode);
@@ -6609,21 +6450,6 @@ static int btrfs_truncate(struct inode *inode)
6609 goto out; 6450 goto out;
6610 } 6451 }
6611 6452
6612 nr = trans->blocks_used;
6613 btrfs_end_transaction(trans, root);
6614 btrfs_btree_balance_dirty(root, nr);
6615
6616 /*
6617 * Ok so we've already migrated our bytes over for the truncate, so here
6618 * just reserve the one slot we need for updating the inode.
6619 */
6620 trans = btrfs_start_transaction(root, 1);
6621 if (IS_ERR(trans)) {
6622 err = PTR_ERR(trans);
6623 goto out;
6624 }
6625 trans->block_rsv = rsv;
6626
6627 /* 6453 /*
6628 * setattr is responsible for setting the ordered_data_close flag, 6454 * setattr is responsible for setting the ordered_data_close flag,
6629 * but that is only tested during the last file release. That 6455 * but that is only tested during the last file release. That
@@ -6645,20 +6471,30 @@ static int btrfs_truncate(struct inode *inode)
6645 btrfs_add_ordered_operation(trans, root, inode); 6471 btrfs_add_ordered_operation(trans, root, inode);
6646 6472
6647 while (1) { 6473 while (1) {
6474 ret = btrfs_block_rsv_refill(root, rsv, min_size);
6475 if (ret) {
6476 /*
6477 * This can only happen with the original transaction we
6478 * started above, every other time we shouldn't have a
6479 * transaction started yet.
6480 */
6481 if (ret == -EAGAIN)
6482 goto end_trans;
6483 err = ret;
6484 break;
6485 }
6486
6648 if (!trans) { 6487 if (!trans) {
6649 trans = btrfs_start_transaction(root, 3); 6488 /* Just need the 1 for updating the inode */
6489 trans = btrfs_start_transaction(root, 1);
6650 if (IS_ERR(trans)) { 6490 if (IS_ERR(trans)) {
6651 err = PTR_ERR(trans); 6491 err = PTR_ERR(trans);
6652 goto out; 6492 goto out;
6653 } 6493 }
6654
6655 ret = btrfs_truncate_reserve_metadata(trans, root,
6656 rsv);
6657 BUG_ON(ret);
6658
6659 trans->block_rsv = rsv;
6660 } 6494 }
6661 6495
6496 trans->block_rsv = rsv;
6497
6662 ret = btrfs_truncate_inode_items(trans, root, inode, 6498 ret = btrfs_truncate_inode_items(trans, root, inode,
6663 inode->i_size, 6499 inode->i_size,
6664 BTRFS_EXTENT_DATA_KEY); 6500 BTRFS_EXTENT_DATA_KEY);
@@ -6673,7 +6509,7 @@ static int btrfs_truncate(struct inode *inode)
6673 err = ret; 6509 err = ret;
6674 break; 6510 break;
6675 } 6511 }
6676 6512end_trans:
6677 nr = trans->blocks_used; 6513 nr = trans->blocks_used;
6678 btrfs_end_transaction(trans, root); 6514 btrfs_end_transaction(trans, root);
6679 trans = NULL; 6515 trans = NULL;
@@ -6755,9 +6591,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6755 ei->last_sub_trans = 0; 6591 ei->last_sub_trans = 0;
6756 ei->logged_trans = 0; 6592 ei->logged_trans = 0;
6757 ei->delalloc_bytes = 0; 6593 ei->delalloc_bytes = 0;
6758 ei->reserved_bytes = 0;
6759 ei->disk_i_size = 0; 6594 ei->disk_i_size = 0;
6760 ei->flags = 0; 6595 ei->flags = 0;
6596 ei->csum_bytes = 0;
6761 ei->index_cnt = (u64)-1; 6597 ei->index_cnt = (u64)-1;
6762 ei->last_unlink_trans = 0; 6598 ei->last_unlink_trans = 0;
6763 6599
@@ -6803,6 +6639,8 @@ void btrfs_destroy_inode(struct inode *inode)
6803 WARN_ON(inode->i_data.nrpages); 6639 WARN_ON(inode->i_data.nrpages);
6804 WARN_ON(BTRFS_I(inode)->outstanding_extents); 6640 WARN_ON(BTRFS_I(inode)->outstanding_extents);
6805 WARN_ON(BTRFS_I(inode)->reserved_extents); 6641 WARN_ON(BTRFS_I(inode)->reserved_extents);
6642 WARN_ON(BTRFS_I(inode)->delalloc_bytes);
6643 WARN_ON(BTRFS_I(inode)->csum_bytes);
6806 6644
6807 /* 6645 /*
6808 * This can happen where we create an inode, but somebody else also 6646 * This can happen where we create an inode, but somebody else also
@@ -7420,7 +7258,6 @@ static struct extent_io_ops btrfs_extent_io_ops = {
7420 .readpage_end_io_hook = btrfs_readpage_end_io_hook, 7258 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
7421 .writepage_end_io_hook = btrfs_writepage_end_io_hook, 7259 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
7422 .writepage_start_hook = btrfs_writepage_start_hook, 7260 .writepage_start_hook = btrfs_writepage_start_hook,
7423 .readpage_io_failed_hook = btrfs_io_failed_hook,
7424 .set_bit_hook = btrfs_set_bit_hook, 7261 .set_bit_hook = btrfs_set_bit_hook,
7425 .clear_bit_hook = btrfs_clear_bit_hook, 7262 .clear_bit_hook = btrfs_clear_bit_hook,
7426 .merge_extent_hook = btrfs_merge_extent_hook, 7263 .merge_extent_hook = btrfs_merge_extent_hook,