diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 457 |
1 files changed, 147 insertions, 310 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 75686a61bd45..966ddcc4c63d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -45,10 +45,10 @@ | |||
45 | #include "btrfs_inode.h" | 45 | #include "btrfs_inode.h" |
46 | #include "ioctl.h" | 46 | #include "ioctl.h" |
47 | #include "print-tree.h" | 47 | #include "print-tree.h" |
48 | #include "volumes.h" | ||
49 | #include "ordered-data.h" | 48 | #include "ordered-data.h" |
50 | #include "xattr.h" | 49 | #include "xattr.h" |
51 | #include "tree-log.h" | 50 | #include "tree-log.h" |
51 | #include "volumes.h" | ||
52 | #include "compression.h" | 52 | #include "compression.h" |
53 | #include "locking.h" | 53 | #include "locking.h" |
54 | #include "free-space-cache.h" | 54 | #include "free-space-cache.h" |
@@ -393,7 +393,10 @@ again: | |||
393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | 393 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { |
394 | WARN_ON(pages); | 394 | WARN_ON(pages); |
395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 395 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
396 | BUG_ON(!pages); | 396 | if (!pages) { |
397 | /* just bail out to the uncompressed code */ | ||
398 | goto cont; | ||
399 | } | ||
397 | 400 | ||
398 | if (BTRFS_I(inode)->force_compress) | 401 | if (BTRFS_I(inode)->force_compress) |
399 | compress_type = BTRFS_I(inode)->force_compress; | 402 | compress_type = BTRFS_I(inode)->force_compress; |
@@ -424,6 +427,7 @@ again: | |||
424 | will_compress = 1; | 427 | will_compress = 1; |
425 | } | 428 | } |
426 | } | 429 | } |
430 | cont: | ||
427 | if (start == 0) { | 431 | if (start == 0) { |
428 | trans = btrfs_join_transaction(root); | 432 | trans = btrfs_join_transaction(root); |
429 | BUG_ON(IS_ERR(trans)); | 433 | BUG_ON(IS_ERR(trans)); |
@@ -820,7 +824,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
820 | } | 824 | } |
821 | 825 | ||
822 | BUG_ON(disk_num_bytes > | 826 | BUG_ON(disk_num_bytes > |
823 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 827 | btrfs_super_total_bytes(root->fs_info->super_copy)); |
824 | 828 | ||
825 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | 829 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); |
826 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 830 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
@@ -1792,12 +1796,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1792 | } | 1796 | } |
1793 | ret = 0; | 1797 | ret = 0; |
1794 | out: | 1798 | out: |
1795 | if (nolock) { | 1799 | if (root != root->fs_info->tree_root) |
1796 | if (trans) | ||
1797 | btrfs_end_transaction_nolock(trans, root); | ||
1798 | } else { | ||
1799 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1800 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
1800 | if (trans) | 1801 | if (trans) { |
1802 | if (nolock) | ||
1803 | btrfs_end_transaction_nolock(trans, root); | ||
1804 | else | ||
1801 | btrfs_end_transaction(trans, root); | 1805 | btrfs_end_transaction(trans, root); |
1802 | } | 1806 | } |
1803 | 1807 | ||
@@ -1819,153 +1823,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1819 | } | 1823 | } |
1820 | 1824 | ||
1821 | /* | 1825 | /* |
1822 | * When IO fails, either with EIO or csum verification fails, we | ||
1823 | * try other mirrors that might have a good copy of the data. This | ||
1824 | * io_failure_record is used to record state as we go through all the | ||
1825 | * mirrors. If another mirror has good data, the page is set up to date | ||
1826 | * and things continue. If a good mirror can't be found, the original | ||
1827 | * bio end_io callback is called to indicate things have failed. | ||
1828 | */ | ||
1829 | struct io_failure_record { | ||
1830 | struct page *page; | ||
1831 | u64 start; | ||
1832 | u64 len; | ||
1833 | u64 logical; | ||
1834 | unsigned long bio_flags; | ||
1835 | int last_mirror; | ||
1836 | }; | ||
1837 | |||
1838 | static int btrfs_io_failed_hook(struct bio *failed_bio, | ||
1839 | struct page *page, u64 start, u64 end, | ||
1840 | struct extent_state *state) | ||
1841 | { | ||
1842 | struct io_failure_record *failrec = NULL; | ||
1843 | u64 private; | ||
1844 | struct extent_map *em; | ||
1845 | struct inode *inode = page->mapping->host; | ||
1846 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
1847 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
1848 | struct bio *bio; | ||
1849 | int num_copies; | ||
1850 | int ret; | ||
1851 | int rw; | ||
1852 | u64 logical; | ||
1853 | |||
1854 | ret = get_state_private(failure_tree, start, &private); | ||
1855 | if (ret) { | ||
1856 | failrec = kmalloc(sizeof(*failrec), GFP_NOFS); | ||
1857 | if (!failrec) | ||
1858 | return -ENOMEM; | ||
1859 | failrec->start = start; | ||
1860 | failrec->len = end - start + 1; | ||
1861 | failrec->last_mirror = 0; | ||
1862 | failrec->bio_flags = 0; | ||
1863 | |||
1864 | read_lock(&em_tree->lock); | ||
1865 | em = lookup_extent_mapping(em_tree, start, failrec->len); | ||
1866 | if (em->start > start || em->start + em->len < start) { | ||
1867 | free_extent_map(em); | ||
1868 | em = NULL; | ||
1869 | } | ||
1870 | read_unlock(&em_tree->lock); | ||
1871 | |||
1872 | if (IS_ERR_OR_NULL(em)) { | ||
1873 | kfree(failrec); | ||
1874 | return -EIO; | ||
1875 | } | ||
1876 | logical = start - em->start; | ||
1877 | logical = em->block_start + logical; | ||
1878 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | ||
1879 | logical = em->block_start; | ||
1880 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | ||
1881 | extent_set_compress_type(&failrec->bio_flags, | ||
1882 | em->compress_type); | ||
1883 | } | ||
1884 | failrec->logical = logical; | ||
1885 | free_extent_map(em); | ||
1886 | set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | | ||
1887 | EXTENT_DIRTY, GFP_NOFS); | ||
1888 | set_state_private(failure_tree, start, | ||
1889 | (u64)(unsigned long)failrec); | ||
1890 | } else { | ||
1891 | failrec = (struct io_failure_record *)(unsigned long)private; | ||
1892 | } | ||
1893 | num_copies = btrfs_num_copies( | ||
1894 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | ||
1895 | failrec->logical, failrec->len); | ||
1896 | failrec->last_mirror++; | ||
1897 | if (!state) { | ||
1898 | spin_lock(&BTRFS_I(inode)->io_tree.lock); | ||
1899 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | ||
1900 | failrec->start, | ||
1901 | EXTENT_LOCKED); | ||
1902 | if (state && state->start != failrec->start) | ||
1903 | state = NULL; | ||
1904 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); | ||
1905 | } | ||
1906 | if (!state || failrec->last_mirror > num_copies) { | ||
1907 | set_state_private(failure_tree, failrec->start, 0); | ||
1908 | clear_extent_bits(failure_tree, failrec->start, | ||
1909 | failrec->start + failrec->len - 1, | ||
1910 | EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); | ||
1911 | kfree(failrec); | ||
1912 | return -EIO; | ||
1913 | } | ||
1914 | bio = bio_alloc(GFP_NOFS, 1); | ||
1915 | bio->bi_private = state; | ||
1916 | bio->bi_end_io = failed_bio->bi_end_io; | ||
1917 | bio->bi_sector = failrec->logical >> 9; | ||
1918 | bio->bi_bdev = failed_bio->bi_bdev; | ||
1919 | bio->bi_size = 0; | ||
1920 | |||
1921 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | ||
1922 | if (failed_bio->bi_rw & REQ_WRITE) | ||
1923 | rw = WRITE; | ||
1924 | else | ||
1925 | rw = READ; | ||
1926 | |||
1927 | ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | ||
1928 | failrec->last_mirror, | ||
1929 | failrec->bio_flags, 0); | ||
1930 | return ret; | ||
1931 | } | ||
1932 | |||
1933 | /* | ||
1934 | * each time an IO finishes, we do a fast check in the IO failure tree | ||
1935 | * to see if we need to process or clean up an io_failure_record | ||
1936 | */ | ||
1937 | static int btrfs_clean_io_failures(struct inode *inode, u64 start) | ||
1938 | { | ||
1939 | u64 private; | ||
1940 | u64 private_failure; | ||
1941 | struct io_failure_record *failure; | ||
1942 | int ret; | ||
1943 | |||
1944 | private = 0; | ||
1945 | if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | ||
1946 | (u64)-1, 1, EXTENT_DIRTY, 0)) { | ||
1947 | ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
1948 | start, &private_failure); | ||
1949 | if (ret == 0) { | ||
1950 | failure = (struct io_failure_record *)(unsigned long) | ||
1951 | private_failure; | ||
1952 | set_state_private(&BTRFS_I(inode)->io_failure_tree, | ||
1953 | failure->start, 0); | ||
1954 | clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, | ||
1955 | failure->start, | ||
1956 | failure->start + failure->len - 1, | ||
1957 | EXTENT_DIRTY | EXTENT_LOCKED, | ||
1958 | GFP_NOFS); | ||
1959 | kfree(failure); | ||
1960 | } | ||
1961 | } | ||
1962 | return 0; | ||
1963 | } | ||
1964 | |||
1965 | /* | ||
1966 | * when reads are done, we need to check csums to verify the data is correct | 1826 | * when reads are done, we need to check csums to verify the data is correct |
1967 | * if there's a match, we allow the bio to finish. If not, we go through | 1827 | * if there's a match, we allow the bio to finish. If not, the code in |
1968 | * the io_failure_record routines to find good copies | 1828 | * extent_io.c will try to find good copies for us. |
1969 | */ | 1829 | */ |
1970 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 1830 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
1971 | struct extent_state *state) | 1831 | struct extent_state *state) |
@@ -2011,10 +1871,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2011 | 1871 | ||
2012 | kunmap_atomic(kaddr, KM_USER0); | 1872 | kunmap_atomic(kaddr, KM_USER0); |
2013 | good: | 1873 | good: |
2014 | /* if the io failure tree for this inode is non-empty, | ||
2015 | * check to see if we've recovered from a failed IO | ||
2016 | */ | ||
2017 | btrfs_clean_io_failures(inode, start); | ||
2018 | return 0; | 1874 | return 0; |
2019 | 1875 | ||
2020 | zeroit: | 1876 | zeroit: |
@@ -2079,89 +1935,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
2079 | up_read(&root->fs_info->cleanup_work_sem); | 1935 | up_read(&root->fs_info->cleanup_work_sem); |
2080 | } | 1936 | } |
2081 | 1937 | ||
2082 | /* | ||
2083 | * calculate extra metadata reservation when snapshotting a subvolume | ||
2084 | * contains orphan files. | ||
2085 | */ | ||
2086 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2087 | struct btrfs_pending_snapshot *pending, | ||
2088 | u64 *bytes_to_reserve) | ||
2089 | { | ||
2090 | struct btrfs_root *root; | ||
2091 | struct btrfs_block_rsv *block_rsv; | ||
2092 | u64 num_bytes; | ||
2093 | int index; | ||
2094 | |||
2095 | root = pending->root; | ||
2096 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2097 | return; | ||
2098 | |||
2099 | block_rsv = root->orphan_block_rsv; | ||
2100 | |||
2101 | /* orphan block reservation for the snapshot */ | ||
2102 | num_bytes = block_rsv->size; | ||
2103 | |||
2104 | /* | ||
2105 | * after the snapshot is created, COWing tree blocks may use more | ||
2106 | * space than it frees. So we should make sure there is enough | ||
2107 | * reserved space. | ||
2108 | */ | ||
2109 | index = trans->transid & 0x1; | ||
2110 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2111 | num_bytes += block_rsv->size - | ||
2112 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2113 | } | ||
2114 | |||
2115 | *bytes_to_reserve += num_bytes; | ||
2116 | } | ||
2117 | |||
2118 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2119 | struct btrfs_pending_snapshot *pending) | ||
2120 | { | ||
2121 | struct btrfs_root *root = pending->root; | ||
2122 | struct btrfs_root *snap = pending->snap; | ||
2123 | struct btrfs_block_rsv *block_rsv; | ||
2124 | u64 num_bytes; | ||
2125 | int index; | ||
2126 | int ret; | ||
2127 | |||
2128 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2129 | return; | ||
2130 | |||
2131 | /* refill source subvolume's orphan block reservation */ | ||
2132 | block_rsv = root->orphan_block_rsv; | ||
2133 | index = trans->transid & 0x1; | ||
2134 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2135 | num_bytes = block_rsv->size - | ||
2136 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2137 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2138 | root->orphan_block_rsv, | ||
2139 | num_bytes); | ||
2140 | BUG_ON(ret); | ||
2141 | } | ||
2142 | |||
2143 | /* setup orphan block reservation for the snapshot */ | ||
2144 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
2145 | BUG_ON(!block_rsv); | ||
2146 | |||
2147 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2148 | snap->orphan_block_rsv = block_rsv; | ||
2149 | |||
2150 | num_bytes = root->orphan_block_rsv->size; | ||
2151 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2152 | block_rsv, num_bytes); | ||
2153 | BUG_ON(ret); | ||
2154 | |||
2155 | #if 0 | ||
2156 | /* insert orphan item for the snapshot */ | ||
2157 | WARN_ON(!root->orphan_item_inserted); | ||
2158 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2159 | snap->root_key.objectid); | ||
2160 | BUG_ON(ret); | ||
2161 | snap->orphan_item_inserted = 1; | ||
2162 | #endif | ||
2163 | } | ||
2164 | |||
2165 | enum btrfs_orphan_cleanup_state { | 1938 | enum btrfs_orphan_cleanup_state { |
2166 | ORPHAN_CLEANUP_STARTED = 1, | 1939 | ORPHAN_CLEANUP_STARTED = 1, |
2167 | ORPHAN_CLEANUP_DONE = 2, | 1940 | ORPHAN_CLEANUP_DONE = 2, |
@@ -2247,9 +2020,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2247 | } | 2020 | } |
2248 | spin_unlock(&root->orphan_lock); | 2021 | spin_unlock(&root->orphan_lock); |
2249 | 2022 | ||
2250 | if (block_rsv) | ||
2251 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2252 | |||
2253 | /* grab metadata reservation from transaction handle */ | 2023 | /* grab metadata reservation from transaction handle */ |
2254 | if (reserve) { | 2024 | if (reserve) { |
2255 | ret = btrfs_orphan_reserve_metadata(trans, inode); | 2025 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
@@ -2316,6 +2086,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2316 | struct btrfs_key key, found_key; | 2086 | struct btrfs_key key, found_key; |
2317 | struct btrfs_trans_handle *trans; | 2087 | struct btrfs_trans_handle *trans; |
2318 | struct inode *inode; | 2088 | struct inode *inode; |
2089 | u64 last_objectid = 0; | ||
2319 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2090 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2320 | 2091 | ||
2321 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) | 2092 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
@@ -2367,41 +2138,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2367 | * crossing root thing. we store the inode number in the | 2138 | * crossing root thing. we store the inode number in the |
2368 | * offset of the orphan item. | 2139 | * offset of the orphan item. |
2369 | */ | 2140 | */ |
2141 | |||
2142 | if (found_key.offset == last_objectid) { | ||
2143 | printk(KERN_ERR "btrfs: Error removing orphan entry, " | ||
2144 | "stopping orphan cleanup\n"); | ||
2145 | ret = -EINVAL; | ||
2146 | goto out; | ||
2147 | } | ||
2148 | |||
2149 | last_objectid = found_key.offset; | ||
2150 | |||
2370 | found_key.objectid = found_key.offset; | 2151 | found_key.objectid = found_key.offset; |
2371 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2152 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2372 | found_key.offset = 0; | 2153 | found_key.offset = 0; |
2373 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2154 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2374 | if (IS_ERR(inode)) { | 2155 | ret = PTR_RET(inode); |
2375 | ret = PTR_ERR(inode); | 2156 | if (ret && ret != -ESTALE) |
2376 | goto out; | 2157 | goto out; |
2377 | } | ||
2378 | 2158 | ||
2379 | /* | 2159 | /* |
2380 | * add this inode to the orphan list so btrfs_orphan_del does | 2160 | * Inode is already gone but the orphan item is still there, |
2381 | * the proper thing when we hit it | 2161 | * kill the orphan item. |
2382 | */ | 2162 | */ |
2383 | spin_lock(&root->orphan_lock); | 2163 | if (ret == -ESTALE) { |
2384 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2164 | trans = btrfs_start_transaction(root, 1); |
2385 | spin_unlock(&root->orphan_lock); | ||
2386 | |||
2387 | /* | ||
2388 | * if this is a bad inode, means we actually succeeded in | ||
2389 | * removing the inode, but not the orphan record, which means | ||
2390 | * we need to manually delete the orphan since iput will just | ||
2391 | * do a destroy_inode | ||
2392 | */ | ||
2393 | if (is_bad_inode(inode)) { | ||
2394 | trans = btrfs_start_transaction(root, 0); | ||
2395 | if (IS_ERR(trans)) { | 2165 | if (IS_ERR(trans)) { |
2396 | ret = PTR_ERR(trans); | 2166 | ret = PTR_ERR(trans); |
2397 | goto out; | 2167 | goto out; |
2398 | } | 2168 | } |
2399 | btrfs_orphan_del(trans, inode); | 2169 | ret = btrfs_del_orphan_item(trans, root, |
2170 | found_key.objectid); | ||
2171 | BUG_ON(ret); | ||
2400 | btrfs_end_transaction(trans, root); | 2172 | btrfs_end_transaction(trans, root); |
2401 | iput(inode); | ||
2402 | continue; | 2173 | continue; |
2403 | } | 2174 | } |
2404 | 2175 | ||
2176 | /* | ||
2177 | * add this inode to the orphan list so btrfs_orphan_del does | ||
2178 | * the proper thing when we hit it | ||
2179 | */ | ||
2180 | spin_lock(&root->orphan_lock); | ||
2181 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2182 | spin_unlock(&root->orphan_lock); | ||
2183 | |||
2405 | /* if we have links, this was a truncate, lets do that */ | 2184 | /* if we have links, this was a truncate, lets do that */ |
2406 | if (inode->i_nlink) { | 2185 | if (inode->i_nlink) { |
2407 | if (!S_ISREG(inode->i_mode)) { | 2186 | if (!S_ISREG(inode->i_mode)) { |
@@ -2835,7 +2614,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2835 | u64 ino = btrfs_ino(inode); | 2614 | u64 ino = btrfs_ino(inode); |
2836 | u64 dir_ino = btrfs_ino(dir); | 2615 | u64 dir_ino = btrfs_ino(dir); |
2837 | 2616 | ||
2838 | trans = btrfs_start_transaction(root, 10); | 2617 | /* |
2618 | * 1 for the possible orphan item | ||
2619 | * 1 for the dir item | ||
2620 | * 1 for the dir index | ||
2621 | * 1 for the inode ref | ||
2622 | * 1 for the inode ref in the tree log | ||
2623 | * 2 for the dir entries in the log | ||
2624 | * 1 for the inode | ||
2625 | */ | ||
2626 | trans = btrfs_start_transaction(root, 8); | ||
2839 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | 2627 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) |
2840 | return trans; | 2628 | return trans; |
2841 | 2629 | ||
@@ -2858,7 +2646,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2858 | return ERR_PTR(-ENOMEM); | 2646 | return ERR_PTR(-ENOMEM); |
2859 | } | 2647 | } |
2860 | 2648 | ||
2861 | trans = btrfs_start_transaction(root, 0); | 2649 | /* 1 for the orphan item */ |
2650 | trans = btrfs_start_transaction(root, 1); | ||
2862 | if (IS_ERR(trans)) { | 2651 | if (IS_ERR(trans)) { |
2863 | btrfs_free_path(path); | 2652 | btrfs_free_path(path); |
2864 | root->fs_info->enospc_unlink = 0; | 2653 | root->fs_info->enospc_unlink = 0; |
@@ -2963,6 +2752,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | |||
2963 | err = 0; | 2752 | err = 0; |
2964 | out: | 2753 | out: |
2965 | btrfs_free_path(path); | 2754 | btrfs_free_path(path); |
2755 | /* Migrate the orphan reservation over */ | ||
2756 | if (!err) | ||
2757 | err = btrfs_block_rsv_migrate(trans->block_rsv, | ||
2758 | &root->fs_info->global_block_rsv, | ||
2759 | trans->bytes_reserved); | ||
2760 | |||
2966 | if (err) { | 2761 | if (err) { |
2967 | btrfs_end_transaction(trans, root); | 2762 | btrfs_end_transaction(trans, root); |
2968 | root->fs_info->enospc_unlink = 0; | 2763 | root->fs_info->enospc_unlink = 0; |
@@ -2977,6 +2772,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, | |||
2977 | struct btrfs_root *root) | 2772 | struct btrfs_root *root) |
2978 | { | 2773 | { |
2979 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | 2774 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { |
2775 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
2776 | trans->bytes_reserved); | ||
2777 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
2980 | BUG_ON(!root->fs_info->enospc_unlink); | 2778 | BUG_ON(!root->fs_info->enospc_unlink); |
2981 | root->fs_info->enospc_unlink = 0; | 2779 | root->fs_info->enospc_unlink = 0; |
2982 | } | 2780 | } |
@@ -3368,6 +3166,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3368 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | 3166 | pgoff_t index = from >> PAGE_CACHE_SHIFT; |
3369 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3167 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3370 | struct page *page; | 3168 | struct page *page; |
3169 | gfp_t mask = btrfs_alloc_write_mask(mapping); | ||
3371 | int ret = 0; | 3170 | int ret = 0; |
3372 | u64 page_start; | 3171 | u64 page_start; |
3373 | u64 page_end; | 3172 | u64 page_end; |
@@ -3380,7 +3179,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3380 | 3179 | ||
3381 | ret = -ENOMEM; | 3180 | ret = -ENOMEM; |
3382 | again: | 3181 | again: |
3383 | page = find_or_create_page(mapping, index, GFP_NOFS); | 3182 | page = find_or_create_page(mapping, index, mask); |
3384 | if (!page) { | 3183 | if (!page) { |
3385 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3184 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3386 | goto out; | 3185 | goto out; |
@@ -3613,6 +3412,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3613 | { | 3412 | { |
3614 | struct btrfs_trans_handle *trans; | 3413 | struct btrfs_trans_handle *trans; |
3615 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3414 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3415 | struct btrfs_block_rsv *rsv, *global_rsv; | ||
3416 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
3616 | unsigned long nr; | 3417 | unsigned long nr; |
3617 | int ret; | 3418 | int ret; |
3618 | 3419 | ||
@@ -3640,22 +3441,55 @@ void btrfs_evict_inode(struct inode *inode) | |||
3640 | goto no_delete; | 3441 | goto no_delete; |
3641 | } | 3442 | } |
3642 | 3443 | ||
3444 | rsv = btrfs_alloc_block_rsv(root); | ||
3445 | if (!rsv) { | ||
3446 | btrfs_orphan_del(NULL, inode); | ||
3447 | goto no_delete; | ||
3448 | } | ||
3449 | rsv->size = min_size; | ||
3450 | global_rsv = &root->fs_info->global_block_rsv; | ||
3451 | |||
3643 | btrfs_i_size_write(inode, 0); | 3452 | btrfs_i_size_write(inode, 0); |
3644 | 3453 | ||
3454 | /* | ||
3455 | * This is a bit simpler than btrfs_truncate since | ||
3456 | * | ||
3457 | * 1) We've already reserved our space for our orphan item in the | ||
3458 | * unlink. | ||
3459 | * 2) We're going to delete the inode item, so we don't need to update | ||
3460 | * it at all. | ||
3461 | * | ||
3462 | * So we just need to reserve some slack space in case we add bytes when | ||
3463 | * doing the truncate. | ||
3464 | */ | ||
3645 | while (1) { | 3465 | while (1) { |
3646 | trans = btrfs_join_transaction(root); | 3466 | ret = btrfs_block_rsv_refill(root, rsv, min_size); |
3647 | BUG_ON(IS_ERR(trans)); | 3467 | |
3648 | trans->block_rsv = root->orphan_block_rsv; | 3468 | /* |
3469 | * Try and steal from the global reserve since we will | ||
3470 | * likely not use this space anyway, we want to try as | ||
3471 | * hard as possible to get this to work. | ||
3472 | */ | ||
3473 | if (ret) | ||
3474 | ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); | ||
3649 | 3475 | ||
3650 | ret = btrfs_block_rsv_check(trans, root, | ||
3651 | root->orphan_block_rsv, 0, 5); | ||
3652 | if (ret) { | 3476 | if (ret) { |
3653 | BUG_ON(ret != -EAGAIN); | 3477 | printk(KERN_WARNING "Could not get space for a " |
3654 | ret = btrfs_commit_transaction(trans, root); | 3478 | "delete, will truncate on mount %d\n", ret); |
3655 | BUG_ON(ret); | 3479 | btrfs_orphan_del(NULL, inode); |
3656 | continue; | 3480 | btrfs_free_block_rsv(root, rsv); |
3481 | goto no_delete; | ||
3657 | } | 3482 | } |
3658 | 3483 | ||
3484 | trans = btrfs_start_transaction(root, 0); | ||
3485 | if (IS_ERR(trans)) { | ||
3486 | btrfs_orphan_del(NULL, inode); | ||
3487 | btrfs_free_block_rsv(root, rsv); | ||
3488 | goto no_delete; | ||
3489 | } | ||
3490 | |||
3491 | trans->block_rsv = rsv; | ||
3492 | |||
3659 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3493 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); |
3660 | if (ret != -EAGAIN) | 3494 | if (ret != -EAGAIN) |
3661 | break; | 3495 | break; |
@@ -3664,14 +3498,17 @@ void btrfs_evict_inode(struct inode *inode) | |||
3664 | btrfs_end_transaction(trans, root); | 3498 | btrfs_end_transaction(trans, root); |
3665 | trans = NULL; | 3499 | trans = NULL; |
3666 | btrfs_btree_balance_dirty(root, nr); | 3500 | btrfs_btree_balance_dirty(root, nr); |
3667 | |||
3668 | } | 3501 | } |
3669 | 3502 | ||
3503 | btrfs_free_block_rsv(root, rsv); | ||
3504 | |||
3670 | if (ret == 0) { | 3505 | if (ret == 0) { |
3506 | trans->block_rsv = root->orphan_block_rsv; | ||
3671 | ret = btrfs_orphan_del(trans, inode); | 3507 | ret = btrfs_orphan_del(trans, inode); |
3672 | BUG_ON(ret); | 3508 | BUG_ON(ret); |
3673 | } | 3509 | } |
3674 | 3510 | ||
3511 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3675 | if (!(root == root->fs_info->tree_root || | 3512 | if (!(root == root->fs_info->tree_root || |
3676 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) | 3513 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) |
3677 | btrfs_return_ino(root, btrfs_ino(inode)); | 3514 | btrfs_return_ino(root, btrfs_ino(inode)); |
@@ -5795,8 +5632,7 @@ again: | |||
5795 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | 5632 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { |
5796 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5633 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5797 | if (!ret) | 5634 | if (!ret) |
5798 | ret = btrfs_update_inode(trans, root, inode); | 5635 | err = btrfs_update_inode(trans, root, inode); |
5799 | err = ret; | ||
5800 | goto out; | 5636 | goto out; |
5801 | } | 5637 | } |
5802 | 5638 | ||
@@ -6289,7 +6125,7 @@ int btrfs_readpage(struct file *file, struct page *page) | |||
6289 | { | 6125 | { |
6290 | struct extent_io_tree *tree; | 6126 | struct extent_io_tree *tree; |
6291 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 6127 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
6292 | return extent_read_full_page(tree, page, btrfs_get_extent); | 6128 | return extent_read_full_page(tree, page, btrfs_get_extent, 0); |
6293 | } | 6129 | } |
6294 | 6130 | ||
6295 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) | 6131 | static int btrfs_writepage(struct page *page, struct writeback_control *wbc) |
@@ -6541,6 +6377,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6541 | struct btrfs_trans_handle *trans; | 6377 | struct btrfs_trans_handle *trans; |
6542 | unsigned long nr; | 6378 | unsigned long nr; |
6543 | u64 mask = root->sectorsize - 1; | 6379 | u64 mask = root->sectorsize - 1; |
6380 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | ||
6544 | 6381 | ||
6545 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); | 6382 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
6546 | if (ret) | 6383 | if (ret) |
@@ -6588,19 +6425,23 @@ static int btrfs_truncate(struct inode *inode) | |||
6588 | rsv = btrfs_alloc_block_rsv(root); | 6425 | rsv = btrfs_alloc_block_rsv(root); |
6589 | if (!rsv) | 6426 | if (!rsv) |
6590 | return -ENOMEM; | 6427 | return -ENOMEM; |
6591 | btrfs_add_durable_block_rsv(root->fs_info, rsv); | 6428 | rsv->size = min_size; |
6592 | 6429 | ||
6430 | /* | ||
6431 | * 1 for the truncate slack space | ||
6432 | * 1 for the orphan item we're going to add | ||
6433 | * 1 for the orphan item deletion | ||
6434 | * 1 for updating the inode. | ||
6435 | */ | ||
6593 | trans = btrfs_start_transaction(root, 4); | 6436 | trans = btrfs_start_transaction(root, 4); |
6594 | if (IS_ERR(trans)) { | 6437 | if (IS_ERR(trans)) { |
6595 | err = PTR_ERR(trans); | 6438 | err = PTR_ERR(trans); |
6596 | goto out; | 6439 | goto out; |
6597 | } | 6440 | } |
6598 | 6441 | ||
6599 | /* | 6442 | /* Migrate the slack space for the truncate to our reserve */ |
6600 | * Reserve space for the truncate process. Truncate should be adding | 6443 | ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, |
6601 | * space, but if there are snapshots it may end up using space. | 6444 | min_size); |
6602 | */ | ||
6603 | ret = btrfs_truncate_reserve_metadata(trans, root, rsv); | ||
6604 | BUG_ON(ret); | 6445 | BUG_ON(ret); |
6605 | 6446 | ||
6606 | ret = btrfs_orphan_add(trans, inode); | 6447 | ret = btrfs_orphan_add(trans, inode); |
@@ -6609,21 +6450,6 @@ static int btrfs_truncate(struct inode *inode) | |||
6609 | goto out; | 6450 | goto out; |
6610 | } | 6451 | } |
6611 | 6452 | ||
6612 | nr = trans->blocks_used; | ||
6613 | btrfs_end_transaction(trans, root); | ||
6614 | btrfs_btree_balance_dirty(root, nr); | ||
6615 | |||
6616 | /* | ||
6617 | * Ok so we've already migrated our bytes over for the truncate, so here | ||
6618 | * just reserve the one slot we need for updating the inode. | ||
6619 | */ | ||
6620 | trans = btrfs_start_transaction(root, 1); | ||
6621 | if (IS_ERR(trans)) { | ||
6622 | err = PTR_ERR(trans); | ||
6623 | goto out; | ||
6624 | } | ||
6625 | trans->block_rsv = rsv; | ||
6626 | |||
6627 | /* | 6453 | /* |
6628 | * setattr is responsible for setting the ordered_data_close flag, | 6454 | * setattr is responsible for setting the ordered_data_close flag, |
6629 | * but that is only tested during the last file release. That | 6455 | * but that is only tested during the last file release. That |
@@ -6645,20 +6471,30 @@ static int btrfs_truncate(struct inode *inode) | |||
6645 | btrfs_add_ordered_operation(trans, root, inode); | 6471 | btrfs_add_ordered_operation(trans, root, inode); |
6646 | 6472 | ||
6647 | while (1) { | 6473 | while (1) { |
6474 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | ||
6475 | if (ret) { | ||
6476 | /* | ||
6477 | * This can only happen with the original transaction we | ||
6478 | * started above, every other time we shouldn't have a | ||
6479 | * transaction started yet. | ||
6480 | */ | ||
6481 | if (ret == -EAGAIN) | ||
6482 | goto end_trans; | ||
6483 | err = ret; | ||
6484 | break; | ||
6485 | } | ||
6486 | |||
6648 | if (!trans) { | 6487 | if (!trans) { |
6649 | trans = btrfs_start_transaction(root, 3); | 6488 | /* Just need the 1 for updating the inode */ |
6489 | trans = btrfs_start_transaction(root, 1); | ||
6650 | if (IS_ERR(trans)) { | 6490 | if (IS_ERR(trans)) { |
6651 | err = PTR_ERR(trans); | 6491 | err = PTR_ERR(trans); |
6652 | goto out; | 6492 | goto out; |
6653 | } | 6493 | } |
6654 | |||
6655 | ret = btrfs_truncate_reserve_metadata(trans, root, | ||
6656 | rsv); | ||
6657 | BUG_ON(ret); | ||
6658 | |||
6659 | trans->block_rsv = rsv; | ||
6660 | } | 6494 | } |
6661 | 6495 | ||
6496 | trans->block_rsv = rsv; | ||
6497 | |||
6662 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6498 | ret = btrfs_truncate_inode_items(trans, root, inode, |
6663 | inode->i_size, | 6499 | inode->i_size, |
6664 | BTRFS_EXTENT_DATA_KEY); | 6500 | BTRFS_EXTENT_DATA_KEY); |
@@ -6673,7 +6509,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6673 | err = ret; | 6509 | err = ret; |
6674 | break; | 6510 | break; |
6675 | } | 6511 | } |
6676 | 6512 | end_trans: | |
6677 | nr = trans->blocks_used; | 6513 | nr = trans->blocks_used; |
6678 | btrfs_end_transaction(trans, root); | 6514 | btrfs_end_transaction(trans, root); |
6679 | trans = NULL; | 6515 | trans = NULL; |
@@ -6755,9 +6591,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6755 | ei->last_sub_trans = 0; | 6591 | ei->last_sub_trans = 0; |
6756 | ei->logged_trans = 0; | 6592 | ei->logged_trans = 0; |
6757 | ei->delalloc_bytes = 0; | 6593 | ei->delalloc_bytes = 0; |
6758 | ei->reserved_bytes = 0; | ||
6759 | ei->disk_i_size = 0; | 6594 | ei->disk_i_size = 0; |
6760 | ei->flags = 0; | 6595 | ei->flags = 0; |
6596 | ei->csum_bytes = 0; | ||
6761 | ei->index_cnt = (u64)-1; | 6597 | ei->index_cnt = (u64)-1; |
6762 | ei->last_unlink_trans = 0; | 6598 | ei->last_unlink_trans = 0; |
6763 | 6599 | ||
@@ -6803,6 +6639,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6803 | WARN_ON(inode->i_data.nrpages); | 6639 | WARN_ON(inode->i_data.nrpages); |
6804 | WARN_ON(BTRFS_I(inode)->outstanding_extents); | 6640 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6805 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 6641 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6642 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); | ||
6643 | WARN_ON(BTRFS_I(inode)->csum_bytes); | ||
6806 | 6644 | ||
6807 | /* | 6645 | /* |
6808 | * This can happen where we create an inode, but somebody else also | 6646 | * This can happen where we create an inode, but somebody else also |
@@ -7420,7 +7258,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
7420 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, | 7258 | .readpage_end_io_hook = btrfs_readpage_end_io_hook, |
7421 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, | 7259 | .writepage_end_io_hook = btrfs_writepage_end_io_hook, |
7422 | .writepage_start_hook = btrfs_writepage_start_hook, | 7260 | .writepage_start_hook = btrfs_writepage_start_hook, |
7423 | .readpage_io_failed_hook = btrfs_io_failed_hook, | ||
7424 | .set_bit_hook = btrfs_set_bit_hook, | 7261 | .set_bit_hook = btrfs_set_bit_hook, |
7425 | .clear_bit_hook = btrfs_clear_bit_hook, | 7262 | .clear_bit_hook = btrfs_clear_bit_hook, |
7426 | .merge_extent_hook = btrfs_merge_extent_hook, | 7263 | .merge_extent_hook = btrfs_merge_extent_hook, |