diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 157 |
1 files changed, 132 insertions, 25 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 17e7393c50f0..1f2b99cb55ea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "locking.h" | 41 | #include "locking.h" |
42 | #include "volumes.h" | 42 | #include "volumes.h" |
43 | #include "qgroup.h" | ||
43 | 44 | ||
44 | static struct kmem_cache *btrfs_inode_defrag_cachep; | 45 | static struct kmem_cache *btrfs_inode_defrag_cachep; |
45 | /* | 46 | /* |
@@ -470,11 +471,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
470 | for (i = 0; i < num_pages; i++) { | 471 | for (i = 0; i < num_pages; i++) { |
471 | /* page checked is some magic around finding pages that | 472 | /* page checked is some magic around finding pages that |
472 | * have been modified without going through btrfs_set_page_dirty | 473 | * have been modified without going through btrfs_set_page_dirty |
473 | * clear it here | 474 | * clear it here. There should be no need to mark the pages |
475 | * accessed, as prepare_pages() should already have marked them | |
476 | * accessed via find_or_create_page() | |
474 | */ | 477 | */ |
475 | ClearPageChecked(pages[i]); | 478 | ClearPageChecked(pages[i]); |
476 | unlock_page(pages[i]); | 479 | unlock_page(pages[i]); |
477 | mark_page_accessed(pages[i]); | ||
478 | page_cache_release(pages[i]); | 480 | page_cache_release(pages[i]); |
479 | } | 481 | } |
480 | } | 482 | } |
@@ -714,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
714 | int recow; | 716 | int recow; |
715 | int ret; | 717 | int ret; |
716 | int modify_tree = -1; | 718 | int modify_tree = -1; |
717 | int update_refs = (root->ref_cows || root == root->fs_info->tree_root); | 719 | int update_refs; |
718 | int found = 0; | 720 | int found = 0; |
719 | int leafs_visited = 0; | 721 | int leafs_visited = 0; |
720 | 722 | ||
@@ -724,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
724 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) | 726 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
725 | modify_tree = 0; | 727 | modify_tree = 0; |
726 | 728 | ||
729 | update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | ||
730 | root == root->fs_info->tree_root); | ||
727 | while (1) { | 731 | while (1) { |
728 | recow = 0; | 732 | recow = 0; |
729 | ret = btrfs_lookup_file_extent(trans, root, path, ino, | 733 | ret = btrfs_lookup_file_extent(trans, root, path, ino, |
@@ -780,6 +784,18 @@ next_slot: | |||
780 | extent_end = search_start; | 784 | extent_end = search_start; |
781 | } | 785 | } |
782 | 786 | ||
787 | /* | ||
788 | * Don't skip extent items representing 0 byte lengths. They | ||
789 | * used to be created (bug) if while punching holes we hit | ||
790 | * -ENOSPC condition. So if we find one here, just ensure we | ||
791 | * delete it, otherwise we would insert a new file extent item | ||
792 | * with the same key (offset) as that 0 bytes length file | ||
793 | * extent item in the call to setup_items_for_insert() later | ||
794 | * in this function. | ||
795 | */ | ||
796 | if (extent_end == key.offset && extent_end >= search_start) | ||
797 | goto delete_extent_item; | ||
798 | |||
783 | if (extent_end <= search_start) { | 799 | if (extent_end <= search_start) { |
784 | path->slots[0]++; | 800 | path->slots[0]++; |
785 | goto next_slot; | 801 | goto next_slot; |
@@ -835,7 +851,7 @@ next_slot: | |||
835 | disk_bytenr, num_bytes, 0, | 851 | disk_bytenr, num_bytes, 0, |
836 | root->root_key.objectid, | 852 | root->root_key.objectid, |
837 | new_key.objectid, | 853 | new_key.objectid, |
838 | start - extent_offset, 0); | 854 | start - extent_offset, 1); |
839 | BUG_ON(ret); /* -ENOMEM */ | 855 | BUG_ON(ret); /* -ENOMEM */ |
840 | } | 856 | } |
841 | key.offset = start; | 857 | key.offset = start; |
@@ -893,6 +909,7 @@ next_slot: | |||
893 | * | ------ extent ------ | | 909 | * | ------ extent ------ | |
894 | */ | 910 | */ |
895 | if (start <= key.offset && end >= extent_end) { | 911 | if (start <= key.offset && end >= extent_end) { |
912 | delete_extent_item: | ||
896 | if (del_nr == 0) { | 913 | if (del_nr == 0) { |
897 | del_slot = path->slots[0]; | 914 | del_slot = path->slots[0]; |
898 | del_nr = 1; | 915 | del_nr = 1; |
@@ -1191,7 +1208,7 @@ again: | |||
1191 | 1208 | ||
1192 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, | 1209 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, |
1193 | root->root_key.objectid, | 1210 | root->root_key.objectid, |
1194 | ino, orig_offset, 0); | 1211 | ino, orig_offset, 1); |
1195 | BUG_ON(ret); /* -ENOMEM */ | 1212 | BUG_ON(ret); /* -ENOMEM */ |
1196 | 1213 | ||
1197 | if (split == start) { | 1214 | if (split == start) { |
@@ -1994,8 +2011,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1994 | if (!full_sync) { | 2011 | if (!full_sync) { |
1995 | ret = btrfs_wait_ordered_range(inode, start, | 2012 | ret = btrfs_wait_ordered_range(inode, start, |
1996 | end - start + 1); | 2013 | end - start + 1); |
1997 | if (ret) | 2014 | if (ret) { |
2015 | btrfs_end_transaction(trans, root); | ||
1998 | goto out; | 2016 | goto out; |
2017 | } | ||
1999 | } | 2018 | } |
2000 | ret = btrfs_commit_transaction(trans, root); | 2019 | ret = btrfs_commit_transaction(trans, root); |
2001 | } else { | 2020 | } else { |
@@ -2153,6 +2172,37 @@ out: | |||
2153 | return 0; | 2172 | return 0; |
2154 | } | 2173 | } |
2155 | 2174 | ||
2175 | /* | |
2176 | * Test whether the extent map covering *start is a hole (hole/vacuum extent | |
2177 | * with em->start <= *start && em->start + em->len > *start). If so, return 1, | |
2178 | * set *len to what remains of the range past the hole (0 if nothing remains) | |
2179 | * and move *start to the hole's end. Return 0 if not a hole, -errno on error. | |
2180 | */ | |
2181 | static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) | |
2182 | { | |
2183 | struct extent_map *em; | |
2184 | int ret = 0; | |
2185 | |||
2186 | em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0); | |
2187 | if (IS_ERR_OR_NULL(em)) { | |
2188 | if (!em) | |
2189 | ret = -ENOMEM; | |
2190 | else | |
2191 | ret = PTR_ERR(em); | |
2192 | return ret; | |
2193 | } | |
2194 | |||
2195 | /* Hole or vacuum extent (the latter only exists in no-hole mode) */ | |
2196 | if (em->block_start == EXTENT_MAP_HOLE) { | |
2197 | ret = 1; | |
2198 | *len = em->start + em->len > *start + *len ? | |
2199 | 0 : *start + *len - em->start - em->len; | |
2200 | *start = em->start + em->len; | |
2201 | } | |
2202 | free_extent_map(em); | |
2203 | return ret; | |
2204 | } | |
2205 | |||
2156 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | 2206 | static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) |
2157 | { | 2207 | { |
2158 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2208 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -2160,25 +2210,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2160 | struct btrfs_path *path; | 2210 | struct btrfs_path *path; |
2161 | struct btrfs_block_rsv *rsv; | 2211 | struct btrfs_block_rsv *rsv; |
2162 | struct btrfs_trans_handle *trans; | 2212 | struct btrfs_trans_handle *trans; |
2163 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); | 2213 | u64 lockstart; |
2164 | u64 lockend = round_down(offset + len, | 2214 | u64 lockend; |
2165 | BTRFS_I(inode)->root->sectorsize) - 1; | 2215 | u64 tail_start; |
2166 | u64 cur_offset = lockstart; | 2216 | u64 tail_len; |
2217 | u64 orig_start = offset; | ||
2218 | u64 cur_offset; | ||
2167 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 2219 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
2168 | u64 drop_end; | 2220 | u64 drop_end; |
2169 | int ret = 0; | 2221 | int ret = 0; |
2170 | int err = 0; | 2222 | int err = 0; |
2171 | int rsv_count; | 2223 | int rsv_count; |
2172 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2224 | bool same_page; |
2173 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
2174 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2225 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2175 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | 2226 | u64 ino_size; |
2176 | 2227 | ||
2177 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2228 | ret = btrfs_wait_ordered_range(inode, offset, len); |
2178 | if (ret) | 2229 | if (ret) |
2179 | return ret; | 2230 | return ret; |
2180 | 2231 | ||
2181 | mutex_lock(&inode->i_mutex); | 2232 | mutex_lock(&inode->i_mutex); |
2233 | ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
2234 | ret = find_first_non_hole(inode, &offset, &len); | ||
2235 | if (ret < 0) | ||
2236 | goto out_only_mutex; | ||
2237 | if (ret && !len) { | ||
2238 | /* Already in a large hole */ | ||
2239 | ret = 0; | ||
2240 | goto out_only_mutex; | ||
2241 | } | ||
2242 | |||
2243 | lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); | ||
2244 | lockend = round_down(offset + len, | ||
2245 | BTRFS_I(inode)->root->sectorsize) - 1; | ||
2246 | same_page = ((offset >> PAGE_CACHE_SHIFT) == | ||
2247 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | ||
2248 | |||
2182 | /* | 2249 | /* |
2183 | * We needn't truncate any page which is beyond the end of the file | 2250 | * We needn't truncate any page which is beyond the end of the file |
2184 | * because we are sure there is no data there. | 2251 | * because we are sure there is no data there. |
@@ -2190,8 +2257,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2190 | if (same_page && len < PAGE_CACHE_SIZE) { | 2257 | if (same_page && len < PAGE_CACHE_SIZE) { |
2191 | if (offset < ino_size) | 2258 | if (offset < ino_size) |
2192 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2259 | ret = btrfs_truncate_page(inode, offset, len, 0); |
2193 | mutex_unlock(&inode->i_mutex); | 2260 | goto out_only_mutex; |
2194 | return ret; | ||
2195 | } | 2261 | } |
2196 | 2262 | ||
2197 | /* zero back part of the first page */ | 2263 | /* zero back part of the first page */ |
@@ -2203,12 +2269,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2203 | } | 2269 | } |
2204 | } | 2270 | } |
2205 | 2271 | ||
2206 | /* zero the front end of the last page */ | 2272 | /* Check the aligned pages after the first unaligned page, |
2207 | if (offset + len < ino_size) { | 2273 | * if offset != orig_start, which means the first unaligned page |
2208 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2274 | * including several following pages are already in holes, |
2209 | if (ret) { | 2275 | * the extra check can be skipped */ |
2210 | mutex_unlock(&inode->i_mutex); | 2276 | if (offset == orig_start) { |
2211 | return ret; | 2277 | /* after truncate page, check hole again */ |
2278 | len = offset + len - lockstart; | ||
2279 | offset = lockstart; | ||
2280 | ret = find_first_non_hole(inode, &offset, &len); | ||
2281 | if (ret < 0) | ||
2282 | goto out_only_mutex; | ||
2283 | if (ret && !len) { | ||
2284 | ret = 0; | ||
2285 | goto out_only_mutex; | ||
2286 | } | ||
2287 | lockstart = offset; | ||
2288 | } | ||
2289 | |||
2290 | /* Check the tail unaligned part is in a hole */ | ||
2291 | tail_start = lockend + 1; | ||
2292 | tail_len = offset + len - tail_start; | ||
2293 | if (tail_len) { | ||
2294 | ret = find_first_non_hole(inode, &tail_start, &tail_len); | ||
2295 | if (unlikely(ret < 0)) | ||
2296 | goto out_only_mutex; | ||
2297 | if (!ret) { | ||
2298 | /* zero the front end of the last page */ | ||
2299 | if (tail_start + tail_len < ino_size) { | ||
2300 | ret = btrfs_truncate_page(inode, | ||
2301 | tail_start + tail_len, 0, 1); | ||
2302 | if (ret) | ||
2303 | goto out_only_mutex; | ||
2304 | } | ||
2212 | } | 2305 | } |
2213 | } | 2306 | } |
2214 | 2307 | ||
@@ -2234,9 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2234 | if ((!ordered || | 2327 | if ((!ordered || |
2235 | (ordered->file_offset + ordered->len <= lockstart || | 2328 | (ordered->file_offset + ordered->len <= lockstart || |
2236 | ordered->file_offset > lockend)) && | 2329 | ordered->file_offset > lockend)) && |
2237 | !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, | 2330 | !btrfs_page_exists_in_range(inode, lockstart, lockend)) { |
2238 | lockend, EXTENT_UPTODATE, 0, | ||
2239 | cached_state)) { | ||
2240 | if (ordered) | 2331 | if (ordered) |
2241 | btrfs_put_ordered_extent(ordered); | 2332 | btrfs_put_ordered_extent(ordered); |
2242 | break; | 2333 | break; |
@@ -2284,6 +2375,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2284 | BUG_ON(ret); | 2375 | BUG_ON(ret); |
2285 | trans->block_rsv = rsv; | 2376 | trans->block_rsv = rsv; |
2286 | 2377 | ||
2378 | cur_offset = lockstart; | ||
2379 | len = lockend - cur_offset; | ||
2287 | while (cur_offset < lockend) { | 2380 | while (cur_offset < lockend) { |
2288 | ret = __btrfs_drop_extents(trans, root, inode, path, | 2381 | ret = __btrfs_drop_extents(trans, root, inode, path, |
2289 | cur_offset, lockend + 1, | 2382 | cur_offset, lockend + 1, |
@@ -2324,6 +2417,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2324 | rsv, min_size); | 2417 | rsv, min_size); |
2325 | BUG_ON(ret); /* shouldn't happen */ | 2418 | BUG_ON(ret); /* shouldn't happen */ |
2326 | trans->block_rsv = rsv; | 2419 | trans->block_rsv = rsv; |
2420 | |||
2421 | ret = find_first_non_hole(inode, &cur_offset, &len); | ||
2422 | if (unlikely(ret < 0)) | ||
2423 | break; | ||
2424 | if (ret && !len) { | ||
2425 | ret = 0; | ||
2426 | break; | ||
2427 | } | ||
2327 | } | 2428 | } |
2328 | 2429 | ||
2329 | if (ret) { | 2430 | if (ret) { |
@@ -2332,7 +2433,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2332 | } | 2433 | } |
2333 | 2434 | ||
2334 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2435 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2335 | if (cur_offset < ino_size) { | 2436 | /* |
2437 | * Don't insert file hole extent item if it's for a range beyond eof | ||
2438 | * (because it's useless) or if it represents a 0 bytes range (when | ||
2439 | * cur_offset == drop_end). | ||
2440 | */ | ||
2441 | if (cur_offset < ino_size && cur_offset < drop_end) { | ||
2336 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2442 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
2337 | if (ret) { | 2443 | if (ret) { |
2338 | err = ret; | 2444 | err = ret; |
@@ -2357,6 +2463,7 @@ out_free: | |||
2357 | out: | 2463 | out: |
2358 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2464 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
2359 | &cached_state, GFP_NOFS); | 2465 | &cached_state, GFP_NOFS); |
2466 | out_only_mutex: | ||
2360 | mutex_unlock(&inode->i_mutex); | 2467 | mutex_unlock(&inode->i_mutex); |
2361 | if (ret && !err) | 2468 | if (ret && !err) |
2362 | err = ret; | 2469 | err = ret; |