aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c157
1 files changed, 132 insertions, 25 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 17e7393c50f0..1f2b99cb55ea 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -40,6 +40,7 @@
40#include "tree-log.h" 40#include "tree-log.h"
41#include "locking.h" 41#include "locking.h"
42#include "volumes.h" 42#include "volumes.h"
43#include "qgroup.h"
43 44
44static struct kmem_cache *btrfs_inode_defrag_cachep; 45static struct kmem_cache *btrfs_inode_defrag_cachep;
45/* 46/*
@@ -470,11 +471,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
470 for (i = 0; i < num_pages; i++) { 471 for (i = 0; i < num_pages; i++) {
471 /* page checked is some magic around finding pages that 472 /* page checked is some magic around finding pages that
472 * have been modified without going through btrfs_set_page_dirty 473 * have been modified without going through btrfs_set_page_dirty
473 * clear it here 474 * clear it here. There should be no need to mark the pages
475 * accessed, as prepare_pages should already have marked them
476 * accessed via find_or_create_page()
474 */ 477 */
475 ClearPageChecked(pages[i]); 478 ClearPageChecked(pages[i]);
476 unlock_page(pages[i]); 479 unlock_page(pages[i]);
477 mark_page_accessed(pages[i]);
478 page_cache_release(pages[i]); 480 page_cache_release(pages[i]);
479 } 481 }
480} 482}
@@ -714,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
714 int recow; 716 int recow;
715 int ret; 717 int ret;
716 int modify_tree = -1; 718 int modify_tree = -1;
717 int update_refs = (root->ref_cows || root == root->fs_info->tree_root); 719 int update_refs;
718 int found = 0; 720 int found = 0;
719 int leafs_visited = 0; 721 int leafs_visited = 0;
720 722
@@ -724,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
724 if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) 726 if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
725 modify_tree = 0; 727 modify_tree = 0;
726 728
729 update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
730 root == root->fs_info->tree_root);
727 while (1) { 731 while (1) {
728 recow = 0; 732 recow = 0;
729 ret = btrfs_lookup_file_extent(trans, root, path, ino, 733 ret = btrfs_lookup_file_extent(trans, root, path, ino,
@@ -780,6 +784,18 @@ next_slot:
780 extent_end = search_start; 784 extent_end = search_start;
781 } 785 }
782 786
787 /*
788 * Don't skip extent items representing 0 byte lengths. They
789 * used to be created (bug) if while punching holes we hit
790 * -ENOSPC condition. So if we find one here, just ensure we
791 * delete it, otherwise we would insert a new file extent item
792 * with the same key (offset) as that 0 bytes length file
793 * extent item in the call to setup_items_for_insert() later
794 * in this function.
795 */
796 if (extent_end == key.offset && extent_end >= search_start)
797 goto delete_extent_item;
798
783 if (extent_end <= search_start) { 799 if (extent_end <= search_start) {
784 path->slots[0]++; 800 path->slots[0]++;
785 goto next_slot; 801 goto next_slot;
@@ -835,7 +851,7 @@ next_slot:
835 disk_bytenr, num_bytes, 0, 851 disk_bytenr, num_bytes, 0,
836 root->root_key.objectid, 852 root->root_key.objectid,
837 new_key.objectid, 853 new_key.objectid,
838 start - extent_offset, 0); 854 start - extent_offset, 1);
839 BUG_ON(ret); /* -ENOMEM */ 855 BUG_ON(ret); /* -ENOMEM */
840 } 856 }
841 key.offset = start; 857 key.offset = start;
@@ -893,6 +909,7 @@ next_slot:
893 * | ------ extent ------ | 909 * | ------ extent ------ |
894 */ 910 */
895 if (start <= key.offset && end >= extent_end) { 911 if (start <= key.offset && end >= extent_end) {
912delete_extent_item:
896 if (del_nr == 0) { 913 if (del_nr == 0) {
897 del_slot = path->slots[0]; 914 del_slot = path->slots[0];
898 del_nr = 1; 915 del_nr = 1;
@@ -1191,7 +1208,7 @@ again:
1191 1208
1192 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, 1209 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
1193 root->root_key.objectid, 1210 root->root_key.objectid,
1194 ino, orig_offset, 0); 1211 ino, orig_offset, 1);
1195 BUG_ON(ret); /* -ENOMEM */ 1212 BUG_ON(ret); /* -ENOMEM */
1196 1213
1197 if (split == start) { 1214 if (split == start) {
@@ -1994,8 +2011,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1994 if (!full_sync) { 2011 if (!full_sync) {
1995 ret = btrfs_wait_ordered_range(inode, start, 2012 ret = btrfs_wait_ordered_range(inode, start,
1996 end - start + 1); 2013 end - start + 1);
1997 if (ret) 2014 if (ret) {
2015 btrfs_end_transaction(trans, root);
1998 goto out; 2016 goto out;
2017 }
1999 } 2018 }
2000 ret = btrfs_commit_transaction(trans, root); 2019 ret = btrfs_commit_transaction(trans, root);
2001 } else { 2020 } else {
@@ -2153,6 +2172,37 @@ out:
2153 return 0; 2172 return 0;
2154} 2173}
2155 2174
2175/*
2176 * Find a hole extent on the given inode and move start/len to the end of
2177 * that hole extent (a hole/vacuum extent whose em->start <= start &&
2178 * em->start + em->len > start).
2179 * When a hole extent is found, return 1 and modify start/len.
2180 */
2181static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
2182{
2183 struct extent_map *em;
2184 int ret = 0;
2185
2186 em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
2187 if (IS_ERR_OR_NULL(em)) {
2188 if (!em)
2189 ret = -ENOMEM;
2190 else
2191 ret = PTR_ERR(em);
2192 return ret;
2193 }
2194
2195 /* Hole or vacuum extent(only exists in no-hole mode) */
2196 if (em->block_start == EXTENT_MAP_HOLE) {
2197 ret = 1;
2198 *len = em->start + em->len > *start + *len ?
2199 0 : *start + *len - em->start - em->len;
2200 *start = em->start + em->len;
2201 }
2202 free_extent_map(em);
2203 return ret;
2204}
2205
2156static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) 2206static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2157{ 2207{
2158 struct btrfs_root *root = BTRFS_I(inode)->root; 2208 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2160,25 +2210,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2160 struct btrfs_path *path; 2210 struct btrfs_path *path;
2161 struct btrfs_block_rsv *rsv; 2211 struct btrfs_block_rsv *rsv;
2162 struct btrfs_trans_handle *trans; 2212 struct btrfs_trans_handle *trans;
2163 u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); 2213 u64 lockstart;
2164 u64 lockend = round_down(offset + len, 2214 u64 lockend;
2165 BTRFS_I(inode)->root->sectorsize) - 1; 2215 u64 tail_start;
2166 u64 cur_offset = lockstart; 2216 u64 tail_len;
2217 u64 orig_start = offset;
2218 u64 cur_offset;
2167 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 2219 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
2168 u64 drop_end; 2220 u64 drop_end;
2169 int ret = 0; 2221 int ret = 0;
2170 int err = 0; 2222 int err = 0;
2171 int rsv_count; 2223 int rsv_count;
2172 bool same_page = ((offset >> PAGE_CACHE_SHIFT) == 2224 bool same_page;
2173 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2174 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2225 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2175 u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); 2226 u64 ino_size;
2176 2227
2177 ret = btrfs_wait_ordered_range(inode, offset, len); 2228 ret = btrfs_wait_ordered_range(inode, offset, len);
2178 if (ret) 2229 if (ret)
2179 return ret; 2230 return ret;
2180 2231
2181 mutex_lock(&inode->i_mutex); 2232 mutex_lock(&inode->i_mutex);
2233 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
2234 ret = find_first_non_hole(inode, &offset, &len);
2235 if (ret < 0)
2236 goto out_only_mutex;
2237 if (ret && !len) {
2238 /* Already in a large hole */
2239 ret = 0;
2240 goto out_only_mutex;
2241 }
2242
2243 lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
2244 lockend = round_down(offset + len,
2245 BTRFS_I(inode)->root->sectorsize) - 1;
2246 same_page = ((offset >> PAGE_CACHE_SHIFT) ==
2247 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2248
2182 /* 2249 /*
2183 * We needn't truncate any page which is beyond the end of the file 2250 * We needn't truncate any page which is beyond the end of the file
2184 * because we are sure there is no data there. 2251 * because we are sure there is no data there.
@@ -2190,8 +2257,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2190 if (same_page && len < PAGE_CACHE_SIZE) { 2257 if (same_page && len < PAGE_CACHE_SIZE) {
2191 if (offset < ino_size) 2258 if (offset < ino_size)
2192 ret = btrfs_truncate_page(inode, offset, len, 0); 2259 ret = btrfs_truncate_page(inode, offset, len, 0);
2193 mutex_unlock(&inode->i_mutex); 2260 goto out_only_mutex;
2194 return ret;
2195 } 2261 }
2196 2262
2197 /* zero back part of the first page */ 2263 /* zero back part of the first page */
@@ -2203,12 +2269,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2203 } 2269 }
2204 } 2270 }
2205 2271
2206 /* zero the front end of the last page */ 2272 /* Check the aligned pages after the first unaligned page,
2207 if (offset + len < ino_size) { 2273 * if offset != orig_start, which means the first unaligned page
2208 ret = btrfs_truncate_page(inode, offset + len, 0, 1); 2274 * including several following pages are already in holes,
2209 if (ret) { 2275 * the extra check can be skipped */
2210 mutex_unlock(&inode->i_mutex); 2276 if (offset == orig_start) {
2211 return ret; 2277 /* after truncate page, check hole again */
2278 len = offset + len - lockstart;
2279 offset = lockstart;
2280 ret = find_first_non_hole(inode, &offset, &len);
2281 if (ret < 0)
2282 goto out_only_mutex;
2283 if (ret && !len) {
2284 ret = 0;
2285 goto out_only_mutex;
2286 }
2287 lockstart = offset;
2288 }
2289
2290 /* Check the tail unaligned part is in a hole */
2291 tail_start = lockend + 1;
2292 tail_len = offset + len - tail_start;
2293 if (tail_len) {
2294 ret = find_first_non_hole(inode, &tail_start, &tail_len);
2295 if (unlikely(ret < 0))
2296 goto out_only_mutex;
2297 if (!ret) {
2298 /* zero the front end of the last page */
2299 if (tail_start + tail_len < ino_size) {
2300 ret = btrfs_truncate_page(inode,
2301 tail_start + tail_len, 0, 1);
2302 if (ret)
2303 goto out_only_mutex;
2304 }
2212 } 2305 }
2213 } 2306 }
2214 2307
@@ -2234,9 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2234 if ((!ordered || 2327 if ((!ordered ||
2235 (ordered->file_offset + ordered->len <= lockstart || 2328 (ordered->file_offset + ordered->len <= lockstart ||
2236 ordered->file_offset > lockend)) && 2329 ordered->file_offset > lockend)) &&
2237 !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, 2330 !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
2238 lockend, EXTENT_UPTODATE, 0,
2239 cached_state)) {
2240 if (ordered) 2331 if (ordered)
2241 btrfs_put_ordered_extent(ordered); 2332 btrfs_put_ordered_extent(ordered);
2242 break; 2333 break;
@@ -2284,6 +2375,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2284 BUG_ON(ret); 2375 BUG_ON(ret);
2285 trans->block_rsv = rsv; 2376 trans->block_rsv = rsv;
2286 2377
2378 cur_offset = lockstart;
2379 len = lockend - cur_offset;
2287 while (cur_offset < lockend) { 2380 while (cur_offset < lockend) {
2288 ret = __btrfs_drop_extents(trans, root, inode, path, 2381 ret = __btrfs_drop_extents(trans, root, inode, path,
2289 cur_offset, lockend + 1, 2382 cur_offset, lockend + 1,
@@ -2324,6 +2417,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2324 rsv, min_size); 2417 rsv, min_size);
2325 BUG_ON(ret); /* shouldn't happen */ 2418 BUG_ON(ret); /* shouldn't happen */
2326 trans->block_rsv = rsv; 2419 trans->block_rsv = rsv;
2420
2421 ret = find_first_non_hole(inode, &cur_offset, &len);
2422 if (unlikely(ret < 0))
2423 break;
2424 if (ret && !len) {
2425 ret = 0;
2426 break;
2427 }
2327 } 2428 }
2328 2429
2329 if (ret) { 2430 if (ret) {
@@ -2332,7 +2433,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2332 } 2433 }
2333 2434
2334 trans->block_rsv = &root->fs_info->trans_block_rsv; 2435 trans->block_rsv = &root->fs_info->trans_block_rsv;
2335 if (cur_offset < ino_size) { 2436 /*
2437 * Don't insert file hole extent item if it's for a range beyond eof
2438 * (because it's useless) or if it represents a 0 bytes range (when
2439 * cur_offset == drop_end).
2440 */
2441 if (cur_offset < ino_size && cur_offset < drop_end) {
2336 ret = fill_holes(trans, inode, path, cur_offset, drop_end); 2442 ret = fill_holes(trans, inode, path, cur_offset, drop_end);
2337 if (ret) { 2443 if (ret) {
2338 err = ret; 2444 err = ret;
@@ -2357,6 +2463,7 @@ out_free:
2357out: 2463out:
2358 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2464 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2359 &cached_state, GFP_NOFS); 2465 &cached_state, GFP_NOFS);
2466out_only_mutex:
2360 mutex_unlock(&inode->i_mutex); 2467 mutex_unlock(&inode->i_mutex);
2361 if (ret && !err) 2468 if (ret && !err)
2362 err = ret; 2469 err = ret;