Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c  197
1 file changed, 155 insertions(+), 42 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 611b66d73e80..0e4f2bfcc37d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -310,6 +310,13 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
 	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 out:
+	/*
+	 * Don't forget to free the reserved space, as for inlined extent
+	 * it won't count as data extent, free them directly here.
+	 * And at reserve time, it's always aligned to page size, so
+	 * just free one page here.
+	 */
+	btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE);
 	btrfs_free_path(path);
 	btrfs_end_transaction(trans, root);
 	return ret;
@@ -1096,6 +1103,9 @@ static noinline void async_cow_submit(struct btrfs_work *work)
 	nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
 		PAGE_CACHE_SHIFT;
 
+	/*
+	 * atomic_sub_return implies a barrier for waitqueue_active
+	 */
 	if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
 	    5 * 1024 * 1024 &&
 	    waitqueue_active(&root->fs_info->async_submit_wait))
@@ -1766,7 +1776,8 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 
 		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
 		    && do_list && !(state->state & EXTENT_NORESERVE))
-			btrfs_free_reserved_data_space(inode, len);
+			btrfs_free_reserved_data_space_noquota(inode,
+					state->start, len);
 
 		__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
 				     root->fs_info->delalloc_batch);
@@ -1861,15 +1872,15 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 			  u64 bio_offset)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
 	int ret = 0;
 	int skip_sum;
-	int metadata = 0;
 	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
 	if (btrfs_is_free_space_inode(inode))
-		metadata = 2;
+		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
 
 	if (!(rw & REQ_WRITE)) {
 		ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
@@ -1989,7 +2000,8 @@ again:
 		goto again;
 	}
 
-	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+	ret = btrfs_delalloc_reserve_space(inode, page_start,
+					   PAGE_CACHE_SIZE);
 	if (ret) {
 		mapping_set_error(page->mapping, ret);
 		end_extent_writepage(page, ret, page_start, page_end);
@@ -2115,7 +2127,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	ins.type = BTRFS_EXTENT_ITEM_KEY;
 	ret = btrfs_alloc_reserved_file_extent(trans, root,
 					root->root_key.objectid,
-					btrfs_ino(inode), file_pos, &ins);
+					btrfs_ino(inode), file_pos,
+					ram_bytes, &ins);
+	/*
+	 * Release the reserved range from inode dirty range map, as it is
+	 * already moved into delayed_ref_head
+	 */
+	btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
out:
 	btrfs_free_path(path);
 
@@ -2573,7 +2591,7 @@ again:
 	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
 			new->disk_len, 0,
 			backref->root_id, backref->inum,
-			new->file_pos, 0);	/* start - extent_offset */
+			new->file_pos);	/* start - extent_offset */
 	if (ret) {
 		btrfs_abort_transaction(trans, root, ret);
 		goto out_free_path;
@@ -2599,7 +2617,6 @@ static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
 		return;
 
 	list_for_each_entry_safe(old, tmp, &new->head, list) {
-		list_del(&old->list);
 		kfree(old);
 	}
 	kfree(new);
@@ -2824,6 +2841,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
+
+		/*
+		 * For mwrite(mmap + memset to write) case, we still reserve
+		 * space for NOCOW range.
+		 * As NOCOW won't cause a new delayed ref, just free the space
+		 */
+		btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
+				       ordered_extent->len);
 		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 		if (nolock)
 			trans = btrfs_join_transaction_nolock(root);
@@ -3018,8 +3043,6 @@ static int __readpage_endio_check(struct inode *inode,
 	char *kaddr;
 	u32 csum_expected;
 	u32 csum = ~(u32)0;
-	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
-				      DEFAULT_RATELIMIT_BURST);
 
 	csum_expected = *(((u32 *)io_bio->csum) + icsum);
 
@@ -3032,9 +3055,8 @@ static int __readpage_endio_check(struct inode *inode,
 	kunmap_atomic(kaddr);
 	return 0;
zeroit:
-	if (__ratelimit(&_rs))
-		btrfs_warn(BTRFS_I(inode)->root->fs_info,
-			   "csum failed ino %llu off %llu csum %u expected csum %u",
+	btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
+		"csum failed ino %llu off %llu csum %u expected csum %u",
 			   btrfs_ino(inode), start, csum, csum_expected);
 	memset(kaddr + pgoff, 1, len);
 	flush_dcache_page(page);
@@ -4217,6 +4239,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
 
 }
 
+static int truncate_inline_extent(struct inode *inode,
+				  struct btrfs_path *path,
+				  struct btrfs_key *found_key,
+				  const u64 item_end,
+				  const u64 new_size)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	int slot = path->slots[0];
+	struct btrfs_file_extent_item *fi;
+	u32 size = (u32)(new_size - found_key->offset);
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+	if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
+		loff_t offset = new_size;
+		loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+
+		/*
+		 * Zero out the remaining of the last page of our inline extent,
+		 * instead of directly truncating our inline extent here - that
+		 * would be much more complex (decompressing all the data, then
+		 * compressing the truncated data, which might be bigger than
+		 * the size of the inline extent, resize the extent, etc).
+		 * We release the path because to get the page we might need to
+		 * read the extent item from disk (data not in the page cache).
+		 */
+		btrfs_release_path(path);
+		return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+	}
+
+	btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+	size = btrfs_file_extent_calc_inline_size(size);
+	btrfs_truncate_item(root, path, size, 1);
+
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+		inode_sub_bytes(inode, item_end + 1 - new_size);
+
+	return 0;
+}
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -4411,27 +4474,40 @@ search_again:
 			 * special encodings
 			 */
 			if (!del_item &&
-			    btrfs_file_extent_compression(leaf, fi) == 0 &&
 			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
 			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {
-				u32 size = new_size - found_key.offset;
-
-				if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-					inode_sub_bytes(inode, item_end + 1 -
-							new_size);
 
 				/*
-				 * update the ram bytes to properly reflect
-				 * the new size of our item
+				 * Need to release path in order to truncate a
+				 * compressed extent. So delete any accumulated
+				 * extent items so far.
 				 */
-				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-				size =
-				    btrfs_file_extent_calc_inline_size(size);
-				btrfs_truncate_item(root, path, size, 1);
+				if (btrfs_file_extent_compression(leaf, fi) !=
+				    BTRFS_COMPRESS_NONE && pending_del_nr) {
+					err = btrfs_del_items(trans, root, path,
+							      pending_del_slot,
+							      pending_del_nr);
+					if (err) {
+						btrfs_abort_transaction(trans,
+									root,
+									err);
+						goto error;
+					}
+					pending_del_nr = 0;
+				}
+
+				err = truncate_inline_extent(inode, path,
+							     &found_key,
+							     item_end,
+							     new_size);
+				if (err) {
+					btrfs_abort_transaction(trans,
+								root, err);
+					goto error;
+				}
 			} else if (test_bit(BTRFS_ROOT_REF_COWS,
 					    &root->state)) {
-				inode_sub_bytes(inode, item_end + 1 -
-						found_key.offset);
+				inode_sub_bytes(inode, item_end + 1 - new_size);
 			}
 		}
delete:
@@ -4461,7 +4537,7 @@ delete:
 			ret = btrfs_free_extent(trans, root, extent_start,
 						extent_num_bytes, 0,
 						btrfs_header_owner(leaf),
-						ino, extent_offset, 0);
+						ino, extent_offset);
 			BUG_ON(ret);
 			if (btrfs_should_throttle_delayed_refs(trans, root))
 				btrfs_async_run_delayed_refs(root,
@@ -4575,14 +4651,17 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
 	if ((offset & (blocksize - 1)) == 0 &&
 	    (!len || ((len & (blocksize - 1)) == 0)))
 		goto out;
-	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+	ret = btrfs_delalloc_reserve_space(inode,
+			round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE);
 	if (ret)
 		goto out;
 
again:
 	page = find_or_create_page(mapping, index, mask);
 	if (!page) {
-		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+		btrfs_delalloc_release_space(inode,
+				round_down(from, PAGE_CACHE_SIZE),
+				PAGE_CACHE_SIZE);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -4650,7 +4729,8 @@ again:
 
out_unlock:
 	if (ret)
-		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+		btrfs_delalloc_release_space(inode, page_start,
+					     PAGE_CACHE_SIZE);
 	unlock_page(page);
 	page_cache_release(page);
out:
@@ -5048,6 +5128,18 @@ static void evict_inode_truncate_pages(struct inode *inode)
 		spin_unlock(&io_tree->lock);
 
 		lock_extent_bits(io_tree, start, end, 0, &cached_state);
+
+		/*
+		 * If still has DELALLOC flag, the extent didn't reach disk,
+		 * and its reserved space won't be freed by delayed_ref.
+		 * So we need to free its reserved space here.
+		 * (Refer to comment in btrfs_invalidatepage, case 2)
+		 *
+		 * Note, end is the bytenr of last byte, so we need + 1 here.
+		 */
+		if (state->state & EXTENT_DELALLOC)
+			btrfs_qgroup_free_data(inode, start, end - start + 1);
+
 		clear_extent_bit(io_tree, start, end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
 				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
@@ -6268,9 +6360,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	u64 objectid;
 	u64 index = 0;
 
-	if (!new_valid_dev(rdev))
-		return -EINVAL;
-
 	/*
 	 * 2 for inode item and ref
 	 * 2 for dir items
@@ -7581,7 +7670,7 @@ unlock:
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
 
-		btrfs_free_reserved_data_space(inode, len);
+		btrfs_free_reserved_data_space(inode, start, len);
 		WARN_ON(dio_data->reserve < len);
 		dio_data->reserve -= len;
 		current->journal_info = dio_data;
@@ -8371,7 +8460,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		mutex_unlock(&inode->i_mutex);
 		relock = true;
 	}
-	ret = btrfs_delalloc_reserve_space(inode, count);
+	ret = btrfs_delalloc_reserve_space(inode, offset, count);
 	if (ret)
 		goto out;
 	dio_data.outstanding_extents = div64_u64(count +
@@ -8400,10 +8489,10 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		current->journal_info = NULL;
 		if (ret < 0 && ret != -EIOCBQUEUED) {
 			if (dio_data.reserve)
-				btrfs_delalloc_release_space(inode,
+				btrfs_delalloc_release_space(inode, offset,
							     dio_data.reserve);
 		} else if (ret >= 0 && (size_t)ret < count)
-			btrfs_delalloc_release_space(inode,
+			btrfs_delalloc_release_space(inode, offset,
						     count - (size_t)ret);
 	}
out:
@@ -8562,6 +8651,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 		}
 	}
 
+	/*
+	 * Qgroup reserved space handler
+	 * Page here will be either
+	 * 1) Already written to disk
+	 *    In this case, its reserved space is released from data rsv map
+	 *    and will be freed by delayed_ref handler finally.
+	 *    So even we call qgroup_free_data(), it won't decrease reserved
+	 *    space.
+	 * 2) Not written to disk
+	 *    This means the reserved space should be freed here.
+	 */
+	btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE);
 	if (!inode_evicting) {
 		clear_extent_bit(tree, page_start, page_end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
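The comment added in the hunk above captures the accounting contract behind the qgroup changes in this diff: data space is reserved per byte range when the range goes delalloc, and is later either released (case 1, ownership passes to the delayed ref head once the extent reaches disk) or freed (case 2, the range never reaches disk, as in invalidatepage, inode eviction or the NOCOW path). The standalone C sketch below only models that contract for illustration; the names range_rsv, rsv_reserve, rsv_release and rsv_free are hypothetical and are not btrfs APIs.

/*
 * Illustrative userspace model of the reserve/release/free contract
 * relied on by the qgroup data reservation changes in this diff.
 * All names here (range_rsv, rsv_*) are hypothetical, not kernel APIs.
 */
#include <assert.h>
#include <stdio.h>

struct range_rsv {
	unsigned long long reserved;	/* bytes still owned by the inode */
	unsigned long long committed;	/* bytes handed to "delayed refs" */
};

/* Reserve a byte range before dirtying it (delalloc time). */
static void rsv_reserve(struct range_rsv *r, unsigned long long len)
{
	r->reserved += len;
}

/* Case 1: range reached disk; accounting moves to the delayed ref side. */
static void rsv_release(struct range_rsv *r, unsigned long long len)
{
	assert(r->reserved >= len);
	r->reserved -= len;
	r->committed += len;
}

/* Case 2: range never reached disk (invalidate, evict, NOCOW, error). */
static void rsv_free(struct range_rsv *r, unsigned long long len)
{
	assert(r->reserved >= len);
	r->reserved -= len;
}

int main(void)
{
	struct range_rsv r = { 0, 0 };

	rsv_reserve(&r, 4096);	/* buffered write dirties one page */
	rsv_release(&r, 4096);	/* ordered extent finished: case 1 */

	rsv_reserve(&r, 4096);	/* page dirtied again ...          */
	rsv_free(&r, 4096);	/* ... but invalidated: case 2     */

	printf("reserved=%llu committed=%llu\n", r.reserved, r.committed);
	/* No leak at "inode destroy" time, mirroring the check this diff
	 * adds via btrfs_qgroup_check_reserved_leak() in btrfs_destroy_inode(). */
	assert(r.reserved == 0);
	return 0;
}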
@@ -8612,7 +8713,11 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	u64 page_end;
 
 	sb_start_pagefault(inode->i_sb);
-	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+	page_start = page_offset(page);
+	page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+	ret = btrfs_delalloc_reserve_space(inode, page_start,
+					   PAGE_CACHE_SIZE);
 	if (!ret) {
 		ret = file_update_time(vma->vm_file);
 		reserved = 1;
@@ -8631,8 +8736,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
again:
 	lock_page(page);
 	size = i_size_read(inode);
-	page_start = page_offset(page);
-	page_end = page_start + PAGE_CACHE_SIZE - 1;
 
 	if ((page->mapping != inode->i_mapping) ||
 	    (page_start >= size)) {
@@ -8709,7 +8812,7 @@ out_unlock:
 	}
 	unlock_page(page);
out:
-	btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+	btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE);
out_noreserve:
 	sb_end_pagefault(inode->i_sb);
 	return ret;
@@ -8998,6 +9101,7 @@ void btrfs_destroy_inode(struct inode *inode)
 			btrfs_put_ordered_extent(ordered);
 		}
 	}
+	btrfs_qgroup_check_reserved_leak(inode);
 	inode_tree_del(inode);
 	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
@@ -9634,6 +9738,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 	u64 cur_offset = start;
 	u64 i_size;
 	u64 cur_bytes;
+	u64 last_alloc = (u64)-1;
 	int ret = 0;
 	bool own_trans = true;
 
@@ -9650,6 +9755,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 
 		cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
 		cur_bytes = max(cur_bytes, min_size);
+		/*
+		 * If we are severely fragmented we could end up with really
+		 * small allocations, so if the allocator is returning small
+		 * chunks lets make its job easier by only searching for those
+		 * sized chunks.
+		 */
+		cur_bytes = min(cur_bytes, last_alloc);
 		ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
 					   *alloc_hint, &ins, 1, 0);
 		if (ret) {
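The cur_bytes = min(cur_bytes, last_alloc) clamp added above keeps __btrfs_prealloc_file_range() from repeatedly asking the allocator for large contiguous chunks once a fragmented filesystem has shown it can only return small ones. A minimal userspace sketch of that strategy follows; the fake allocator (fake_reserve_extent) and the sizes used are illustrative assumptions, not kernel code.

/* Toy model of "shrink requests to what the allocator last returned". */
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Pretend allocator on a fragmented disk: never finds more than 1 MiB. */
static unsigned long long fake_reserve_extent(unsigned long long want)
{
	return MIN(want, 1ULL << 20);
}

int main(void)
{
	unsigned long long remaining = 256ULL << 20;	/* 256 MiB to prealloc */
	unsigned long long last_alloc = (unsigned long long)-1;
	int attempts = 0;

	while (remaining > 0) {
		unsigned long long cur = MIN(remaining, 8ULL << 20);

		/* The clamp from the patch: don't ask for more than the
		 * allocator proved it could deliver on the previous round. */
		cur = MIN(cur, last_alloc);

		last_alloc = fake_reserve_extent(cur);
		remaining -= last_alloc;
		attempts++;
	}
	printf("finished in %d allocations\n", attempts);
	return 0;
}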
@@ -9658,6 +9770,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 			break;
 		}
 
+		last_alloc = ins.offset;
 		ret = insert_reserved_file_extent(trans, inode,
 						  cur_offset, ins.objectid,
 						  ins.offset, ins.offset,