aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c2552
1 files changed, 1856 insertions, 696 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8cd109972fa6..160b55b3e132 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
36#include <linux/xattr.h> 36#include <linux/xattr.h>
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h>
39#include "compat.h" 40#include "compat.h"
40#include "ctree.h" 41#include "ctree.h"
41#include "disk-io.h" 42#include "disk-io.h"
@@ -121,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
121 size_t cur_size = size; 122 size_t cur_size = size;
122 size_t datasize; 123 size_t datasize;
123 unsigned long offset; 124 unsigned long offset;
124 int use_compress = 0; 125 int compress_type = BTRFS_COMPRESS_NONE;
125 126
126 if (compressed_size && compressed_pages) { 127 if (compressed_size && compressed_pages) {
127 use_compress = 1; 128 compress_type = root->fs_info->compress_type;
128 cur_size = compressed_size; 129 cur_size = compressed_size;
129 } 130 }
130 131
@@ -158,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
158 btrfs_set_file_extent_ram_bytes(leaf, ei, size); 159 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
159 ptr = btrfs_file_extent_inline_start(ei); 160 ptr = btrfs_file_extent_inline_start(ei);
160 161
161 if (use_compress) { 162 if (compress_type != BTRFS_COMPRESS_NONE) {
162 struct page *cpage; 163 struct page *cpage;
163 int i = 0; 164 int i = 0;
164 while (compressed_size > 0) { 165 while (compressed_size > 0) {
@@ -175,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
175 compressed_size -= cur_size; 176 compressed_size -= cur_size;
176 } 177 }
177 btrfs_set_file_extent_compression(leaf, ei, 178 btrfs_set_file_extent_compression(leaf, ei,
178 BTRFS_COMPRESS_ZLIB); 179 compress_type);
179 } else { 180 } else {
180 page = find_get_page(inode->i_mapping, 181 page = find_get_page(inode->i_mapping,
181 start >> PAGE_CACHE_SHIFT); 182 start >> PAGE_CACHE_SHIFT);
@@ -251,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
251 inline_len, compressed_size, 252 inline_len, compressed_size,
252 compressed_pages); 253 compressed_pages);
253 BUG_ON(ret); 254 BUG_ON(ret);
255 btrfs_delalloc_release_metadata(inode, end + 1 - start);
254 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 256 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
255 return 0; 257 return 0;
256} 258}
@@ -261,6 +263,7 @@ struct async_extent {
261 u64 compressed_size; 263 u64 compressed_size;
262 struct page **pages; 264 struct page **pages;
263 unsigned long nr_pages; 265 unsigned long nr_pages;
266 int compress_type;
264 struct list_head list; 267 struct list_head list;
265}; 268};
266 269
@@ -278,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow,
278 u64 start, u64 ram_size, 281 u64 start, u64 ram_size,
279 u64 compressed_size, 282 u64 compressed_size,
280 struct page **pages, 283 struct page **pages,
281 unsigned long nr_pages) 284 unsigned long nr_pages,
285 int compress_type)
282{ 286{
283 struct async_extent *async_extent; 287 struct async_extent *async_extent;
284 288
@@ -288,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow,
288 async_extent->compressed_size = compressed_size; 292 async_extent->compressed_size = compressed_size;
289 async_extent->pages = pages; 293 async_extent->pages = pages;
290 async_extent->nr_pages = nr_pages; 294 async_extent->nr_pages = nr_pages;
295 async_extent->compress_type = compress_type;
291 list_add_tail(&async_extent->list, &cow->extents); 296 list_add_tail(&async_extent->list, &cow->extents);
292 return 0; 297 return 0;
293} 298}
@@ -317,8 +322,6 @@ static noinline int compress_file_range(struct inode *inode,
317 struct btrfs_root *root = BTRFS_I(inode)->root; 322 struct btrfs_root *root = BTRFS_I(inode)->root;
318 struct btrfs_trans_handle *trans; 323 struct btrfs_trans_handle *trans;
319 u64 num_bytes; 324 u64 num_bytes;
320 u64 orig_start;
321 u64 disk_num_bytes;
322 u64 blocksize = root->sectorsize; 325 u64 blocksize = root->sectorsize;
323 u64 actual_end; 326 u64 actual_end;
324 u64 isize = i_size_read(inode); 327 u64 isize = i_size_read(inode);
@@ -332,8 +335,7 @@ static noinline int compress_file_range(struct inode *inode,
332 unsigned long max_uncompressed = 128 * 1024; 335 unsigned long max_uncompressed = 128 * 1024;
333 int i; 336 int i;
334 int will_compress; 337 int will_compress;
335 338 int compress_type = root->fs_info->compress_type;
336 orig_start = start;
337 339
338 actual_end = min_t(u64, isize, end + 1); 340 actual_end = min_t(u64, isize, end + 1);
339again: 341again:
@@ -369,7 +371,6 @@ again:
369 total_compressed = min(total_compressed, max_uncompressed); 371 total_compressed = min(total_compressed, max_uncompressed);
370 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 372 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
371 num_bytes = max(blocksize, num_bytes); 373 num_bytes = max(blocksize, num_bytes);
372 disk_num_bytes = num_bytes;
373 total_in = 0; 374 total_in = 0;
374 ret = 0; 375 ret = 0;
375 376
@@ -379,16 +380,21 @@ again:
379 * change at any time if we discover bad compression ratios. 380 * change at any time if we discover bad compression ratios.
380 */ 381 */
381 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 382 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
382 btrfs_test_opt(root, COMPRESS)) { 383 (btrfs_test_opt(root, COMPRESS) ||
384 (BTRFS_I(inode)->force_compress))) {
383 WARN_ON(pages); 385 WARN_ON(pages);
384 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
385 387
386 ret = btrfs_zlib_compress_pages(inode->i_mapping, start, 388 if (BTRFS_I(inode)->force_compress)
387 total_compressed, pages, 389 compress_type = BTRFS_I(inode)->force_compress;
388 nr_pages, &nr_pages_ret, 390
389 &total_in, 391 ret = btrfs_compress_pages(compress_type,
390 &total_compressed, 392 inode->i_mapping, start,
391 max_compressed); 393 total_compressed, pages,
394 nr_pages, &nr_pages_ret,
395 &total_in,
396 &total_compressed,
397 max_compressed);
392 398
393 if (!ret) { 399 if (!ret) {
394 unsigned long offset = total_compressed & 400 unsigned long offset = total_compressed &
@@ -412,6 +418,7 @@ again:
412 trans = btrfs_join_transaction(root, 1); 418 trans = btrfs_join_transaction(root, 1);
413 BUG_ON(!trans); 419 BUG_ON(!trans);
414 btrfs_set_trans_block_group(trans, inode); 420 btrfs_set_trans_block_group(trans, inode);
421 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
415 422
416 /* lets try to make an inline extent */ 423 /* lets try to make an inline extent */
417 if (ret || total_in < (actual_end - start)) { 424 if (ret || total_in < (actual_end - start)) {
@@ -437,7 +444,6 @@ again:
437 start, end, NULL, 444 start, end, NULL,
438 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 445 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
439 EXTENT_CLEAR_DELALLOC | 446 EXTENT_CLEAR_DELALLOC |
440 EXTENT_CLEAR_ACCOUNTING |
441 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 447 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
442 448
443 btrfs_end_transaction(trans, root); 449 btrfs_end_transaction(trans, root);
@@ -464,7 +470,6 @@ again:
464 if (total_compressed >= total_in) { 470 if (total_compressed >= total_in) {
465 will_compress = 0; 471 will_compress = 0;
466 } else { 472 } else {
467 disk_num_bytes = total_compressed;
468 num_bytes = total_in; 473 num_bytes = total_in;
469 } 474 }
470 } 475 }
@@ -483,8 +488,10 @@ again:
483 nr_pages_ret = 0; 488 nr_pages_ret = 0;
484 489
485 /* flag the file so we don't compress in the future */ 490 /* flag the file so we don't compress in the future */
486 if (!btrfs_test_opt(root, FORCE_COMPRESS)) 491 if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
492 !(BTRFS_I(inode)->force_compress)) {
487 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 493 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
494 }
488 } 495 }
489 if (will_compress) { 496 if (will_compress) {
490 *num_added += 1; 497 *num_added += 1;
@@ -494,9 +501,10 @@ again:
494 * and will submit them to the elevator. 501 * and will submit them to the elevator.
495 */ 502 */
496 add_async_extent(async_cow, start, num_bytes, 503 add_async_extent(async_cow, start, num_bytes,
497 total_compressed, pages, nr_pages_ret); 504 total_compressed, pages, nr_pages_ret,
505 compress_type);
498 506
499 if (start + num_bytes < end && start + num_bytes < actual_end) { 507 if (start + num_bytes < end) {
500 start += num_bytes; 508 start += num_bytes;
501 pages = NULL; 509 pages = NULL;
502 cond_resched(); 510 cond_resched();
@@ -516,7 +524,8 @@ cleanup_and_bail_uncompressed:
516 __set_page_dirty_nobuffers(locked_page); 524 __set_page_dirty_nobuffers(locked_page);
517 /* unlocked later on in the async handlers */ 525 /* unlocked later on in the async handlers */
518 } 526 }
519 add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); 527 add_async_extent(async_cow, start, end - start + 1,
528 0, NULL, 0, BTRFS_COMPRESS_NONE);
520 *num_added += 1; 529 *num_added += 1;
521 } 530 }
522 531
@@ -570,8 +579,8 @@ retry:
570 unsigned long nr_written = 0; 579 unsigned long nr_written = 0;
571 580
572 lock_extent(io_tree, async_extent->start, 581 lock_extent(io_tree, async_extent->start,
573 async_extent->start + 582 async_extent->start +
574 async_extent->ram_size - 1, GFP_NOFS); 583 async_extent->ram_size - 1, GFP_NOFS);
575 584
576 /* allocate blocks */ 585 /* allocate blocks */
577 ret = cow_file_range(inode, async_cow->locked_page, 586 ret = cow_file_range(inode, async_cow->locked_page,
@@ -641,6 +650,7 @@ retry:
641 em->block_start = ins.objectid; 650 em->block_start = ins.objectid;
642 em->block_len = ins.offset; 651 em->block_len = ins.offset;
643 em->bdev = root->fs_info->fs_devices->latest_bdev; 652 em->bdev = root->fs_info->fs_devices->latest_bdev;
653 em->compress_type = async_extent->compress_type;
644 set_bit(EXTENT_FLAG_PINNED, &em->flags); 654 set_bit(EXTENT_FLAG_PINNED, &em->flags);
645 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 655 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
646 656
@@ -657,11 +667,13 @@ retry:
657 async_extent->ram_size - 1, 0); 667 async_extent->ram_size - 1, 0);
658 } 668 }
659 669
660 ret = btrfs_add_ordered_extent(inode, async_extent->start, 670 ret = btrfs_add_ordered_extent_compress(inode,
661 ins.objectid, 671 async_extent->start,
662 async_extent->ram_size, 672 ins.objectid,
663 ins.offset, 673 async_extent->ram_size,
664 BTRFS_ORDERED_COMPRESSED); 674 ins.offset,
675 BTRFS_ORDERED_COMPRESSED,
676 async_extent->compress_type);
665 BUG_ON(ret); 677 BUG_ON(ret);
666 678
667 /* 679 /*
@@ -693,6 +705,38 @@ retry:
693 return 0; 705 return 0;
694} 706}
695 707
708static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
709 u64 num_bytes)
710{
711 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
712 struct extent_map *em;
713 u64 alloc_hint = 0;
714
715 read_lock(&em_tree->lock);
716 em = search_extent_mapping(em_tree, start, num_bytes);
717 if (em) {
718 /*
719 * if block start isn't an actual block number then find the
720 * first block in this inode and use that as a hint. If that
721 * block is also bogus then just don't worry about it.
722 */
723 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
724 free_extent_map(em);
725 em = search_extent_mapping(em_tree, 0, 0);
726 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
727 alloc_hint = em->block_start;
728 if (em)
729 free_extent_map(em);
730 } else {
731 alloc_hint = em->block_start;
732 free_extent_map(em);
733 }
734 }
735 read_unlock(&em_tree->lock);
736
737 return alloc_hint;
738}
739
696/* 740/*
697 * when extent_io.c finds a delayed allocation range in the file, 741 * when extent_io.c finds a delayed allocation range in the file,
698 * the call backs end up in this code. The basic idea is to 742 * the call backs end up in this code. The basic idea is to
@@ -720,18 +764,16 @@ static noinline int cow_file_range(struct inode *inode,
720 u64 disk_num_bytes; 764 u64 disk_num_bytes;
721 u64 cur_alloc_size; 765 u64 cur_alloc_size;
722 u64 blocksize = root->sectorsize; 766 u64 blocksize = root->sectorsize;
723 u64 actual_end;
724 u64 isize = i_size_read(inode);
725 struct btrfs_key ins; 767 struct btrfs_key ins;
726 struct extent_map *em; 768 struct extent_map *em;
727 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 769 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
728 int ret = 0; 770 int ret = 0;
729 771
772 BUG_ON(root == root->fs_info->tree_root);
730 trans = btrfs_join_transaction(root, 1); 773 trans = btrfs_join_transaction(root, 1);
731 BUG_ON(!trans); 774 BUG_ON(!trans);
732 btrfs_set_trans_block_group(trans, inode); 775 btrfs_set_trans_block_group(trans, inode);
733 776 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
734 actual_end = min_t(u64, isize, end + 1);
735 777
736 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 778 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
737 num_bytes = max(blocksize, num_bytes); 779 num_bytes = max(blocksize, num_bytes);
@@ -749,7 +791,6 @@ static noinline int cow_file_range(struct inode *inode,
749 EXTENT_CLEAR_UNLOCK_PAGE | 791 EXTENT_CLEAR_UNLOCK_PAGE |
750 EXTENT_CLEAR_UNLOCK | 792 EXTENT_CLEAR_UNLOCK |
751 EXTENT_CLEAR_DELALLOC | 793 EXTENT_CLEAR_DELALLOC |
752 EXTENT_CLEAR_ACCOUNTING |
753 EXTENT_CLEAR_DIRTY | 794 EXTENT_CLEAR_DIRTY |
754 EXTENT_SET_WRITEBACK | 795 EXTENT_SET_WRITEBACK |
755 EXTENT_END_WRITEBACK); 796 EXTENT_END_WRITEBACK);
@@ -765,35 +806,13 @@ static noinline int cow_file_range(struct inode *inode,
765 BUG_ON(disk_num_bytes > 806 BUG_ON(disk_num_bytes >
766 btrfs_super_total_bytes(&root->fs_info->super_copy)); 807 btrfs_super_total_bytes(&root->fs_info->super_copy));
767 808
768 809 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
769 read_lock(&BTRFS_I(inode)->extent_tree.lock);
770 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
771 start, num_bytes);
772 if (em) {
773 /*
774 * if block start isn't an actual block number then find the
775 * first block in this inode and use that as a hint. If that
776 * block is also bogus then just don't worry about it.
777 */
778 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
779 free_extent_map(em);
780 em = search_extent_mapping(em_tree, 0, 0);
781 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
782 alloc_hint = em->block_start;
783 if (em)
784 free_extent_map(em);
785 } else {
786 alloc_hint = em->block_start;
787 free_extent_map(em);
788 }
789 }
790 read_unlock(&BTRFS_I(inode)->extent_tree.lock);
791 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 810 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
792 811
793 while (disk_num_bytes > 0) { 812 while (disk_num_bytes > 0) {
794 unsigned long op; 813 unsigned long op;
795 814
796 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 815 cur_alloc_size = disk_num_bytes;
797 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 816 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
798 root->sectorsize, 0, alloc_hint, 817 root->sectorsize, 0, alloc_hint,
799 (u64)-1, &ins, 1); 818 (u64)-1, &ins, 1);
@@ -1020,10 +1039,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1020 int type; 1039 int type;
1021 int nocow; 1040 int nocow;
1022 int check_prev = 1; 1041 int check_prev = 1;
1042 bool nolock = false;
1023 1043
1024 path = btrfs_alloc_path(); 1044 path = btrfs_alloc_path();
1025 BUG_ON(!path); 1045 BUG_ON(!path);
1026 trans = btrfs_join_transaction(root, 1); 1046 if (root == root->fs_info->tree_root) {
1047 nolock = true;
1048 trans = btrfs_join_transaction_nolock(root, 1);
1049 } else {
1050 trans = btrfs_join_transaction(root, 1);
1051 }
1027 BUG_ON(!trans); 1052 BUG_ON(!trans);
1028 1053
1029 cow_start = (u64)-1; 1054 cow_start = (u64)-1;
@@ -1170,6 +1195,13 @@ out_check:
1170 num_bytes, num_bytes, type); 1195 num_bytes, num_bytes, type);
1171 BUG_ON(ret); 1196 BUG_ON(ret);
1172 1197
1198 if (root->root_key.objectid ==
1199 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1200 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1201 num_bytes);
1202 BUG_ON(ret);
1203 }
1204
1173 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1205 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
1174 cur_offset, cur_offset + num_bytes - 1, 1206 cur_offset, cur_offset + num_bytes - 1,
1175 locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1207 locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -1189,8 +1221,13 @@ out_check:
1189 BUG_ON(ret); 1221 BUG_ON(ret);
1190 } 1222 }
1191 1223
1192 ret = btrfs_end_transaction(trans, root); 1224 if (nolock) {
1193 BUG_ON(ret); 1225 ret = btrfs_end_transaction_nolock(trans, root);
1226 BUG_ON(ret);
1227 } else {
1228 ret = btrfs_end_transaction(trans, root);
1229 BUG_ON(ret);
1230 }
1194 btrfs_free_path(path); 1231 btrfs_free_path(path);
1195 return 0; 1232 return 0;
1196} 1233}
@@ -1211,7 +1248,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1211 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1248 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
1212 ret = run_delalloc_nocow(inode, locked_page, start, end, 1249 ret = run_delalloc_nocow(inode, locked_page, start, end,
1213 page_started, 0, nr_written); 1250 page_started, 0, nr_written);
1214 else if (!btrfs_test_opt(root, COMPRESS)) 1251 else if (!btrfs_test_opt(root, COMPRESS) &&
1252 !(BTRFS_I(inode)->force_compress))
1215 ret = cow_file_range(inode, locked_page, start, end, 1253 ret = cow_file_range(inode, locked_page, start, end,
1216 page_started, nr_written, 1); 1254 page_started, nr_written, 1);
1217 else 1255 else
@@ -1221,36 +1259,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1221} 1259}
1222 1260
1223static int btrfs_split_extent_hook(struct inode *inode, 1261static int btrfs_split_extent_hook(struct inode *inode,
1224 struct extent_state *orig, u64 split) 1262 struct extent_state *orig, u64 split)
1225{ 1263{
1226 struct btrfs_root *root = BTRFS_I(inode)->root; 1264 /* not delalloc, ignore it */
1227 u64 size;
1228
1229 if (!(orig->state & EXTENT_DELALLOC)) 1265 if (!(orig->state & EXTENT_DELALLOC))
1230 return 0; 1266 return 0;
1231 1267
1232 size = orig->end - orig->start + 1; 1268 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
1233 if (size > root->fs_info->max_extent) {
1234 u64 num_extents;
1235 u64 new_size;
1236
1237 new_size = orig->end - split + 1;
1238 num_extents = div64_u64(size + root->fs_info->max_extent - 1,
1239 root->fs_info->max_extent);
1240
1241 /*
1242 * if we break a large extent up then leave oustanding_extents
1243 * be, since we've already accounted for the large extent.
1244 */
1245 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1246 root->fs_info->max_extent) < num_extents)
1247 return 0;
1248 }
1249
1250 spin_lock(&BTRFS_I(inode)->accounting_lock);
1251 BTRFS_I(inode)->outstanding_extents++;
1252 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1253
1254 return 0; 1269 return 0;
1255} 1270}
1256 1271
@@ -1264,42 +1279,11 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1264 struct extent_state *new, 1279 struct extent_state *new,
1265 struct extent_state *other) 1280 struct extent_state *other)
1266{ 1281{
1267 struct btrfs_root *root = BTRFS_I(inode)->root;
1268 u64 new_size, old_size;
1269 u64 num_extents;
1270
1271 /* not delalloc, ignore it */ 1282 /* not delalloc, ignore it */
1272 if (!(other->state & EXTENT_DELALLOC)) 1283 if (!(other->state & EXTENT_DELALLOC))
1273 return 0; 1284 return 0;
1274 1285
1275 old_size = other->end - other->start + 1; 1286 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
1276 if (new->start < other->start)
1277 new_size = other->end - new->start + 1;
1278 else
1279 new_size = new->end - other->start + 1;
1280
1281 /* we're not bigger than the max, unreserve the space and go */
1282 if (new_size <= root->fs_info->max_extent) {
1283 spin_lock(&BTRFS_I(inode)->accounting_lock);
1284 BTRFS_I(inode)->outstanding_extents--;
1285 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1286 return 0;
1287 }
1288
1289 /*
1290 * If we grew by another max_extent, just return, we want to keep that
1291 * reserved amount.
1292 */
1293 num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
1294 root->fs_info->max_extent);
1295 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1296 root->fs_info->max_extent) > num_extents)
1297 return 0;
1298
1299 spin_lock(&BTRFS_I(inode)->accounting_lock);
1300 BTRFS_I(inode)->outstanding_extents--;
1301 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1302
1303 return 0; 1287 return 0;
1304} 1288}
1305 1289
@@ -1308,8 +1292,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1308 * bytes in this file, and to maintain the list of inodes that 1292 * bytes in this file, and to maintain the list of inodes that
1309 * have pending delalloc work to be done. 1293 * have pending delalloc work to be done.
1310 */ 1294 */
1311static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, 1295static int btrfs_set_bit_hook(struct inode *inode,
1312 unsigned long old, unsigned long bits) 1296 struct extent_state *state, int *bits)
1313{ 1297{
1314 1298
1315 /* 1299 /*
@@ -1317,17 +1301,21 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1317 * but in this case, we are only testeing for the DELALLOC 1301 * but in this case, we are only testeing for the DELALLOC
1318 * bit, which is only set or cleared with irqs on 1302 * bit, which is only set or cleared with irqs on
1319 */ 1303 */
1320 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1304 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1321 struct btrfs_root *root = BTRFS_I(inode)->root; 1305 struct btrfs_root *root = BTRFS_I(inode)->root;
1306 u64 len = state->end + 1 - state->start;
1307 int do_list = (root->root_key.objectid !=
1308 BTRFS_ROOT_TREE_OBJECTID);
1309
1310 if (*bits & EXTENT_FIRST_DELALLOC)
1311 *bits &= ~EXTENT_FIRST_DELALLOC;
1312 else
1313 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
1322 1314
1323 spin_lock(&BTRFS_I(inode)->accounting_lock);
1324 BTRFS_I(inode)->outstanding_extents++;
1325 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1326 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1327 spin_lock(&root->fs_info->delalloc_lock); 1315 spin_lock(&root->fs_info->delalloc_lock);
1328 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1316 BTRFS_I(inode)->delalloc_bytes += len;
1329 root->fs_info->delalloc_bytes += end - start + 1; 1317 root->fs_info->delalloc_bytes += len;
1330 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1318 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1331 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1319 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1332 &root->fs_info->delalloc_inodes); 1320 &root->fs_info->delalloc_inodes);
1333 } 1321 }
@@ -1340,45 +1328,36 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1340 * extent_io.c clear_bit_hook, see set_bit_hook for why 1328 * extent_io.c clear_bit_hook, see set_bit_hook for why
1341 */ 1329 */
1342static int btrfs_clear_bit_hook(struct inode *inode, 1330static int btrfs_clear_bit_hook(struct inode *inode,
1343 struct extent_state *state, unsigned long bits) 1331 struct extent_state *state, int *bits)
1344{ 1332{
1345 /* 1333 /*
1346 * set_bit and clear bit hooks normally require _irqsave/restore 1334 * set_bit and clear bit hooks normally require _irqsave/restore
1347 * but in this case, we are only testeing for the DELALLOC 1335 * but in this case, we are only testeing for the DELALLOC
1348 * bit, which is only set or cleared with irqs on 1336 * bit, which is only set or cleared with irqs on
1349 */ 1337 */
1350 if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1338 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1351 struct btrfs_root *root = BTRFS_I(inode)->root; 1339 struct btrfs_root *root = BTRFS_I(inode)->root;
1340 u64 len = state->end + 1 - state->start;
1341 int do_list = (root->root_key.objectid !=
1342 BTRFS_ROOT_TREE_OBJECTID);
1352 1343
1353 if (bits & EXTENT_DO_ACCOUNTING) { 1344 if (*bits & EXTENT_FIRST_DELALLOC)
1354 spin_lock(&BTRFS_I(inode)->accounting_lock); 1345 *bits &= ~EXTENT_FIRST_DELALLOC;
1355 BTRFS_I(inode)->outstanding_extents--; 1346 else if (!(*bits & EXTENT_DO_ACCOUNTING))
1356 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1347 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
1357 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1348
1358 } 1349 if (*bits & EXTENT_DO_ACCOUNTING)
1350 btrfs_delalloc_release_metadata(inode, len);
1351
1352 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1353 && do_list)
1354 btrfs_free_reserved_data_space(inode, len);
1359 1355
1360 spin_lock(&root->fs_info->delalloc_lock); 1356 spin_lock(&root->fs_info->delalloc_lock);
1361 if (state->end - state->start + 1 > 1357 root->fs_info->delalloc_bytes -= len;
1362 root->fs_info->delalloc_bytes) { 1358 BTRFS_I(inode)->delalloc_bytes -= len;
1363 printk(KERN_INFO "btrfs warning: delalloc account " 1359
1364 "%llu %llu\n", 1360 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1365 (unsigned long long)
1366 state->end - state->start + 1,
1367 (unsigned long long)
1368 root->fs_info->delalloc_bytes);
1369 btrfs_delalloc_free_space(root, inode, (u64)-1);
1370 root->fs_info->delalloc_bytes = 0;
1371 BTRFS_I(inode)->delalloc_bytes = 0;
1372 } else {
1373 btrfs_delalloc_free_space(root, inode,
1374 state->end -
1375 state->start + 1);
1376 root->fs_info->delalloc_bytes -= state->end -
1377 state->start + 1;
1378 BTRFS_I(inode)->delalloc_bytes -= state->end -
1379 state->start + 1;
1380 }
1381 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
1382 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1361 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1383 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1362 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1384 } 1363 }
@@ -1413,7 +1392,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1413 1392
1414 if (map_length < length + size) 1393 if (map_length < length + size)
1415 return 1; 1394 return 1;
1416 return 0; 1395 return ret;
1417} 1396}
1418 1397
1419/* 1398/*
@@ -1426,7 +1405,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1426 */ 1405 */
1427static int __btrfs_submit_bio_start(struct inode *inode, int rw, 1406static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1428 struct bio *bio, int mirror_num, 1407 struct bio *bio, int mirror_num,
1429 unsigned long bio_flags) 1408 unsigned long bio_flags,
1409 u64 bio_offset)
1430{ 1410{
1431 struct btrfs_root *root = BTRFS_I(inode)->root; 1411 struct btrfs_root *root = BTRFS_I(inode)->root;
1432 int ret = 0; 1412 int ret = 0;
@@ -1445,7 +1425,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1445 * are inserted into the btree 1425 * are inserted into the btree
1446 */ 1426 */
1447static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, 1427static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1448 int mirror_num, unsigned long bio_flags) 1428 int mirror_num, unsigned long bio_flags,
1429 u64 bio_offset)
1449{ 1430{
1450 struct btrfs_root *root = BTRFS_I(inode)->root; 1431 struct btrfs_root *root = BTRFS_I(inode)->root;
1451 return btrfs_map_bio(root, rw, bio, mirror_num, 1); 1432 return btrfs_map_bio(root, rw, bio, mirror_num, 1);
@@ -1456,7 +1437,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1456 * on write, or reading the csums from the tree before a read 1437 * on write, or reading the csums from the tree before a read
1457 */ 1438 */
1458static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, 1439static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1459 int mirror_num, unsigned long bio_flags) 1440 int mirror_num, unsigned long bio_flags,
1441 u64 bio_offset)
1460{ 1442{
1461 struct btrfs_root *root = BTRFS_I(inode)->root; 1443 struct btrfs_root *root = BTRFS_I(inode)->root;
1462 int ret = 0; 1444 int ret = 0;
@@ -1464,10 +1446,13 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1464 1446
1465 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1447 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1466 1448
1467 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1449 if (root == root->fs_info->tree_root)
1450 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1451 else
1452 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1468 BUG_ON(ret); 1453 BUG_ON(ret);
1469 1454
1470 if (!(rw & (1 << BIO_RW))) { 1455 if (!(rw & REQ_WRITE)) {
1471 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1456 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1472 return btrfs_submit_compressed_read(inode, bio, 1457 return btrfs_submit_compressed_read(inode, bio,
1473 mirror_num, bio_flags); 1458 mirror_num, bio_flags);
@@ -1481,7 +1466,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1481 /* we're doing a write, do the async checksumming */ 1466 /* we're doing a write, do the async checksumming */
1482 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, 1467 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
1483 inode, rw, bio, mirror_num, 1468 inode, rw, bio, mirror_num,
1484 bio_flags, __btrfs_submit_bio_start, 1469 bio_flags, bio_offset,
1470 __btrfs_submit_bio_start,
1485 __btrfs_submit_bio_done); 1471 __btrfs_submit_bio_done);
1486 } 1472 }
1487 1473
@@ -1508,12 +1494,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1508 return 0; 1494 return 0;
1509} 1495}
1510 1496
1511int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) 1497int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1498 struct extent_state **cached_state)
1512{ 1499{
1513 if ((end & (PAGE_CACHE_SIZE - 1)) == 0) 1500 if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
1514 WARN_ON(1); 1501 WARN_ON(1);
1515 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, 1502 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1516 GFP_NOFS); 1503 cached_state, GFP_NOFS);
1517} 1504}
1518 1505
1519/* see btrfs_writepage_start_hook for details on why this is required */ 1506/* see btrfs_writepage_start_hook for details on why this is required */
@@ -1526,6 +1513,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1526{ 1513{
1527 struct btrfs_writepage_fixup *fixup; 1514 struct btrfs_writepage_fixup *fixup;
1528 struct btrfs_ordered_extent *ordered; 1515 struct btrfs_ordered_extent *ordered;
1516 struct extent_state *cached_state = NULL;
1529 struct page *page; 1517 struct page *page;
1530 struct inode *inode; 1518 struct inode *inode;
1531 u64 page_start; 1519 u64 page_start;
@@ -1544,7 +1532,8 @@ again:
1544 page_start = page_offset(page); 1532 page_start = page_offset(page);
1545 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1533 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1546 1534
1547 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1535 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1536 &cached_state, GFP_NOFS);
1548 1537
1549 /* already ordered? We're done */ 1538 /* already ordered? We're done */
1550 if (PagePrivate2(page)) 1539 if (PagePrivate2(page))
@@ -1552,17 +1541,19 @@ again:
1552 1541
1553 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1542 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1554 if (ordered) { 1543 if (ordered) {
1555 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, 1544 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
1556 page_end, GFP_NOFS); 1545 page_end, &cached_state, GFP_NOFS);
1557 unlock_page(page); 1546 unlock_page(page);
1558 btrfs_start_ordered_extent(inode, ordered, 1); 1547 btrfs_start_ordered_extent(inode, ordered, 1);
1559 goto again; 1548 goto again;
1560 } 1549 }
1561 1550
1562 btrfs_set_extent_delalloc(inode, page_start, page_end); 1551 BUG();
1552 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1563 ClearPageChecked(page); 1553 ClearPageChecked(page);
1564out: 1554out:
1565 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1555 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1556 &cached_state, GFP_NOFS);
1566out_page: 1557out_page:
1567 unlock_page(page); 1558 unlock_page(page);
1568 page_cache_release(page); 1559 page_cache_release(page);
@@ -1681,24 +1672,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1681 * before we start the transaction. It limits the amount of btree 1672 * before we start the transaction. It limits the amount of btree
1682 * reads required while inside the transaction. 1673 * reads required while inside the transaction.
1683 */ 1674 */
1684static noinline void reada_csum(struct btrfs_root *root,
1685 struct btrfs_path *path,
1686 struct btrfs_ordered_extent *ordered_extent)
1687{
1688 struct btrfs_ordered_sum *sum;
1689 u64 bytenr;
1690
1691 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1692 list);
1693 bytenr = sum->sums[0].bytenr;
1694
1695 /*
1696 * we don't care about the results, the point of this search is
1697 * just to get the btree leaves into ram
1698 */
1699 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1700}
1701
1702/* as ordered data IO finishes, this gets called so we can finish 1675/* as ordered data IO finishes, this gets called so we can finish
1703 * an ordered extent if the range of bytes in the file it covers are 1676 * an ordered extent if the range of bytes in the file it covers are
1704 * fully written. 1677 * fully written.
@@ -1706,96 +1679,94 @@ static noinline void reada_csum(struct btrfs_root *root,
1706static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 1679static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1707{ 1680{
1708 struct btrfs_root *root = BTRFS_I(inode)->root; 1681 struct btrfs_root *root = BTRFS_I(inode)->root;
1709 struct btrfs_trans_handle *trans; 1682 struct btrfs_trans_handle *trans = NULL;
1710 struct btrfs_ordered_extent *ordered_extent = NULL; 1683 struct btrfs_ordered_extent *ordered_extent = NULL;
1711 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1684 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1712 struct btrfs_path *path; 1685 struct extent_state *cached_state = NULL;
1713 int compressed = 0; 1686 int compress_type = 0;
1714 int ret; 1687 int ret;
1688 bool nolock = false;
1715 1689
1716 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); 1690 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1691 end - start + 1);
1717 if (!ret) 1692 if (!ret)
1718 return 0; 1693 return 0;
1694 BUG_ON(!ordered_extent);
1719 1695
1720 /* 1696 nolock = (root == root->fs_info->tree_root);
1721 * before we join the transaction, try to do some of our IO.
1722 * This will limit the amount of IO that we have to do with
1723 * the transaction running. We're unlikely to need to do any
1724 * IO if the file extents are new, the disk_i_size checks
1725 * covers the most common case.
1726 */
1727 if (start < BTRFS_I(inode)->disk_i_size) {
1728 path = btrfs_alloc_path();
1729 if (path) {
1730 ret = btrfs_lookup_file_extent(NULL, root, path,
1731 inode->i_ino,
1732 start, 0);
1733 ordered_extent = btrfs_lookup_ordered_extent(inode,
1734 start);
1735 if (!list_empty(&ordered_extent->list)) {
1736 btrfs_release_path(root, path);
1737 reada_csum(root, path, ordered_extent);
1738 }
1739 btrfs_free_path(path);
1740 }
1741 }
1742 1697
1743 if (!ordered_extent)
1744 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1745 BUG_ON(!ordered_extent);
1746 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1698 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1747 BUG_ON(!list_empty(&ordered_extent->list)); 1699 BUG_ON(!list_empty(&ordered_extent->list));
1748 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1700 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1749 if (!ret) { 1701 if (!ret) {
1750 trans = btrfs_join_transaction(root, 1); 1702 if (nolock)
1703 trans = btrfs_join_transaction_nolock(root, 1);
1704 else
1705 trans = btrfs_join_transaction(root, 1);
1706 BUG_ON(!trans);
1707 btrfs_set_trans_block_group(trans, inode);
1708 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1751 ret = btrfs_update_inode(trans, root, inode); 1709 ret = btrfs_update_inode(trans, root, inode);
1752 BUG_ON(ret); 1710 BUG_ON(ret);
1753 btrfs_end_transaction(trans, root);
1754 } 1711 }
1755 goto out; 1712 goto out;
1756 } 1713 }
1757 1714
1758 lock_extent(io_tree, ordered_extent->file_offset, 1715 lock_extent_bits(io_tree, ordered_extent->file_offset,
1759 ordered_extent->file_offset + ordered_extent->len - 1, 1716 ordered_extent->file_offset + ordered_extent->len - 1,
1760 GFP_NOFS); 1717 0, &cached_state, GFP_NOFS);
1761 1718
1762 trans = btrfs_join_transaction(root, 1); 1719 if (nolock)
1720 trans = btrfs_join_transaction_nolock(root, 1);
1721 else
1722 trans = btrfs_join_transaction(root, 1);
1723 btrfs_set_trans_block_group(trans, inode);
1724 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1763 1725
1764 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1726 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1765 compressed = 1; 1727 compress_type = ordered_extent->compress_type;
1766 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 1728 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
1767 BUG_ON(compressed); 1729 BUG_ON(compress_type);
1768 ret = btrfs_mark_extent_written(trans, inode, 1730 ret = btrfs_mark_extent_written(trans, inode,
1769 ordered_extent->file_offset, 1731 ordered_extent->file_offset,
1770 ordered_extent->file_offset + 1732 ordered_extent->file_offset +
1771 ordered_extent->len); 1733 ordered_extent->len);
1772 BUG_ON(ret); 1734 BUG_ON(ret);
1773 } else { 1735 } else {
1736 BUG_ON(root == root->fs_info->tree_root);
1774 ret = insert_reserved_file_extent(trans, inode, 1737 ret = insert_reserved_file_extent(trans, inode,
1775 ordered_extent->file_offset, 1738 ordered_extent->file_offset,
1776 ordered_extent->start, 1739 ordered_extent->start,
1777 ordered_extent->disk_len, 1740 ordered_extent->disk_len,
1778 ordered_extent->len, 1741 ordered_extent->len,
1779 ordered_extent->len, 1742 ordered_extent->len,
1780 compressed, 0, 0, 1743 compress_type, 0, 0,
1781 BTRFS_FILE_EXTENT_REG); 1744 BTRFS_FILE_EXTENT_REG);
1782 unpin_extent_cache(&BTRFS_I(inode)->extent_tree, 1745 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1783 ordered_extent->file_offset, 1746 ordered_extent->file_offset,
1784 ordered_extent->len); 1747 ordered_extent->len);
1785 BUG_ON(ret); 1748 BUG_ON(ret);
1786 } 1749 }
1787 unlock_extent(io_tree, ordered_extent->file_offset, 1750 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1788 ordered_extent->file_offset + ordered_extent->len - 1, 1751 ordered_extent->file_offset +
1789 GFP_NOFS); 1752 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1753
1790 add_pending_csums(trans, inode, ordered_extent->file_offset, 1754 add_pending_csums(trans, inode, ordered_extent->file_offset,
1791 &ordered_extent->list); 1755 &ordered_extent->list);
1792 1756
1793 /* this also removes the ordered extent from the tree */
1794 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1757 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1795 ret = btrfs_update_inode(trans, root, inode); 1758 ret = btrfs_update_inode(trans, root, inode);
1796 BUG_ON(ret); 1759 BUG_ON(ret);
1797 btrfs_end_transaction(trans, root);
1798out: 1760out:
1761 if (nolock) {
1762 if (trans)
1763 btrfs_end_transaction_nolock(trans, root);
1764 } else {
1765 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1766 if (trans)
1767 btrfs_end_transaction(trans, root);
1768 }
1769
1799 /* once for us */ 1770 /* once for us */
1800 btrfs_put_ordered_extent(ordered_extent); 1771 btrfs_put_ordered_extent(ordered_extent);
1801 /* once for the tree */ 1772 /* once for the tree */
@@ -1871,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1871 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 1842 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
1872 logical = em->block_start; 1843 logical = em->block_start;
1873 failrec->bio_flags = EXTENT_BIO_COMPRESSED; 1844 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
1845 extent_set_compress_type(&failrec->bio_flags,
1846 em->compress_type);
1874 } 1847 }
1875 failrec->logical = logical; 1848 failrec->logical = logical;
1876 free_extent_map(em); 1849 free_extent_map(em);
@@ -1910,14 +1883,14 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1910 bio->bi_size = 0; 1883 bio->bi_size = 0;
1911 1884
1912 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 1885 bio_add_page(bio, page, failrec->len, start - page_offset(page));
1913 if (failed_bio->bi_rw & (1 << BIO_RW)) 1886 if (failed_bio->bi_rw & REQ_WRITE)
1914 rw = WRITE; 1887 rw = WRITE;
1915 else 1888 else
1916 rw = READ; 1889 rw = READ;
1917 1890
1918 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, 1891 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1919 failrec->last_mirror, 1892 failrec->last_mirror,
1920 failrec->bio_flags); 1893 failrec->bio_flags, 0);
1921 return 0; 1894 return 0;
1922} 1895}
1923 1896
@@ -2072,32 +2045,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
2072} 2045}
2073 2046
2074/* 2047/*
2048 * calculate extra metadata reservation when snapshotting a subvolume
2049 * contains orphan files.
2050 */
2051void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
2052 struct btrfs_pending_snapshot *pending,
2053 u64 *bytes_to_reserve)
2054{
2055 struct btrfs_root *root;
2056 struct btrfs_block_rsv *block_rsv;
2057 u64 num_bytes;
2058 int index;
2059
2060 root = pending->root;
2061 if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
2062 return;
2063
2064 block_rsv = root->orphan_block_rsv;
2065
2066 /* orphan block reservation for the snapshot */
2067 num_bytes = block_rsv->size;
2068
2069 /*
2070 * after the snapshot is created, COWing tree blocks may use more
2071 * space than it frees. So we should make sure there is enough
2072 * reserved space.
2073 */
2074 index = trans->transid & 0x1;
2075 if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
2076 num_bytes += block_rsv->size -
2077 (block_rsv->reserved + block_rsv->freed[index]);
2078 }
2079
2080 *bytes_to_reserve += num_bytes;
2081}
2082
2083void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
2084 struct btrfs_pending_snapshot *pending)
2085{
2086 struct btrfs_root *root = pending->root;
2087 struct btrfs_root *snap = pending->snap;
2088 struct btrfs_block_rsv *block_rsv;
2089 u64 num_bytes;
2090 int index;
2091 int ret;
2092
2093 if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
2094 return;
2095
2096 /* refill source subvolume's orphan block reservation */
2097 block_rsv = root->orphan_block_rsv;
2098 index = trans->transid & 0x1;
2099 if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
2100 num_bytes = block_rsv->size -
2101 (block_rsv->reserved + block_rsv->freed[index]);
2102 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
2103 root->orphan_block_rsv,
2104 num_bytes);
2105 BUG_ON(ret);
2106 }
2107
2108 /* setup orphan block reservation for the snapshot */
2109 block_rsv = btrfs_alloc_block_rsv(snap);
2110 BUG_ON(!block_rsv);
2111
2112 btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
2113 snap->orphan_block_rsv = block_rsv;
2114
2115 num_bytes = root->orphan_block_rsv->size;
2116 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
2117 block_rsv, num_bytes);
2118 BUG_ON(ret);
2119
2120#if 0
2121 /* insert orphan item for the snapshot */
2122 WARN_ON(!root->orphan_item_inserted);
2123 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2124 snap->root_key.objectid);
2125 BUG_ON(ret);
2126 snap->orphan_item_inserted = 1;
2127#endif
2128}
2129
2130enum btrfs_orphan_cleanup_state {
2131 ORPHAN_CLEANUP_STARTED = 1,
2132 ORPHAN_CLEANUP_DONE = 2,
2133};
2134
2135/*
2136 * This is called in transaction commmit time. If there are no orphan
2137 * files in the subvolume, it removes orphan item and frees block_rsv
2138 * structure.
2139 */
2140void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2141 struct btrfs_root *root)
2142{
2143 int ret;
2144
2145 if (!list_empty(&root->orphan_list) ||
2146 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
2147 return;
2148
2149 if (root->orphan_item_inserted &&
2150 btrfs_root_refs(&root->root_item) > 0) {
2151 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
2152 root->root_key.objectid);
2153 BUG_ON(ret);
2154 root->orphan_item_inserted = 0;
2155 }
2156
2157 if (root->orphan_block_rsv) {
2158 WARN_ON(root->orphan_block_rsv->size > 0);
2159 btrfs_free_block_rsv(root, root->orphan_block_rsv);
2160 root->orphan_block_rsv = NULL;
2161 }
2162}
2163
2164/*
2075 * This creates an orphan entry for the given inode in case something goes 2165 * This creates an orphan entry for the given inode in case something goes
2076 * wrong in the middle of an unlink/truncate. 2166 * wrong in the middle of an unlink/truncate.
2167 *
2168 * NOTE: caller of this function should reserve 5 units of metadata for
2169 * this function.
2077 */ 2170 */
2078int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) 2171int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2079{ 2172{
2080 struct btrfs_root *root = BTRFS_I(inode)->root; 2173 struct btrfs_root *root = BTRFS_I(inode)->root;
2081 int ret = 0; 2174 struct btrfs_block_rsv *block_rsv = NULL;
2175 int reserve = 0;
2176 int insert = 0;
2177 int ret;
2082 2178
2083 spin_lock(&root->list_lock); 2179 if (!root->orphan_block_rsv) {
2180 block_rsv = btrfs_alloc_block_rsv(root);
2181 BUG_ON(!block_rsv);
2182 }
2084 2183
2085 /* already on the orphan list, we're good */ 2184 spin_lock(&root->orphan_lock);
2086 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 2185 if (!root->orphan_block_rsv) {
2087 spin_unlock(&root->list_lock); 2186 root->orphan_block_rsv = block_rsv;
2088 return 0; 2187 } else if (block_rsv) {
2188 btrfs_free_block_rsv(root, block_rsv);
2189 block_rsv = NULL;
2190 }
2191
2192 if (list_empty(&BTRFS_I(inode)->i_orphan)) {
2193 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
2194#if 0
2195 /*
2196 * For proper ENOSPC handling, we should do orphan
2197 * cleanup when mounting. But this introduces backward
2198 * compatibility issue.
2199 */
2200 if (!xchg(&root->orphan_item_inserted, 1))
2201 insert = 2;
2202 else
2203 insert = 1;
2204#endif
2205 insert = 1;
2206 } else {
2207 WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
2089 } 2208 }
2090 2209
2091 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2210 if (!BTRFS_I(inode)->orphan_meta_reserved) {
2211 BTRFS_I(inode)->orphan_meta_reserved = 1;
2212 reserve = 1;
2213 }
2214 spin_unlock(&root->orphan_lock);
2092 2215
2093 spin_unlock(&root->list_lock); 2216 if (block_rsv)
2217 btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
2094 2218
2095 /* 2219 /* grab metadata reservation from transaction handle */
2096 * insert an orphan item to track this unlinked/truncated file 2220 if (reserve) {
2097 */ 2221 ret = btrfs_orphan_reserve_metadata(trans, inode);
2098 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); 2222 BUG_ON(ret);
2223 }
2099 2224
2100 return ret; 2225 /* insert an orphan item to track this unlinked/truncated file */
2226 if (insert >= 1) {
2227 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
2228 BUG_ON(ret);
2229 }
2230
2231 /* insert an orphan item to track subvolume contains orphan files */
2232 if (insert >= 2) {
2233 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2234 root->root_key.objectid);
2235 BUG_ON(ret);
2236 }
2237 return 0;
2101} 2238}
2102 2239
2103/* 2240/*
@@ -2107,26 +2244,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2107int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) 2244int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2108{ 2245{
2109 struct btrfs_root *root = BTRFS_I(inode)->root; 2246 struct btrfs_root *root = BTRFS_I(inode)->root;
2247 int delete_item = 0;
2248 int release_rsv = 0;
2110 int ret = 0; 2249 int ret = 0;
2111 2250
2112 spin_lock(&root->list_lock); 2251 spin_lock(&root->orphan_lock);
2113 2252 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
2114 if (list_empty(&BTRFS_I(inode)->i_orphan)) { 2253 list_del_init(&BTRFS_I(inode)->i_orphan);
2115 spin_unlock(&root->list_lock); 2254 delete_item = 1;
2116 return 0;
2117 } 2255 }
2118 2256
2119 list_del_init(&BTRFS_I(inode)->i_orphan); 2257 if (BTRFS_I(inode)->orphan_meta_reserved) {
2120 if (!trans) { 2258 BTRFS_I(inode)->orphan_meta_reserved = 0;
2121 spin_unlock(&root->list_lock); 2259 release_rsv = 1;
2122 return 0;
2123 } 2260 }
2261 spin_unlock(&root->orphan_lock);
2124 2262
2125 spin_unlock(&root->list_lock); 2263 if (trans && delete_item) {
2264 ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
2265 BUG_ON(ret);
2266 }
2126 2267
2127 ret = btrfs_del_orphan_item(trans, root, inode->i_ino); 2268 if (release_rsv)
2269 btrfs_orphan_release_metadata(inode);
2128 2270
2129 return ret; 2271 return 0;
2130} 2272}
2131 2273
2132/* 2274/*
@@ -2137,13 +2279,12 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2137{ 2279{
2138 struct btrfs_path *path; 2280 struct btrfs_path *path;
2139 struct extent_buffer *leaf; 2281 struct extent_buffer *leaf;
2140 struct btrfs_item *item;
2141 struct btrfs_key key, found_key; 2282 struct btrfs_key key, found_key;
2142 struct btrfs_trans_handle *trans; 2283 struct btrfs_trans_handle *trans;
2143 struct inode *inode; 2284 struct inode *inode;
2144 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2285 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2145 2286
2146 if (!xchg(&root->clean_orphans, 0)) 2287 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
2147 return; 2288 return;
2148 2289
2149 path = btrfs_alloc_path(); 2290 path = btrfs_alloc_path();
@@ -2175,7 +2316,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2175 2316
2176 /* pull out the item */ 2317 /* pull out the item */
2177 leaf = path->nodes[0]; 2318 leaf = path->nodes[0];
2178 item = btrfs_item_nr(leaf, path->slots[0]);
2179 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2319 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2180 2320
2181 /* make sure the item matches what we want */ 2321 /* make sure the item matches what we want */
@@ -2195,17 +2335,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2195 found_key.objectid = found_key.offset; 2335 found_key.objectid = found_key.offset;
2196 found_key.type = BTRFS_INODE_ITEM_KEY; 2336 found_key.type = BTRFS_INODE_ITEM_KEY;
2197 found_key.offset = 0; 2337 found_key.offset = 0;
2198 inode = btrfs_iget(root->fs_info->sb, &found_key, root); 2338 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2199 if (IS_ERR(inode)) 2339 BUG_ON(IS_ERR(inode));
2200 break;
2201 2340
2202 /* 2341 /*
2203 * add this inode to the orphan list so btrfs_orphan_del does 2342 * add this inode to the orphan list so btrfs_orphan_del does
2204 * the proper thing when we hit it 2343 * the proper thing when we hit it
2205 */ 2344 */
2206 spin_lock(&root->list_lock); 2345 spin_lock(&root->orphan_lock);
2207 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2346 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
2208 spin_unlock(&root->list_lock); 2347 spin_unlock(&root->orphan_lock);
2209 2348
2210 /* 2349 /*
2211 * if this is a bad inode, means we actually succeeded in 2350 * if this is a bad inode, means we actually succeeded in
@@ -2214,7 +2353,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2214 * do a destroy_inode 2353 * do a destroy_inode
2215 */ 2354 */
2216 if (is_bad_inode(inode)) { 2355 if (is_bad_inode(inode)) {
2217 trans = btrfs_start_transaction(root, 1); 2356 trans = btrfs_start_transaction(root, 0);
2218 btrfs_orphan_del(trans, inode); 2357 btrfs_orphan_del(trans, inode);
2219 btrfs_end_transaction(trans, root); 2358 btrfs_end_transaction(trans, root);
2220 iput(inode); 2359 iput(inode);
@@ -2232,13 +2371,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2232 /* this will do delete_inode and everything for us */ 2371 /* this will do delete_inode and everything for us */
2233 iput(inode); 2372 iput(inode);
2234 } 2373 }
2374 btrfs_free_path(path);
2375
2376 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
2377
2378 if (root->orphan_block_rsv)
2379 btrfs_block_rsv_release(root, root->orphan_block_rsv,
2380 (u64)-1);
2381
2382 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2383 trans = btrfs_join_transaction(root, 1);
2384 btrfs_end_transaction(trans, root);
2385 }
2235 2386
2236 if (nr_unlink) 2387 if (nr_unlink)
2237 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); 2388 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
2238 if (nr_truncate) 2389 if (nr_truncate)
2239 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); 2390 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
2240
2241 btrfs_free_path(path);
2242} 2391}
2243 2392
2244/* 2393/*
@@ -2542,7 +2691,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2542 2691
2543 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2692 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2544 dir, index); 2693 dir, index);
2545 BUG_ON(ret); 2694 if (ret == -ENOENT)
2695 ret = 0;
2546err: 2696err:
2547 btrfs_free_path(path); 2697 btrfs_free_path(path);
2548 if (ret) 2698 if (ret)
@@ -2557,29 +2707,201 @@ out:
2557 return ret; 2707 return ret;
2558} 2708}
2559 2709
2560static int btrfs_unlink(struct inode *dir, struct dentry *dentry) 2710/* helper to check if there is any shared block in the path */
2711static int check_path_shared(struct btrfs_root *root,
2712 struct btrfs_path *path)
2713{
2714 struct extent_buffer *eb;
2715 int level;
2716 u64 refs = 1;
2717 int uninitialized_var(ret);
2718
2719 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2720 if (!path->nodes[level])
2721 break;
2722 eb = path->nodes[level];
2723 if (!btrfs_block_can_be_shared(root, eb))
2724 continue;
2725 ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
2726 &refs, NULL);
2727 if (refs > 1)
2728 return 1;
2729 }
2730 return ret; /* XXX callers? */
2731}
2732
2733/*
2734 * helper to start transaction for unlink and rmdir.
2735 *
2736 * unlink and rmdir are special in btrfs, they do not always free space.
2737 * so in enospc case, we should make sure they will free space before
2738 * allowing them to use the global metadata reservation.
2739 */
2740static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2741 struct dentry *dentry)
2561{ 2742{
2562 struct btrfs_root *root;
2563 struct btrfs_trans_handle *trans; 2743 struct btrfs_trans_handle *trans;
2744 struct btrfs_root *root = BTRFS_I(dir)->root;
2745 struct btrfs_path *path;
2746 struct btrfs_inode_ref *ref;
2747 struct btrfs_dir_item *di;
2564 struct inode *inode = dentry->d_inode; 2748 struct inode *inode = dentry->d_inode;
2749 u64 index;
2750 int check_link = 1;
2751 int err = -ENOSPC;
2565 int ret; 2752 int ret;
2566 unsigned long nr = 0;
2567 2753
2568 root = BTRFS_I(dir)->root; 2754 trans = btrfs_start_transaction(root, 10);
2755 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2756 return trans;
2569 2757
2570 /* 2758 if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
2571 * 5 items for unlink inode 2759 return ERR_PTR(-ENOSPC);
2572 * 1 for orphan 2760
2573 */ 2761 /* check if there is someone else holds reference */
2574 ret = btrfs_reserve_metadata_space(root, 6); 2762 if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
2575 if (ret) 2763 return ERR_PTR(-ENOSPC);
2576 return ret; 2764
2765 if (atomic_read(&inode->i_count) > 2)
2766 return ERR_PTR(-ENOSPC);
2577 2767
2578 trans = btrfs_start_transaction(root, 1); 2768 if (xchg(&root->fs_info->enospc_unlink, 1))
2769 return ERR_PTR(-ENOSPC);
2770
2771 path = btrfs_alloc_path();
2772 if (!path) {
2773 root->fs_info->enospc_unlink = 0;
2774 return ERR_PTR(-ENOMEM);
2775 }
2776
2777 trans = btrfs_start_transaction(root, 0);
2579 if (IS_ERR(trans)) { 2778 if (IS_ERR(trans)) {
2580 btrfs_unreserve_metadata_space(root, 6); 2779 btrfs_free_path(path);
2581 return PTR_ERR(trans); 2780 root->fs_info->enospc_unlink = 0;
2781 return trans;
2782 }
2783
2784 path->skip_locking = 1;
2785 path->search_commit_root = 1;
2786
2787 ret = btrfs_lookup_inode(trans, root, path,
2788 &BTRFS_I(dir)->location, 0);
2789 if (ret < 0) {
2790 err = ret;
2791 goto out;
2792 }
2793 if (ret == 0) {
2794 if (check_path_shared(root, path))
2795 goto out;
2796 } else {
2797 check_link = 0;
2798 }
2799 btrfs_release_path(root, path);
2800
2801 ret = btrfs_lookup_inode(trans, root, path,
2802 &BTRFS_I(inode)->location, 0);
2803 if (ret < 0) {
2804 err = ret;
2805 goto out;
2806 }
2807 if (ret == 0) {
2808 if (check_path_shared(root, path))
2809 goto out;
2810 } else {
2811 check_link = 0;
2812 }
2813 btrfs_release_path(root, path);
2814
2815 if (ret == 0 && S_ISREG(inode->i_mode)) {
2816 ret = btrfs_lookup_file_extent(trans, root, path,
2817 inode->i_ino, (u64)-1, 0);
2818 if (ret < 0) {
2819 err = ret;
2820 goto out;
2821 }
2822 BUG_ON(ret == 0);
2823 if (check_path_shared(root, path))
2824 goto out;
2825 btrfs_release_path(root, path);
2826 }
2827
2828 if (!check_link) {
2829 err = 0;
2830 goto out;
2831 }
2832
2833 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
2834 dentry->d_name.name, dentry->d_name.len, 0);
2835 if (IS_ERR(di)) {
2836 err = PTR_ERR(di);
2837 goto out;
2582 } 2838 }
2839 if (di) {
2840 if (check_path_shared(root, path))
2841 goto out;
2842 } else {
2843 err = 0;
2844 goto out;
2845 }
2846 btrfs_release_path(root, path);
2847
2848 ref = btrfs_lookup_inode_ref(trans, root, path,
2849 dentry->d_name.name, dentry->d_name.len,
2850 inode->i_ino, dir->i_ino, 0);
2851 if (IS_ERR(ref)) {
2852 err = PTR_ERR(ref);
2853 goto out;
2854 }
2855 BUG_ON(!ref);
2856 if (check_path_shared(root, path))
2857 goto out;
2858 index = btrfs_inode_ref_index(path->nodes[0], ref);
2859 btrfs_release_path(root, path);
2860
2861 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
2862 dentry->d_name.name, dentry->d_name.len, 0);
2863 if (IS_ERR(di)) {
2864 err = PTR_ERR(di);
2865 goto out;
2866 }
2867 BUG_ON(ret == -ENOENT);
2868 if (check_path_shared(root, path))
2869 goto out;
2870
2871 err = 0;
2872out:
2873 btrfs_free_path(path);
2874 if (err) {
2875 btrfs_end_transaction(trans, root);
2876 root->fs_info->enospc_unlink = 0;
2877 return ERR_PTR(err);
2878 }
2879
2880 trans->block_rsv = &root->fs_info->global_block_rsv;
2881 return trans;
2882}
2883
2884static void __unlink_end_trans(struct btrfs_trans_handle *trans,
2885 struct btrfs_root *root)
2886{
2887 if (trans->block_rsv == &root->fs_info->global_block_rsv) {
2888 BUG_ON(!root->fs_info->enospc_unlink);
2889 root->fs_info->enospc_unlink = 0;
2890 }
2891 btrfs_end_transaction_throttle(trans, root);
2892}
2893
2894static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2895{
2896 struct btrfs_root *root = BTRFS_I(dir)->root;
2897 struct btrfs_trans_handle *trans;
2898 struct inode *inode = dentry->d_inode;
2899 int ret;
2900 unsigned long nr = 0;
2901
2902 trans = __unlink_start_trans(dir, dentry);
2903 if (IS_ERR(trans))
2904 return PTR_ERR(trans);
2583 2905
2584 btrfs_set_trans_block_group(trans, dir); 2906 btrfs_set_trans_block_group(trans, dir);
2585 2907
@@ -2587,14 +2909,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2587 2909
2588 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 2910 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2589 dentry->d_name.name, dentry->d_name.len); 2911 dentry->d_name.name, dentry->d_name.len);
2912 BUG_ON(ret);
2590 2913
2591 if (inode->i_nlink == 0) 2914 if (inode->i_nlink == 0) {
2592 ret = btrfs_orphan_add(trans, inode); 2915 ret = btrfs_orphan_add(trans, inode);
2916 BUG_ON(ret);
2917 }
2593 2918
2594 nr = trans->blocks_used; 2919 nr = trans->blocks_used;
2595 2920 __unlink_end_trans(trans, root);
2596 btrfs_end_transaction_throttle(trans, root);
2597 btrfs_unreserve_metadata_space(root, 6);
2598 btrfs_btree_balance_dirty(root, nr); 2921 btrfs_btree_balance_dirty(root, nr);
2599 return ret; 2922 return ret;
2600} 2923}
@@ -2656,7 +2979,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2656 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2979 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2657 ret = btrfs_update_inode(trans, root, dir); 2980 ret = btrfs_update_inode(trans, root, dir);
2658 BUG_ON(ret); 2981 BUG_ON(ret);
2659 dir->i_sb->s_dirt = 1;
2660 2982
2661 btrfs_free_path(path); 2983 btrfs_free_path(path);
2662 return 0; 2984 return 0;
@@ -2666,7 +2988,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2666{ 2988{
2667 struct inode *inode = dentry->d_inode; 2989 struct inode *inode = dentry->d_inode;
2668 int err = 0; 2990 int err = 0;
2669 int ret;
2670 struct btrfs_root *root = BTRFS_I(dir)->root; 2991 struct btrfs_root *root = BTRFS_I(dir)->root;
2671 struct btrfs_trans_handle *trans; 2992 struct btrfs_trans_handle *trans;
2672 unsigned long nr = 0; 2993 unsigned long nr = 0;
@@ -2675,15 +2996,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2675 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 2996 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
2676 return -ENOTEMPTY; 2997 return -ENOTEMPTY;
2677 2998
2678 ret = btrfs_reserve_metadata_space(root, 5); 2999 trans = __unlink_start_trans(dir, dentry);
2679 if (ret) 3000 if (IS_ERR(trans))
2680 return ret;
2681
2682 trans = btrfs_start_transaction(root, 1);
2683 if (IS_ERR(trans)) {
2684 btrfs_unreserve_metadata_space(root, 5);
2685 return PTR_ERR(trans); 3001 return PTR_ERR(trans);
2686 }
2687 3002
2688 btrfs_set_trans_block_group(trans, dir); 3003 btrfs_set_trans_block_group(trans, dir);
2689 3004
@@ -2706,12 +3021,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2706 btrfs_i_size_write(inode, 0); 3021 btrfs_i_size_write(inode, 0);
2707out: 3022out:
2708 nr = trans->blocks_used; 3023 nr = trans->blocks_used;
2709 ret = btrfs_end_transaction_throttle(trans, root); 3024 __unlink_end_trans(trans, root);
2710 btrfs_unreserve_metadata_space(root, 5);
2711 btrfs_btree_balance_dirty(root, nr); 3025 btrfs_btree_balance_dirty(root, nr);
2712 3026
2713 if (ret && !err)
2714 err = ret;
2715 return err; 3027 return err;
2716} 3028}
2717 3029
@@ -2925,7 +3237,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2925 3237
2926 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3238 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
2927 3239
2928 if (root->ref_cows) 3240 if (root->ref_cows || root == root->fs_info->tree_root)
2929 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3241 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2930 3242
2931 path = btrfs_alloc_path(); 3243 path = btrfs_alloc_path();
@@ -3073,7 +3385,8 @@ delete:
3073 } else { 3385 } else {
3074 break; 3386 break;
3075 } 3387 }
3076 if (found_extent && root->ref_cows) { 3388 if (found_extent && (root->ref_cows ||
3389 root == root->fs_info->tree_root)) {
3077 btrfs_set_path_blocking(path); 3390 btrfs_set_path_blocking(path);
3078 ret = btrfs_free_extent(trans, root, extent_start, 3391 ret = btrfs_free_extent(trans, root, extent_start,
3079 extent_num_bytes, 0, 3392 extent_num_bytes, 0,
@@ -3108,6 +3421,7 @@ out:
3108 if (pending_del_nr) { 3421 if (pending_del_nr) {
3109 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3422 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3110 pending_del_nr); 3423 pending_del_nr);
3424 BUG_ON(ret);
3111 } 3425 }
3112 btrfs_free_path(path); 3426 btrfs_free_path(path);
3113 return err; 3427 return err;
@@ -3123,6 +3437,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3123 struct btrfs_root *root = BTRFS_I(inode)->root; 3437 struct btrfs_root *root = BTRFS_I(inode)->root;
3124 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3438 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3125 struct btrfs_ordered_extent *ordered; 3439 struct btrfs_ordered_extent *ordered;
3440 struct extent_state *cached_state = NULL;
3126 char *kaddr; 3441 char *kaddr;
3127 u32 blocksize = root->sectorsize; 3442 u32 blocksize = root->sectorsize;
3128 pgoff_t index = from >> PAGE_CACHE_SHIFT; 3443 pgoff_t index = from >> PAGE_CACHE_SHIFT;
@@ -3134,11 +3449,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3134 3449
3135 if ((offset & (blocksize - 1)) == 0) 3450 if ((offset & (blocksize - 1)) == 0)
3136 goto out; 3451 goto out;
3137 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 3452 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
3138 if (ret)
3139 goto out;
3140
3141 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
3142 if (ret) 3453 if (ret)
3143 goto out; 3454 goto out;
3144 3455
@@ -3146,8 +3457,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3146again: 3457again:
3147 page = grab_cache_page(mapping, index); 3458 page = grab_cache_page(mapping, index);
3148 if (!page) { 3459 if (!page) {
3149 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 3460 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3150 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3151 goto out; 3461 goto out;
3152 } 3462 }
3153 3463
@@ -3169,12 +3479,14 @@ again:
3169 } 3479 }
3170 wait_on_page_writeback(page); 3480 wait_on_page_writeback(page);
3171 3481
3172 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 3482 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
3483 GFP_NOFS);
3173 set_page_extent_mapped(page); 3484 set_page_extent_mapped(page);
3174 3485
3175 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3486 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3176 if (ordered) { 3487 if (ordered) {
3177 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3488 unlock_extent_cached(io_tree, page_start, page_end,
3489 &cached_state, GFP_NOFS);
3178 unlock_page(page); 3490 unlock_page(page);
3179 page_cache_release(page); 3491 page_cache_release(page);
3180 btrfs_start_ordered_extent(inode, ordered, 1); 3492 btrfs_start_ordered_extent(inode, ordered, 1);
@@ -3182,13 +3494,15 @@ again:
3182 goto again; 3494 goto again;
3183 } 3495 }
3184 3496
3185 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 3497 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3186 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 3498 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
3187 GFP_NOFS); 3499 0, 0, &cached_state, GFP_NOFS);
3188 3500
3189 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 3501 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
3502 &cached_state);
3190 if (ret) { 3503 if (ret) {
3191 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3504 unlock_extent_cached(io_tree, page_start, page_end,
3505 &cached_state, GFP_NOFS);
3192 goto out_unlock; 3506 goto out_unlock;
3193 } 3507 }
3194 3508
@@ -3201,12 +3515,12 @@ again:
3201 } 3515 }
3202 ClearPageChecked(page); 3516 ClearPageChecked(page);
3203 set_page_dirty(page); 3517 set_page_dirty(page);
3204 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3518 unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
3519 GFP_NOFS);
3205 3520
3206out_unlock: 3521out_unlock:
3207 if (ret) 3522 if (ret)
3208 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 3523 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3209 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3210 unlock_page(page); 3524 unlock_page(page);
3211 page_cache_release(page); 3525 page_cache_release(page);
3212out: 3526out:
@@ -3218,7 +3532,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3218 struct btrfs_trans_handle *trans; 3532 struct btrfs_trans_handle *trans;
3219 struct btrfs_root *root = BTRFS_I(inode)->root; 3533 struct btrfs_root *root = BTRFS_I(inode)->root;
3220 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3534 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3221 struct extent_map *em; 3535 struct extent_map *em = NULL;
3536 struct extent_state *cached_state = NULL;
3222 u64 mask = root->sectorsize - 1; 3537 u64 mask = root->sectorsize - 1;
3223 u64 hole_start = (inode->i_size + mask) & ~mask; 3538 u64 hole_start = (inode->i_size + mask) & ~mask;
3224 u64 block_end = (size + mask) & ~mask; 3539 u64 block_end = (size + mask) & ~mask;
@@ -3234,11 +3549,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3234 struct btrfs_ordered_extent *ordered; 3549 struct btrfs_ordered_extent *ordered;
3235 btrfs_wait_ordered_range(inode, hole_start, 3550 btrfs_wait_ordered_range(inode, hole_start,
3236 block_end - hole_start); 3551 block_end - hole_start);
3237 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3552 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3553 &cached_state, GFP_NOFS);
3238 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3554 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3239 if (!ordered) 3555 if (!ordered)
3240 break; 3556 break;
3241 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3557 unlock_extent_cached(io_tree, hole_start, block_end - 1,
3558 &cached_state, GFP_NOFS);
3242 btrfs_put_ordered_extent(ordered); 3559 btrfs_put_ordered_extent(ordered);
3243 } 3560 }
3244 3561
@@ -3253,11 +3570,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3253 u64 hint_byte = 0; 3570 u64 hint_byte = 0;
3254 hole_size = last_byte - cur_offset; 3571 hole_size = last_byte - cur_offset;
3255 3572
3256 err = btrfs_reserve_metadata_space(root, 2); 3573 trans = btrfs_start_transaction(root, 2);
3257 if (err) 3574 if (IS_ERR(trans)) {
3575 err = PTR_ERR(trans);
3258 break; 3576 break;
3259 3577 }
3260 trans = btrfs_start_transaction(root, 1);
3261 btrfs_set_trans_block_group(trans, inode); 3578 btrfs_set_trans_block_group(trans, inode);
3262 3579
3263 err = btrfs_drop_extents(trans, inode, cur_offset, 3580 err = btrfs_drop_extents(trans, inode, cur_offset,
@@ -3275,15 +3592,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3275 last_byte - 1, 0); 3592 last_byte - 1, 0);
3276 3593
3277 btrfs_end_transaction(trans, root); 3594 btrfs_end_transaction(trans, root);
3278 btrfs_unreserve_metadata_space(root, 2);
3279 } 3595 }
3280 free_extent_map(em); 3596 free_extent_map(em);
3597 em = NULL;
3281 cur_offset = last_byte; 3598 cur_offset = last_byte;
3282 if (cur_offset >= block_end) 3599 if (cur_offset >= block_end)
3283 break; 3600 break;
3284 } 3601 }
3285 3602
3286 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3603 free_extent_map(em);
3604 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3605 GFP_NOFS);
3287 return err; 3606 return err;
3288} 3607}
3289 3608
@@ -3308,11 +3627,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3308 } 3627 }
3309 } 3628 }
3310 3629
3311 ret = btrfs_reserve_metadata_space(root, 1); 3630 trans = btrfs_start_transaction(root, 5);
3312 if (ret) 3631 if (IS_ERR(trans))
3313 return ret; 3632 return PTR_ERR(trans);
3314 3633
3315 trans = btrfs_start_transaction(root, 1);
3316 btrfs_set_trans_block_group(trans, inode); 3634 btrfs_set_trans_block_group(trans, inode);
3317 3635
3318 ret = btrfs_orphan_add(trans, inode); 3636 ret = btrfs_orphan_add(trans, inode);
@@ -3320,7 +3638,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3320 3638
3321 nr = trans->blocks_used; 3639 nr = trans->blocks_used;
3322 btrfs_end_transaction(trans, root); 3640 btrfs_end_transaction(trans, root);
3323 btrfs_unreserve_metadata_space(root, 1);
3324 btrfs_btree_balance_dirty(root, nr); 3641 btrfs_btree_balance_dirty(root, nr);
3325 3642
3326 if (attr->ia_size > inode->i_size) { 3643 if (attr->ia_size > inode->i_size) {
@@ -3333,8 +3650,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3333 i_size_write(inode, attr->ia_size); 3650 i_size_write(inode, attr->ia_size);
3334 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 3651 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3335 3652
3336 trans = btrfs_start_transaction(root, 1); 3653 trans = btrfs_start_transaction(root, 0);
3654 BUG_ON(IS_ERR(trans));
3337 btrfs_set_trans_block_group(trans, inode); 3655 btrfs_set_trans_block_group(trans, inode);
3656 trans->block_rsv = root->orphan_block_rsv;
3657 BUG_ON(!trans->block_rsv);
3338 3658
3339 ret = btrfs_update_inode(trans, root, inode); 3659 ret = btrfs_update_inode(trans, root, inode);
3340 BUG_ON(ret); 3660 BUG_ON(ret);
@@ -3366,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3366static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) 3686static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3367{ 3687{
3368 struct inode *inode = dentry->d_inode; 3688 struct inode *inode = dentry->d_inode;
3689 struct btrfs_root *root = BTRFS_I(inode)->root;
3369 int err; 3690 int err;
3370 3691
3692 if (btrfs_root_readonly(root))
3693 return -EROFS;
3694
3371 err = inode_change_ok(inode, attr); 3695 err = inode_change_ok(inode, attr);
3372 if (err) 3696 if (err)
3373 return err; 3697 return err;
@@ -3377,17 +3701,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3377 if (err) 3701 if (err)
3378 return err; 3702 return err;
3379 } 3703 }
3380 attr->ia_valid &= ~ATTR_SIZE;
3381 3704
3382 if (attr->ia_valid) 3705 if (attr->ia_valid) {
3383 err = inode_setattr(inode, attr); 3706 setattr_copy(inode, attr);
3707 mark_inode_dirty(inode);
3708
3709 if (attr->ia_valid & ATTR_MODE)
3710 err = btrfs_acl_chmod(inode);
3711 }
3384 3712
3385 if (!err && ((attr->ia_valid & ATTR_MODE)))
3386 err = btrfs_acl_chmod(inode);
3387 return err; 3713 return err;
3388} 3714}
3389 3715
3390void btrfs_delete_inode(struct inode *inode) 3716void btrfs_evict_inode(struct inode *inode)
3391{ 3717{
3392 struct btrfs_trans_handle *trans; 3718 struct btrfs_trans_handle *trans;
3393 struct btrfs_root *root = BTRFS_I(inode)->root; 3719 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3395,10 +3721,15 @@ void btrfs_delete_inode(struct inode *inode)
3395 int ret; 3721 int ret;
3396 3722
3397 truncate_inode_pages(&inode->i_data, 0); 3723 truncate_inode_pages(&inode->i_data, 0);
3724 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3725 root == root->fs_info->tree_root))
3726 goto no_delete;
3727
3398 if (is_bad_inode(inode)) { 3728 if (is_bad_inode(inode)) {
3399 btrfs_orphan_del(NULL, inode); 3729 btrfs_orphan_del(NULL, inode);
3400 goto no_delete; 3730 goto no_delete;
3401 } 3731 }
3732 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
3402 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3733 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3403 3734
3404 if (root->fs_info->log_root_recovering) { 3735 if (root->fs_info->log_root_recovering) {
@@ -3414,10 +3745,21 @@ void btrfs_delete_inode(struct inode *inode)
3414 btrfs_i_size_write(inode, 0); 3745 btrfs_i_size_write(inode, 0);
3415 3746
3416 while (1) { 3747 while (1) {
3417 trans = btrfs_start_transaction(root, 1); 3748 trans = btrfs_start_transaction(root, 0);
3749 BUG_ON(IS_ERR(trans));
3418 btrfs_set_trans_block_group(trans, inode); 3750 btrfs_set_trans_block_group(trans, inode);
3419 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); 3751 trans->block_rsv = root->orphan_block_rsv;
3752
3753 ret = btrfs_block_rsv_check(trans, root,
3754 root->orphan_block_rsv, 0, 5);
3755 if (ret) {
3756 BUG_ON(ret != -EAGAIN);
3757 ret = btrfs_commit_transaction(trans, root);
3758 BUG_ON(ret);
3759 continue;
3760 }
3420 3761
3762 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3421 if (ret != -EAGAIN) 3763 if (ret != -EAGAIN)
3422 break; 3764 break;
3423 3765
@@ -3425,6 +3767,7 @@ void btrfs_delete_inode(struct inode *inode)
3425 btrfs_end_transaction(trans, root); 3767 btrfs_end_transaction(trans, root);
3426 trans = NULL; 3768 trans = NULL;
3427 btrfs_btree_balance_dirty(root, nr); 3769 btrfs_btree_balance_dirty(root, nr);
3770
3428 } 3771 }
3429 3772
3430 if (ret == 0) { 3773 if (ret == 0) {
@@ -3436,7 +3779,7 @@ void btrfs_delete_inode(struct inode *inode)
3436 btrfs_end_transaction(trans, root); 3779 btrfs_end_transaction(trans, root);
3437 btrfs_btree_balance_dirty(root, nr); 3780 btrfs_btree_balance_dirty(root, nr);
3438no_delete: 3781no_delete:
3439 clear_inode(inode); 3782 end_writeback(inode);
3440 return; 3783 return;
3441} 3784}
3442 3785
@@ -3553,7 +3896,7 @@ again:
3553 p = &root->inode_tree.rb_node; 3896 p = &root->inode_tree.rb_node;
3554 parent = NULL; 3897 parent = NULL;
3555 3898
3556 if (hlist_unhashed(&inode->i_hash)) 3899 if (inode_unhashed(inode))
3557 return; 3900 return;
3558 3901
3559 spin_lock(&root->inode_lock); 3902 spin_lock(&root->inode_lock);
@@ -3567,7 +3910,7 @@ again:
3567 p = &parent->rb_right; 3910 p = &parent->rb_right;
3568 else { 3911 else {
3569 WARN_ON(!(entry->vfs_inode.i_state & 3912 WARN_ON(!(entry->vfs_inode.i_state &
3570 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3913 (I_WILL_FREE | I_FREEING)));
3571 rb_erase(parent, &root->inode_tree); 3914 rb_erase(parent, &root->inode_tree);
3572 RB_CLEAR_NODE(parent); 3915 RB_CLEAR_NODE(parent);
3573 spin_unlock(&root->inode_lock); 3916 spin_unlock(&root->inode_lock);
@@ -3592,7 +3935,14 @@ static void inode_tree_del(struct inode *inode)
3592 } 3935 }
3593 spin_unlock(&root->inode_lock); 3936 spin_unlock(&root->inode_lock);
3594 3937
3595 if (empty && btrfs_root_refs(&root->root_item) == 0) { 3938 /*
3939 * Free space cache has inodes in the tree root, but the tree root has a
3940 * root_refs of 0, so this could end up dropping the tree root as a
3941 * snapshot, so we need the extra !root->fs_info->tree_root check to
3942 * make sure we don't drop it.
3943 */
3944 if (empty && btrfs_root_refs(&root->root_item) == 0 &&
3945 root != root->fs_info->tree_root) {
3596 synchronize_srcu(&root->fs_info->subvol_srcu); 3946 synchronize_srcu(&root->fs_info->subvol_srcu);
3597 spin_lock(&root->inode_lock); 3947 spin_lock(&root->inode_lock);
3598 empty = RB_EMPTY_ROOT(&root->inode_tree); 3948 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -3646,7 +3996,7 @@ again:
3646 if (atomic_read(&inode->i_count) > 1) 3996 if (atomic_read(&inode->i_count) > 1)
3647 d_prune_aliases(inode); 3997 d_prune_aliases(inode);
3648 /* 3998 /*
3649 * btrfs_drop_inode will remove it from 3999 * btrfs_drop_inode will have it removed from
3650 * the inode cache when its usage count 4000 * the inode cache when its usage count
3651 * hits zero. 4001 * hits zero.
3652 */ 4002 */
@@ -3665,39 +4015,10 @@ again:
3665 return 0; 4015 return 0;
3666} 4016}
3667 4017
3668static noinline void init_btrfs_i(struct inode *inode)
3669{
3670 struct btrfs_inode *bi = BTRFS_I(inode);
3671
3672 bi->generation = 0;
3673 bi->sequence = 0;
3674 bi->last_trans = 0;
3675 bi->last_sub_trans = 0;
3676 bi->logged_trans = 0;
3677 bi->delalloc_bytes = 0;
3678 bi->reserved_bytes = 0;
3679 bi->disk_i_size = 0;
3680 bi->flags = 0;
3681 bi->index_cnt = (u64)-1;
3682 bi->last_unlink_trans = 0;
3683 bi->ordered_data_close = 0;
3684 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3685 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3686 inode->i_mapping, GFP_NOFS);
3687 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3688 inode->i_mapping, GFP_NOFS);
3689 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3690 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3691 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3692 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3693 mutex_init(&BTRFS_I(inode)->log_mutex);
3694}
3695
3696static int btrfs_init_locked_inode(struct inode *inode, void *p) 4018static int btrfs_init_locked_inode(struct inode *inode, void *p)
3697{ 4019{
3698 struct btrfs_iget_args *args = p; 4020 struct btrfs_iget_args *args = p;
3699 inode->i_ino = args->ino; 4021 inode->i_ino = args->ino;
3700 init_btrfs_i(inode);
3701 BTRFS_I(inode)->root = args->root; 4022 BTRFS_I(inode)->root = args->root;
3702 btrfs_set_inode_space_info(args->root, inode); 4023 btrfs_set_inode_space_info(args->root, inode);
3703 return 0; 4024 return 0;
@@ -3729,7 +4050,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
3729 * Returns in *is_new if the inode was read from disk 4050 * Returns in *is_new if the inode was read from disk
3730 */ 4051 */
3731struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 4052struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3732 struct btrfs_root *root) 4053 struct btrfs_root *root, int *new)
3733{ 4054{
3734 struct inode *inode; 4055 struct inode *inode;
3735 4056
@@ -3744,6 +4065,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3744 4065
3745 inode_tree_add(inode); 4066 inode_tree_add(inode);
3746 unlock_new_inode(inode); 4067 unlock_new_inode(inode);
4068 if (new)
4069 *new = 1;
3747 } 4070 }
3748 4071
3749 return inode; 4072 return inode;
@@ -3758,8 +4081,6 @@ static struct inode *new_simple_dir(struct super_block *s,
3758 if (!inode) 4081 if (!inode)
3759 return ERR_PTR(-ENOMEM); 4082 return ERR_PTR(-ENOMEM);
3760 4083
3761 init_btrfs_i(inode);
3762
3763 BTRFS_I(inode)->root = root; 4084 BTRFS_I(inode)->root = root;
3764 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); 4085 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
3765 BTRFS_I(inode)->dummy_inode = 1; 4086 BTRFS_I(inode)->dummy_inode = 1;
@@ -3782,8 +4103,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3782 int index; 4103 int index;
3783 int ret; 4104 int ret;
3784 4105
3785 dentry->d_op = &btrfs_dentry_operations;
3786
3787 if (dentry->d_name.len > BTRFS_NAME_LEN) 4106 if (dentry->d_name.len > BTRFS_NAME_LEN)
3788 return ERR_PTR(-ENAMETOOLONG); 4107 return ERR_PTR(-ENAMETOOLONG);
3789 4108
@@ -3796,7 +4115,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3796 return NULL; 4115 return NULL;
3797 4116
3798 if (location.type == BTRFS_INODE_ITEM_KEY) { 4117 if (location.type == BTRFS_INODE_ITEM_KEY) {
3799 inode = btrfs_iget(dir->i_sb, &location, root); 4118 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
3800 return inode; 4119 return inode;
3801 } 4120 }
3802 4121
@@ -3811,7 +4130,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3811 else 4130 else
3812 inode = new_simple_dir(dir->i_sb, &location, sub_root); 4131 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3813 } else { 4132 } else {
3814 inode = btrfs_iget(dir->i_sb, &location, sub_root); 4133 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
3815 } 4134 }
3816 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 4135 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3817 4136
@@ -3825,7 +4144,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3825 return inode; 4144 return inode;
3826} 4145}
3827 4146
3828static int btrfs_dentry_delete(struct dentry *dentry) 4147static int btrfs_dentry_delete(const struct dentry *dentry)
3829{ 4148{
3830 struct btrfs_root *root; 4149 struct btrfs_root *root;
3831 4150
@@ -4010,19 +4329,29 @@ err:
4010 return ret; 4329 return ret;
4011} 4330}
4012 4331
4013int btrfs_write_inode(struct inode *inode, int wait) 4332int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4014{ 4333{
4015 struct btrfs_root *root = BTRFS_I(inode)->root; 4334 struct btrfs_root *root = BTRFS_I(inode)->root;
4016 struct btrfs_trans_handle *trans; 4335 struct btrfs_trans_handle *trans;
4017 int ret = 0; 4336 int ret = 0;
4337 bool nolock = false;
4018 4338
4019 if (root->fs_info->btree_inode == inode) 4339 if (BTRFS_I(inode)->dummy_inode)
4020 return 0; 4340 return 0;
4021 4341
4022 if (wait) { 4342 smp_mb();
4023 trans = btrfs_join_transaction(root, 1); 4343 nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
4344
4345 if (wbc->sync_mode == WB_SYNC_ALL) {
4346 if (nolock)
4347 trans = btrfs_join_transaction_nolock(root, 1);
4348 else
4349 trans = btrfs_join_transaction(root, 1);
4024 btrfs_set_trans_block_group(trans, inode); 4350 btrfs_set_trans_block_group(trans, inode);
4025 ret = btrfs_commit_transaction(trans, root); 4351 if (nolock)
4352 ret = btrfs_end_transaction_nolock(trans, root);
4353 else
4354 ret = btrfs_commit_transaction(trans, root);
4026 } 4355 }
4027 return ret; 4356 return ret;
4028} 4357}
@@ -4037,10 +4366,38 @@ void btrfs_dirty_inode(struct inode *inode)
4037{ 4366{
4038 struct btrfs_root *root = BTRFS_I(inode)->root; 4367 struct btrfs_root *root = BTRFS_I(inode)->root;
4039 struct btrfs_trans_handle *trans; 4368 struct btrfs_trans_handle *trans;
4369 int ret;
4370
4371 if (BTRFS_I(inode)->dummy_inode)
4372 return;
4040 4373
4041 trans = btrfs_join_transaction(root, 1); 4374 trans = btrfs_join_transaction(root, 1);
4042 btrfs_set_trans_block_group(trans, inode); 4375 btrfs_set_trans_block_group(trans, inode);
4043 btrfs_update_inode(trans, root, inode); 4376
4377 ret = btrfs_update_inode(trans, root, inode);
4378 if (ret && ret == -ENOSPC) {
4379 /* whoops, lets try again with the full transaction */
4380 btrfs_end_transaction(trans, root);
4381 trans = btrfs_start_transaction(root, 1);
4382 if (IS_ERR(trans)) {
4383 if (printk_ratelimit()) {
4384 printk(KERN_ERR "btrfs: fail to "
4385 "dirty inode %lu error %ld\n",
4386 inode->i_ino, PTR_ERR(trans));
4387 }
4388 return;
4389 }
4390 btrfs_set_trans_block_group(trans, inode);
4391
4392 ret = btrfs_update_inode(trans, root, inode);
4393 if (ret) {
4394 if (printk_ratelimit()) {
4395 printk(KERN_ERR "btrfs: fail to "
4396 "dirty inode %lu error %d\n",
4397 inode->i_ino, ret);
4398 }
4399 }
4400 }
4044 btrfs_end_transaction(trans, root); 4401 btrfs_end_transaction(trans, root);
4045} 4402}
4046 4403
@@ -4158,10 +4515,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4158 * btrfs_get_inode_index_count has an explanation for the magic 4515 * btrfs_get_inode_index_count has an explanation for the magic
4159 * number 4516 * number
4160 */ 4517 */
4161 init_btrfs_i(inode);
4162 BTRFS_I(inode)->index_cnt = 2; 4518 BTRFS_I(inode)->index_cnt = 2;
4163 BTRFS_I(inode)->root = root; 4519 BTRFS_I(inode)->root = root;
4164 BTRFS_I(inode)->generation = trans->transid; 4520 BTRFS_I(inode)->generation = trans->transid;
4521 inode->i_generation = BTRFS_I(inode)->generation;
4165 btrfs_set_inode_space_info(root, inode); 4522 btrfs_set_inode_space_info(root, inode);
4166 4523
4167 if (mode & S_IFDIR) 4524 if (mode & S_IFDIR)
@@ -4187,16 +4544,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4187 if (ret != 0) 4544 if (ret != 0)
4188 goto fail; 4545 goto fail;
4189 4546
4190 inode->i_uid = current_fsuid(); 4547 inode_init_owner(inode, dir, mode);
4191
4192 if (dir && (dir->i_mode & S_ISGID)) {
4193 inode->i_gid = dir->i_gid;
4194 if (S_ISDIR(mode))
4195 mode |= S_ISGID;
4196 } else
4197 inode->i_gid = current_fsgid();
4198
4199 inode->i_mode = mode;
4200 inode->i_ino = objectid; 4548 inode->i_ino = objectid;
4201 inode_set_bytes(inode, 0); 4549 inode_set_bytes(inode, 0);
4202 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4550 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -4292,12 +4640,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4292} 4640}
4293 4641
4294static int btrfs_add_nondir(struct btrfs_trans_handle *trans, 4642static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4295 struct dentry *dentry, struct inode *inode, 4643 struct inode *dir, struct dentry *dentry,
4296 int backref, u64 index) 4644 struct inode *inode, int backref, u64 index)
4297{ 4645{
4298 int err = btrfs_add_link(trans, dentry->d_parent->d_inode, 4646 int err = btrfs_add_link(trans, dir, inode,
4299 inode, dentry->d_name.name, 4647 dentry->d_name.name, dentry->d_name.len,
4300 dentry->d_name.len, backref, index); 4648 backref, index);
4301 if (!err) { 4649 if (!err) {
4302 d_instantiate(dentry, inode); 4650 d_instantiate(dentry, inode);
4303 return 0; 4651 return 0;
@@ -4322,29 +4670,23 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4322 if (!new_valid_dev(rdev)) 4670 if (!new_valid_dev(rdev))
4323 return -EINVAL; 4671 return -EINVAL;
4324 4672
4673 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4674 if (err)
4675 return err;
4676
4325 /* 4677 /*
4326 * 2 for inode item and ref 4678 * 2 for inode item and ref
4327 * 2 for dir items 4679 * 2 for dir items
4328 * 1 for xattr if selinux is on 4680 * 1 for xattr if selinux is on
4329 */ 4681 */
4330 err = btrfs_reserve_metadata_space(root, 5); 4682 trans = btrfs_start_transaction(root, 5);
4331 if (err) 4683 if (IS_ERR(trans))
4332 return err; 4684 return PTR_ERR(trans);
4333 4685
4334 trans = btrfs_start_transaction(root, 1);
4335 if (!trans)
4336 goto fail;
4337 btrfs_set_trans_block_group(trans, dir); 4686 btrfs_set_trans_block_group(trans, dir);
4338 4687
4339 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4340 if (err) {
4341 err = -ENOSPC;
4342 goto out_unlock;
4343 }
4344
4345 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4688 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4346 dentry->d_name.len, 4689 dentry->d_name.len, dir->i_ino, objectid,
4347 dentry->d_parent->d_inode->i_ino, objectid,
4348 BTRFS_I(dir)->block_group, mode, &index); 4690 BTRFS_I(dir)->block_group, mode, &index);
4349 err = PTR_ERR(inode); 4691 err = PTR_ERR(inode);
4350 if (IS_ERR(inode)) 4692 if (IS_ERR(inode))
@@ -4357,7 +4699,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4357 } 4699 }
4358 4700
4359 btrfs_set_trans_block_group(trans, inode); 4701 btrfs_set_trans_block_group(trans, inode);
4360 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 4702 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4361 if (err) 4703 if (err)
4362 drop_inode = 1; 4704 drop_inode = 1;
4363 else { 4705 else {
@@ -4370,13 +4712,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4370out_unlock: 4712out_unlock:
4371 nr = trans->blocks_used; 4713 nr = trans->blocks_used;
4372 btrfs_end_transaction_throttle(trans, root); 4714 btrfs_end_transaction_throttle(trans, root);
4373fail: 4715 btrfs_btree_balance_dirty(root, nr);
4374 btrfs_unreserve_metadata_space(root, 5);
4375 if (drop_inode) { 4716 if (drop_inode) {
4376 inode_dec_link_count(inode); 4717 inode_dec_link_count(inode);
4377 iput(inode); 4718 iput(inode);
4378 } 4719 }
4379 btrfs_btree_balance_dirty(root, nr);
4380 return err; 4720 return err;
4381} 4721}
4382 4722
@@ -4386,37 +4726,29 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4386 struct btrfs_trans_handle *trans; 4726 struct btrfs_trans_handle *trans;
4387 struct btrfs_root *root = BTRFS_I(dir)->root; 4727 struct btrfs_root *root = BTRFS_I(dir)->root;
4388 struct inode *inode = NULL; 4728 struct inode *inode = NULL;
4389 int err;
4390 int drop_inode = 0; 4729 int drop_inode = 0;
4730 int err;
4391 unsigned long nr = 0; 4731 unsigned long nr = 0;
4392 u64 objectid; 4732 u64 objectid;
4393 u64 index = 0; 4733 u64 index = 0;
4394 4734
4735 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4736 if (err)
4737 return err;
4395 /* 4738 /*
4396 * 2 for inode item and ref 4739 * 2 for inode item and ref
4397 * 2 for dir items 4740 * 2 for dir items
4398 * 1 for xattr if selinux is on 4741 * 1 for xattr if selinux is on
4399 */ 4742 */
4400 err = btrfs_reserve_metadata_space(root, 5); 4743 trans = btrfs_start_transaction(root, 5);
4401 if (err) 4744 if (IS_ERR(trans))
4402 return err; 4745 return PTR_ERR(trans);
4403 4746
4404 trans = btrfs_start_transaction(root, 1);
4405 if (!trans)
4406 goto fail;
4407 btrfs_set_trans_block_group(trans, dir); 4747 btrfs_set_trans_block_group(trans, dir);
4408 4748
4409 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4410 if (err) {
4411 err = -ENOSPC;
4412 goto out_unlock;
4413 }
4414
4415 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4749 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4416 dentry->d_name.len, 4750 dentry->d_name.len, dir->i_ino, objectid,
4417 dentry->d_parent->d_inode->i_ino, 4751 BTRFS_I(dir)->block_group, mode, &index);
4418 objectid, BTRFS_I(dir)->block_group, mode,
4419 &index);
4420 err = PTR_ERR(inode); 4752 err = PTR_ERR(inode);
4421 if (IS_ERR(inode)) 4753 if (IS_ERR(inode))
4422 goto out_unlock; 4754 goto out_unlock;
@@ -4428,7 +4760,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4428 } 4760 }
4429 4761
4430 btrfs_set_trans_block_group(trans, inode); 4762 btrfs_set_trans_block_group(trans, inode);
4431 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 4763 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4432 if (err) 4764 if (err)
4433 drop_inode = 1; 4765 drop_inode = 1;
4434 else { 4766 else {
@@ -4443,8 +4775,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4443out_unlock: 4775out_unlock:
4444 nr = trans->blocks_used; 4776 nr = trans->blocks_used;
4445 btrfs_end_transaction_throttle(trans, root); 4777 btrfs_end_transaction_throttle(trans, root);
4446fail:
4447 btrfs_unreserve_metadata_space(root, 5);
4448 if (drop_inode) { 4778 if (drop_inode) {
4449 inode_dec_link_count(inode); 4779 inode_dec_link_count(inode);
4450 iput(inode); 4780 iput(inode);
@@ -4471,40 +4801,42 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4471 if (root->objectid != BTRFS_I(inode)->root->objectid) 4801 if (root->objectid != BTRFS_I(inode)->root->objectid)
4472 return -EPERM; 4802 return -EPERM;
4473 4803
4474 /*
4475 * 1 item for inode ref
4476 * 2 items for dir items
4477 */
4478 err = btrfs_reserve_metadata_space(root, 3);
4479 if (err)
4480 return err;
4481
4482 btrfs_inc_nlink(inode); 4804 btrfs_inc_nlink(inode);
4805 inode->i_ctime = CURRENT_TIME;
4483 4806
4484 err = btrfs_set_inode_index(dir, &index); 4807 err = btrfs_set_inode_index(dir, &index);
4485 if (err) 4808 if (err)
4486 goto fail; 4809 goto fail;
4487 4810
4488 trans = btrfs_start_transaction(root, 1); 4811 /*
4812 * 1 item for inode ref
4813 * 2 items for dir items
4814 */
4815 trans = btrfs_start_transaction(root, 3);
4816 if (IS_ERR(trans)) {
4817 err = PTR_ERR(trans);
4818 goto fail;
4819 }
4489 4820
4490 btrfs_set_trans_block_group(trans, dir); 4821 btrfs_set_trans_block_group(trans, dir);
4491 atomic_inc(&inode->i_count); 4822 ihold(inode);
4492 4823
4493 err = btrfs_add_nondir(trans, dentry, inode, 1, index); 4824 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
4494 4825
4495 if (err) { 4826 if (err) {
4496 drop_inode = 1; 4827 drop_inode = 1;
4497 } else { 4828 } else {
4829 struct dentry *parent = dget_parent(dentry);
4498 btrfs_update_inode_block_group(trans, dir); 4830 btrfs_update_inode_block_group(trans, dir);
4499 err = btrfs_update_inode(trans, root, inode); 4831 err = btrfs_update_inode(trans, root, inode);
4500 BUG_ON(err); 4832 BUG_ON(err);
4501 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); 4833 btrfs_log_new_name(trans, inode, NULL, parent);
4834 dput(parent);
4502 } 4835 }
4503 4836
4504 nr = trans->blocks_used; 4837 nr = trans->blocks_used;
4505 btrfs_end_transaction_throttle(trans, root); 4838 btrfs_end_transaction_throttle(trans, root);
4506fail: 4839fail:
4507 btrfs_unreserve_metadata_space(root, 3);
4508 if (drop_inode) { 4840 if (drop_inode) {
4509 inode_dec_link_count(inode); 4841 inode_dec_link_count(inode);
4510 iput(inode); 4842 iput(inode);
@@ -4524,31 +4856,22 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4524 u64 index = 0; 4856 u64 index = 0;
4525 unsigned long nr = 1; 4857 unsigned long nr = 1;
4526 4858
4859 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4860 if (err)
4861 return err;
4862
4527 /* 4863 /*
4528 * 2 items for inode and ref 4864 * 2 items for inode and ref
4529 * 2 items for dir items 4865 * 2 items for dir items
4530 * 1 for xattr if selinux is on 4866 * 1 for xattr if selinux is on
4531 */ 4867 */
4532 err = btrfs_reserve_metadata_space(root, 5); 4868 trans = btrfs_start_transaction(root, 5);
4533 if (err) 4869 if (IS_ERR(trans))
4534 return err; 4870 return PTR_ERR(trans);
4535
4536 trans = btrfs_start_transaction(root, 1);
4537 if (!trans) {
4538 err = -ENOMEM;
4539 goto out_unlock;
4540 }
4541 btrfs_set_trans_block_group(trans, dir); 4871 btrfs_set_trans_block_group(trans, dir);
4542 4872
4543 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4544 if (err) {
4545 err = -ENOSPC;
4546 goto out_unlock;
4547 }
4548
4549 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4873 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4550 dentry->d_name.len, 4874 dentry->d_name.len, dir->i_ino, objectid,
4551 dentry->d_parent->d_inode->i_ino, objectid,
4552 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4875 BTRFS_I(dir)->block_group, S_IFDIR | mode,
4553 &index); 4876 &index);
4554 if (IS_ERR(inode)) { 4877 if (IS_ERR(inode)) {
@@ -4571,9 +4894,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4571 if (err) 4894 if (err)
4572 goto out_fail; 4895 goto out_fail;
4573 4896
4574 err = btrfs_add_link(trans, dentry->d_parent->d_inode, 4897 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
4575 inode, dentry->d_name.name, 4898 dentry->d_name.len, 0, index);
4576 dentry->d_name.len, 0, index);
4577 if (err) 4899 if (err)
4578 goto out_fail; 4900 goto out_fail;
4579 4901
@@ -4585,9 +4907,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4585out_fail: 4907out_fail:
4586 nr = trans->blocks_used; 4908 nr = trans->blocks_used;
4587 btrfs_end_transaction_throttle(trans, root); 4909 btrfs_end_transaction_throttle(trans, root);
4588
4589out_unlock:
4590 btrfs_unreserve_metadata_space(root, 5);
4591 if (drop_on_err) 4910 if (drop_on_err)
4592 iput(inode); 4911 iput(inode);
4593 btrfs_btree_balance_dirty(root, nr); 4912 btrfs_btree_balance_dirty(root, nr);
@@ -4628,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4628 size_t max_size; 4947 size_t max_size;
4629 unsigned long inline_size; 4948 unsigned long inline_size;
4630 unsigned long ptr; 4949 unsigned long ptr;
4950 int compress_type;
4631 4951
4632 WARN_ON(pg_offset != 0); 4952 WARN_ON(pg_offset != 0);
4953 compress_type = btrfs_file_extent_compression(leaf, item);
4633 max_size = btrfs_file_extent_ram_bytes(leaf, item); 4954 max_size = btrfs_file_extent_ram_bytes(leaf, item);
4634 inline_size = btrfs_file_extent_inline_item_len(leaf, 4955 inline_size = btrfs_file_extent_inline_item_len(leaf,
4635 btrfs_item_nr(leaf, path->slots[0])); 4956 btrfs_item_nr(leaf, path->slots[0]));
@@ -4639,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4639 read_extent_buffer(leaf, tmp, ptr, inline_size); 4960 read_extent_buffer(leaf, tmp, ptr, inline_size);
4640 4961
4641 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); 4962 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
4642 ret = btrfs_zlib_decompress(tmp, page, extent_offset, 4963 ret = btrfs_decompress(compress_type, tmp, page,
4643 inline_size, max_size); 4964 extent_offset, inline_size, max_size);
4644 if (ret) { 4965 if (ret) {
4645 char *kaddr = kmap_atomic(page, KM_USER0); 4966 char *kaddr = kmap_atomic(page, KM_USER0);
4646 unsigned long copy_size = min_t(u64, 4967 unsigned long copy_size = min_t(u64,
@@ -4682,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
4682 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5003 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4683 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5004 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4684 struct btrfs_trans_handle *trans = NULL; 5005 struct btrfs_trans_handle *trans = NULL;
4685 int compressed; 5006 int compress_type;
4686 5007
4687again: 5008again:
4688 read_lock(&em_tree->lock); 5009 read_lock(&em_tree->lock);
@@ -4741,7 +5062,7 @@ again:
4741 5062
4742 found_type = btrfs_file_extent_type(leaf, item); 5063 found_type = btrfs_file_extent_type(leaf, item);
4743 extent_start = found_key.offset; 5064 extent_start = found_key.offset;
4744 compressed = btrfs_file_extent_compression(leaf, item); 5065 compress_type = btrfs_file_extent_compression(leaf, item);
4745 if (found_type == BTRFS_FILE_EXTENT_REG || 5066 if (found_type == BTRFS_FILE_EXTENT_REG ||
4746 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 5067 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
4747 extent_end = extent_start + 5068 extent_end = extent_start +
@@ -4787,8 +5108,9 @@ again:
4787 em->block_start = EXTENT_MAP_HOLE; 5108 em->block_start = EXTENT_MAP_HOLE;
4788 goto insert; 5109 goto insert;
4789 } 5110 }
4790 if (compressed) { 5111 if (compress_type != BTRFS_COMPRESS_NONE) {
4791 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5112 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
5113 em->compress_type = compress_type;
4792 em->block_start = bytenr; 5114 em->block_start = bytenr;
4793 em->block_len = btrfs_file_extent_disk_num_bytes(leaf, 5115 em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
4794 item); 5116 item);
@@ -4822,12 +5144,14 @@ again:
4822 em->len = (copy_size + root->sectorsize - 1) & 5144 em->len = (copy_size + root->sectorsize - 1) &
4823 ~((u64)root->sectorsize - 1); 5145 ~((u64)root->sectorsize - 1);
4824 em->orig_start = EXTENT_MAP_INLINE; 5146 em->orig_start = EXTENT_MAP_INLINE;
4825 if (compressed) 5147 if (compress_type) {
4826 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 5148 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
5149 em->compress_type = compress_type;
5150 }
4827 ptr = btrfs_file_extent_inline_start(item) + extent_offset; 5151 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
4828 if (create == 0 && !PageUptodate(page)) { 5152 if (create == 0 && !PageUptodate(page)) {
4829 if (btrfs_file_extent_compression(leaf, item) == 5153 if (btrfs_file_extent_compression(leaf, item) !=
4830 BTRFS_COMPRESS_ZLIB) { 5154 BTRFS_COMPRESS_NONE) {
4831 ret = uncompress_inline(path, inode, page, 5155 ret = uncompress_inline(path, inode, page,
4832 pg_offset, 5156 pg_offset,
4833 extent_offset, item); 5157 extent_offset, item);
@@ -4845,6 +5169,7 @@ again:
4845 } 5169 }
4846 flush_dcache_page(page); 5170 flush_dcache_page(page);
4847 } else if (create && PageUptodate(page)) { 5171 } else if (create && PageUptodate(page)) {
5172 WARN_ON(1);
4848 if (!trans) { 5173 if (!trans) {
4849 kunmap(page); 5174 kunmap(page);
4850 free_extent_map(em); 5175 free_extent_map(em);
@@ -4941,11 +5266,823 @@ out:
4941 return em; 5266 return em;
4942} 5267}
4943 5268
5269static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5270 u64 start, u64 len)
5271{
5272 struct btrfs_root *root = BTRFS_I(inode)->root;
5273 struct btrfs_trans_handle *trans;
5274 struct extent_map *em;
5275 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5276 struct btrfs_key ins;
5277 u64 alloc_hint;
5278 int ret;
5279
5280 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5281
5282 trans = btrfs_join_transaction(root, 0);
5283 if (!trans)
5284 return ERR_PTR(-ENOMEM);
5285
5286 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5287
5288 alloc_hint = get_extent_allocation_hint(inode, start, len);
5289 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
5290 alloc_hint, (u64)-1, &ins, 1);
5291 if (ret) {
5292 em = ERR_PTR(ret);
5293 goto out;
5294 }
5295
5296 em = alloc_extent_map(GFP_NOFS);
5297 if (!em) {
5298 em = ERR_PTR(-ENOMEM);
5299 goto out;
5300 }
5301
5302 em->start = start;
5303 em->orig_start = em->start;
5304 em->len = ins.offset;
5305
5306 em->block_start = ins.objectid;
5307 em->block_len = ins.offset;
5308 em->bdev = root->fs_info->fs_devices->latest_bdev;
5309 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5310
5311 while (1) {
5312 write_lock(&em_tree->lock);
5313 ret = add_extent_mapping(em_tree, em);
5314 write_unlock(&em_tree->lock);
5315 if (ret != -EEXIST)
5316 break;
5317 btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
5318 }
5319
5320 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
5321 ins.offset, ins.offset, 0);
5322 if (ret) {
5323 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
5324 em = ERR_PTR(ret);
5325 }
5326out:
5327 btrfs_end_transaction(trans, root);
5328 return em;
5329}
5330
/*
 * Decide whether a direct-IO write to [offset, offset + len) may safely
 * overwrite the existing extent in place (nocow).
 *
 * Returns 1 when nocow is safe, 0 when the block must be cow'd, and a
 * negative errno on error.
 */
static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
				      struct inode *inode, u64 offset, u64 len)
{
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
				       offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		/* no exact key match; the previous item may still cover offset */
		if (slot == 0) {
			/* can't find the item, must cow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != inode->i_ino ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* not our file or wrong item type, must cow */
		goto out;
	}

	if (key.offset > offset) {
		/* Wrong offset, must cow */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* not a regular extent, must cow */
		goto out;
	}
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	backref_offset = btrfs_file_extent_offset(leaf, fi);

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end < offset + len) {
		/* extent doesn't include our full range, must cow */
		goto out;
	}

	/* can't overwrite extents in a read-only block group (e.g. balance) */
	if (btrfs_extent_readonly(root, disk_bytenr))
		goto out;

	/*
	 * look for other files referencing this extent, if we
	 * find any we must cow
	 */
	if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
				  key.offset - backref_offset, disk_bytenr))
		goto out;

	/*
	 * adjust disk_bytenr and num_bytes to cover just the bytes
	 * in this extent we are about to write.  If there
	 * are any csums in that range we have to cow in order
	 * to keep the csums correct
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	num_bytes = min(offset + len, extent_end) - offset;
	if (csum_exist_in_range(root, disk_bytenr, num_bytes))
		goto out;
	/*
	 * all of the above have passed, it is safe to overwrite this extent
	 * without cow
	 */
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
5430
/*
 * get_block callback for btrfs direct IO.  Maps the file range starting
 * at iblock into bh_result.
 *
 * Inline and compressed extents fall back to buffered IO (-ENOTBLK).
 * Reads map the existing extent (or unlock one sector for a hole).
 * Writes either reuse a prealloc/NOCOW extent or allocate a fresh one
 * via btrfs_new_extent_direct().
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start = iblock << inode->i_blkbits;
	u64 len = bh_result->b_size;
	struct btrfs_trans_handle *trans;

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/*
	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
	 * io.  INLINE is special, and we could probably kludge it in here, but
	 * it's still buffered so for safety lets just fall back to the generic
	 * buffered path.
	 *
	 * For COMPRESSED we _have_ to read the entire extent in so we can
	 * decompress it, so there will be buffering required no matter what we
	 * do, so go ahead and fallback to buffered.
	 *
	 * We return -ENOTBLK because thats what makes DIO go ahead and go back
	 * to buffered IO.  Don't blame me, this is the price we pay for using
	 * the generic code.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		return -ENOTBLK;
	}

	/* Just a good old fashioned hole, return */
	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
		free_extent_map(em);
		/* DIO will do one hole at a time, so just unlock a sector */
		unlock_extent(&BTRFS_I(inode)->io_tree, start,
			      start + root->sectorsize - 1, GFP_NOFS);
		return 0;
	}

	/*
	 * We don't allocate a new extent in the following cases
	 *
	 * 1) The inode is marked as NODATACOW.  In this case we'll just use the
	 * existing extent.
	 * 2) The extent is marked as PREALLOC.  We're good to go here and can
	 * just use the extent.
	 *
	 */
	if (!create) {
		/* read: cap the mapping at what this extent covers */
		len = em->len - (start - em->start);
		goto map;
	}

	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		int ret;
		u64 block_start;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		/*
		 * we're not going to log anything, but we do need
		 * to make sure the current transaction stays open
		 * while we look for nocow cross refs
		 */
		trans = btrfs_join_transaction(root, 0);
		if (!trans)
			goto must_cow;

		if (can_nocow_odirect(trans, inode, start, len) == 1) {
			ret = btrfs_add_ordered_extent_dio(inode, start,
					   block_start, len, len, type);
			btrfs_end_transaction(trans, root);
			if (ret) {
				free_extent_map(em);
				return ret;
			}
			goto unlock;
		}
		btrfs_end_transaction(trans, root);
	}
must_cow:
	/*
	 * this will cow the extent, reset the len in case we changed
	 * it above
	 */
	len = bh_result->b_size;
	free_extent_map(em);
	em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em))
		return PTR_ERR(em);
	len = min(len, em->len - (start - em->start));
unlock:
	/* write path: this range is now owned by the ordered extent */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
			 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
			 0, NULL, GFP_NOFS);
map:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);
	if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		set_buffer_new(bh_result);

	free_extent_map(em);

	return 0;
}
5551
/* per-request state for one in-flight btrfs direct-IO bio */
struct btrfs_dio_private {
	struct inode *inode;
	u64 logical_offset;	/* file offset this dio starts at */
	u64 disk_bytenr;	/* disk byte of the original bio (bi_sector << 9) */
	u64 bytes;		/* total byte length of the dio */
	u32 *csums;		/* expected csums for reads; NULL when sums skipped */
	void *private;		/* submitter's bi_private, restored on completion */

	/* number of bios pending for this dio */
	atomic_t pending_bios;

	/* IO errors */
	int errors;

	struct bio *orig_bio;
};
5568
/*
 * bio completion for direct-IO reads: verify each bio_vec's data
 * against the expected checksums in dip->csums (unless the inode is
 * NODATASUM), unlock the file range and hand completion back to the
 * generic direct-IO code via dio_end_io().
 */
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct bio_vec *bvec = bio->bi_io_vec;
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start;
	u32 *private = dip->csums;	/* one expected csum per bio_vec */

	start = dip->logical_offset;
	do {
		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
			struct page *page = bvec->bv_page;
			char *kaddr;
			u32 csum = ~(u32)0;
			unsigned long flags;

			/* KM_IRQ0 map: we can be called in irq context */
			local_irq_save(flags);
			kaddr = kmap_atomic(page, KM_IRQ0);
			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
					       csum, bvec->bv_len);
			btrfs_csum_final(csum, (char *)&csum);
			kunmap_atomic(kaddr, KM_IRQ0);
			local_irq_restore(flags);

			flush_dcache_page(bvec->bv_page);
			if (csum != *private) {
				printk(KERN_ERR "btrfs csum failed ino %lu off"
				      " %llu csum %u private %u\n",
				      inode->i_ino, (unsigned long long)start,
				      csum, *private);
				err = -EIO;
			}
		}

		start += bvec->bv_len;
		private++;
		bvec++;
	} while (bvec <= bvec_end);

	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
		      dip->logical_offset + dip->bytes - 1, GFP_NOFS);
	/* restore the submitter's completion context before signalling */
	bio->bi_private = dip->private;

	kfree(dip->csums);
	kfree(dip);
	dio_end_io(bio, err);
}
5618
5619static void btrfs_endio_direct_write(struct bio *bio, int err)
5620{
5621 struct btrfs_dio_private *dip = bio->bi_private;
5622 struct inode *inode = dip->inode;
5623 struct btrfs_root *root = BTRFS_I(inode)->root;
5624 struct btrfs_trans_handle *trans;
5625 struct btrfs_ordered_extent *ordered = NULL;
5626 struct extent_state *cached_state = NULL;
5627 u64 ordered_offset = dip->logical_offset;
5628 u64 ordered_bytes = dip->bytes;
5629 int ret;
5630
5631 if (err)
5632 goto out_done;
5633again:
5634 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
5635 &ordered_offset,
5636 ordered_bytes);
5637 if (!ret)
5638 goto out_test;
5639
5640 BUG_ON(!ordered);
5641
5642 trans = btrfs_join_transaction(root, 1);
5643 if (!trans) {
5644 err = -ENOMEM;
5645 goto out;
5646 }
5647 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5648
5649 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5650 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5651 if (!ret)
5652 ret = btrfs_update_inode(trans, root, inode);
5653 err = ret;
5654 goto out;
5655 }
5656
5657 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5658 ordered->file_offset + ordered->len - 1, 0,
5659 &cached_state, GFP_NOFS);
5660
5661 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5662 ret = btrfs_mark_extent_written(trans, inode,
5663 ordered->file_offset,
5664 ordered->file_offset +
5665 ordered->len);
5666 if (ret) {
5667 err = ret;
5668 goto out_unlock;
5669 }
5670 } else {
5671 ret = insert_reserved_file_extent(trans, inode,
5672 ordered->file_offset,
5673 ordered->start,
5674 ordered->disk_len,
5675 ordered->len,
5676 ordered->len,
5677 0, 0, 0,
5678 BTRFS_FILE_EXTENT_REG);
5679 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
5680 ordered->file_offset, ordered->len);
5681 if (ret) {
5682 err = ret;
5683 WARN_ON(1);
5684 goto out_unlock;
5685 }
5686 }
5687
5688 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5689 btrfs_ordered_update_i_size(inode, 0, ordered);
5690 btrfs_update_inode(trans, root, inode);
5691out_unlock:
5692 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5693 ordered->file_offset + ordered->len - 1,
5694 &cached_state, GFP_NOFS);
5695out:
5696 btrfs_delalloc_release_metadata(inode, ordered->len);
5697 btrfs_end_transaction(trans, root);
5698 ordered_offset = ordered->file_offset + ordered->len;
5699 btrfs_put_ordered_extent(ordered);
5700 btrfs_put_ordered_extent(ordered);
5701
5702out_test:
5703 /*
5704 * our bio might span multiple ordered extents. If we haven't
5705 * completed the accounting for the whole dio, go back and try again
5706 */
5707 if (ordered_offset < dip->logical_offset + dip->bytes) {
5708 ordered_bytes = dip->logical_offset + dip->bytes -
5709 ordered_offset;
5710 goto again;
5711 }
5712out_done:
5713 bio->bi_private = dip->private;
5714
5715 kfree(dip->csums);
5716 kfree(dip);
5717 dio_end_io(bio, err);
5718}
5719
5720static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5721 struct bio *bio, int mirror_num,
5722 unsigned long bio_flags, u64 offset)
5723{
5724 int ret;
5725 struct btrfs_root *root = BTRFS_I(inode)->root;
5726 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
5727 BUG_ON(ret);
5728 return 0;
5729}
5730
/*
 * Completion handler for each split bio of a dio.  Records any error
 * in the dip and, when the last outstanding split finishes, completes
 * the original dio bio (with error if any split failed).
 */
static void btrfs_end_dio_bio(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;

	if (err) {
		printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
		      "sector %#Lx len %u err no %d\n",
		      dip->inode->i_ino, bio->bi_rw,
		      (unsigned long long)bio->bi_sector, bio->bi_size, err);
		dip->errors = 1;

		/*
		 * before atomic variable goto zero, we must make sure
		 * dip->errors is perceived to be set.
		 */
		smp_mb__before_atomic_dec();
	}

	/* if there are more bios still pending for this dio, just exit */
	if (!atomic_dec_and_test(&dip->pending_bios))
		goto out;

	if (dip->errors)
		bio_io_error(dip->orig_bio);
	else {
		set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
		bio_endio(dip->orig_bio, 0);
	}
out:
	bio_put(bio);
}
5762
5763static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5764 u64 first_sector, gfp_t gfp_flags)
5765{
5766 int nr_vecs = bio_get_nr_vecs(bdev);
5767 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
5768}
5769
/*
 * Submit one (possibly split) dio bio.  Writes get their checksums
 * computed asynchronously by the worker threads; reads get the expected
 * checksums looked up into 'csums' for verification at completion.
 * NODATASUM inodes (skip_sum) do neither.
 */
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
					 int rw, u64 file_offset, int skip_sum,
					 u32 *csums)
{
	int write = rw & REQ_WRITE;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	bio_get(bio);
	/* route end_io through a worker so it doesn't run in irq context */
	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
	if (ret)
		goto err;

	if (write && !skip_sum) {
		/* worker csums then submits; it owns the bio from here */
		ret = btrfs_wq_submit_bio(root->fs_info,
				   inode, rw, bio, 0, 0,
				   file_offset,
				   __btrfs_submit_bio_start_direct_io,
				   __btrfs_submit_bio_done);
		goto err;
	} else if (!skip_sum)
		btrfs_lookup_bio_sums_dio(root, inode, bio,
					  file_offset, csums);

	ret = btrfs_map_bio(root, rw, bio, 0, 1);
err:
	bio_put(bio);
	return ret;
}
5799
/*
 * Split the original dio bio along device-mapping boundaries (as
 * reported by btrfs_map_block) and submit each piece.  dip->pending_bios
 * counts outstanding splits; btrfs_end_dio_bio completes the original
 * bio once they have all finished.
 *
 * After the first split is in flight, errors are reported through
 * dip->errors and the orig_bio end_io rather than the return value.
 */
static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
				    int skip_sum)
{
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	struct bio_vec *bvec = orig_bio->bi_io_vec;
	u64 start_sector = orig_bio->bi_sector;
	u64 file_offset = dip->logical_offset;
	u64 submit_len = 0;
	u64 map_length;
	int nr_pages = 0;
	u32 *csums = dip->csums;
	int ret = 0;

	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
	if (!bio)
		return -ENOMEM;
	bio->bi_private = dip;
	bio->bi_end_io = btrfs_end_dio_bio;
	atomic_inc(&dip->pending_bios);

	/* how far does the mapping at start_sector extend? */
	map_length = orig_bio->bi_size;
	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
			      &map_length, NULL, 0);
	if (ret) {
		bio_put(bio);
		return -EIO;
	}

	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
		/* split when the next vec would cross the mapping or fill the bio */
		if (unlikely(map_length < submit_len + bvec->bv_len ||
		    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
				 bvec->bv_offset) < bvec->bv_len)) {
			/*
			 * inc the count before we submit the bio so
			 * we know the end IO handler won't happen before
			 * we inc the count. Otherwise, the dip might get freed
			 * before we're done setting it up
			 */
			atomic_inc(&dip->pending_bios);
			ret = __btrfs_submit_dio_bio(bio, inode, rw,
						     file_offset, skip_sum,
						     csums);
			if (ret) {
				bio_put(bio);
				atomic_dec(&dip->pending_bios);
				goto out_err;
			}

			/* advance the csum pointer past the pages just submitted */
			if (!skip_sum)
				csums = csums + nr_pages;
			start_sector += submit_len >> 9;
			file_offset += submit_len;

			submit_len = 0;
			nr_pages = 0;

			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
						  start_sector, GFP_NOFS);
			if (!bio)
				goto out_err;
			bio->bi_private = dip;
			bio->bi_end_io = btrfs_end_dio_bio;

			/* re-query the mapping length from the new start sector */
			map_length = orig_bio->bi_size;
			ret = btrfs_map_block(map_tree, READ, start_sector << 9,
					      &map_length, NULL, 0);
			if (ret) {
				bio_put(bio);
				goto out_err;
			}
		} else {
			submit_len += bvec->bv_len;
			nr_pages ++;
			bvec++;
		}
	}

	/* submit the final (or only) piece */
	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
				     csums);
	if (!ret)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * before atomic variable goto zero, we must
	 * make sure dip->errors is perceived to be set.
	 */
	smp_mb__before_atomic_dec();
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* bio_end_io() will handle error, so we needn't return it */
	return 0;
}
5900
5901static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5902 loff_t file_offset)
5903{
5904 struct btrfs_root *root = BTRFS_I(inode)->root;
5905 struct btrfs_dio_private *dip;
5906 struct bio_vec *bvec = bio->bi_io_vec;
5907 int skip_sum;
5908 int write = rw & REQ_WRITE;
5909 int ret = 0;
5910
5911 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
5912
5913 dip = kmalloc(sizeof(*dip), GFP_NOFS);
5914 if (!dip) {
5915 ret = -ENOMEM;
5916 goto free_ordered;
5917 }
5918 dip->csums = NULL;
5919
5920 if (!skip_sum) {
5921 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5922 if (!dip->csums) {
5923 ret = -ENOMEM;
5924 goto free_ordered;
5925 }
5926 }
5927
5928 dip->private = bio->bi_private;
5929 dip->inode = inode;
5930 dip->logical_offset = file_offset;
5931
5932 dip->bytes = 0;
5933 do {
5934 dip->bytes += bvec->bv_len;
5935 bvec++;
5936 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
5937
5938 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5939 bio->bi_private = dip;
5940 dip->errors = 0;
5941 dip->orig_bio = bio;
5942 atomic_set(&dip->pending_bios, 0);
5943
5944 if (write)
5945 bio->bi_end_io = btrfs_endio_direct_write;
5946 else
5947 bio->bi_end_io = btrfs_endio_direct_read;
5948
5949 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
5950 if (!ret)
5951 return;
5952free_ordered:
5953 /*
5954 * If this is a write, we need to clean up the reserved space and kill
5955 * the ordered extent.
5956 */
5957 if (write) {
5958 struct btrfs_ordered_extent *ordered;
5959 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
5960 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5961 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5962 btrfs_free_reserved_extent(root, ordered->start,
5963 ordered->disk_len);
5964 btrfs_put_ordered_extent(ordered);
5965 btrfs_put_ordered_extent(ordered);
5966 }
5967 bio_endio(bio, ret);
5968}
5969
5970static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
5971 const struct iovec *iov, loff_t offset,
5972 unsigned long nr_segs)
5973{
5974 int seg;
5975 size_t size;
5976 unsigned long addr;
5977 unsigned blocksize_mask = root->sectorsize - 1;
5978 ssize_t retval = -EINVAL;
5979 loff_t end = offset;
5980
5981 if (offset & blocksize_mask)
5982 goto out;
5983
5984 /* Check the memory alignment. Blocks cannot straddle pages */
5985 for (seg = 0; seg < nr_segs; seg++) {
5986 addr = (unsigned long)iov[seg].iov_base;
5987 size = iov[seg].iov_len;
5988 end += size;
5989 if ((addr & blocksize_mask) || (size & blocksize_mask))
5990 goto out;
5991 }
5992 retval = 0;
5993out:
5994 return retval;
5995}
/*
 * address_space direct_IO entry point.  Locks the target file range,
 * waits out any ordered extents in it, marks writes delalloc (without
 * the dirty/uptodate bits) and hands off to __blockdev_direct_IO with
 * btrfs_get_blocks_direct / btrfs_submit_direct.
 *
 * Returning 0 on misalignment makes the generic code fall back to
 * buffered IO.
 */
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 lockstart, lockend;
	ssize_t ret;
	int writing = rw & WRITE;
	int write_bits = 0;
	size_t count = iov_length(iov, nr_segs);

	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
			    offset, nr_segs)) {
		return 0;
	}

	lockstart = offset;
	lockend = offset + count - 1;

	if (writing) {
		ret = btrfs_delalloc_reserve_space(inode, count);
		if (ret)
			goto out;
	}

	while (1) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 0, &cached_state, GFP_NOFS);
		/*
		 * We're concerned with the entire range that we're going to be
		 * doing DIO to, so we need to make sure theres no ordered
		 * extents in this range.
		 */
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered)
			break;
		/* drop the lock, wait for the ordered extent, then retry */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     &cached_state, GFP_NOFS);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		cond_resched();
	}

	/*
	 * we don't use btrfs_set_extent_delalloc because we don't want
	 * the dirty or uptodate bits
	 */
	if (writing) {
		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     EXTENT_DELALLOC, 0, NULL, &cached_state,
				     GFP_NOFS);
		if (ret) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
					 lockend, EXTENT_LOCKED | write_bits,
					 1, 0, &cached_state, GFP_NOFS);
			goto out;
		}
	}

	free_extent_state(cached_state);
	cached_state = NULL;

	ret = __blockdev_direct_IO(rw, iocb, inode,
		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
		   btrfs_submit_direct, 0);

	if (ret < 0 && ret != -EIOCBQUEUED) {
		/* no IO was started: release our locks/bits on the whole range */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
			      offset + iov_length(iov, nr_segs) - 1,
			      EXTENT_LOCKED | write_bits, 1, 0,
			      &cached_state, GFP_NOFS);
	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
		/*
		 * We're falling back to buffered, unlock the section we didn't
		 * do IO on.
		 */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
			      offset + iov_length(iov, nr_segs) - 1,
			      EXTENT_LOCKED | write_bits, 1, 0,
			      &cached_state, GFP_NOFS);
	}
out:
	free_extent_state(cached_state);
	return ret;
}
4950 6087
4951static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6088static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -5021,6 +6158,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5021{ 6158{
5022 struct extent_io_tree *tree; 6159 struct extent_io_tree *tree;
5023 struct btrfs_ordered_extent *ordered; 6160 struct btrfs_ordered_extent *ordered;
6161 struct extent_state *cached_state = NULL;
5024 u64 page_start = page_offset(page); 6162 u64 page_start = page_offset(page);
5025 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 6163 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
5026 6164
@@ -5039,7 +6177,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5039 btrfs_releasepage(page, GFP_NOFS); 6177 btrfs_releasepage(page, GFP_NOFS);
5040 return; 6178 return;
5041 } 6179 }
5042 lock_extent(tree, page_start, page_end, GFP_NOFS); 6180 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
6181 GFP_NOFS);
5043 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 6182 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
5044 page_offset(page)); 6183 page_offset(page));
5045 if (ordered) { 6184 if (ordered) {
@@ -5050,7 +6189,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5050 clear_extent_bit(tree, page_start, page_end, 6189 clear_extent_bit(tree, page_start, page_end,
5051 EXTENT_DIRTY | EXTENT_DELALLOC | 6190 EXTENT_DIRTY | EXTENT_DELALLOC |
5052 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 6191 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
5053 NULL, GFP_NOFS); 6192 &cached_state, GFP_NOFS);
5054 /* 6193 /*
5055 * whoever cleared the private bit is responsible 6194 * whoever cleared the private bit is responsible
5056 * for the finish_ordered_io 6195 * for the finish_ordered_io
@@ -5060,11 +6199,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5060 page_start, page_end); 6199 page_start, page_end);
5061 } 6200 }
5062 btrfs_put_ordered_extent(ordered); 6201 btrfs_put_ordered_extent(ordered);
5063 lock_extent(tree, page_start, page_end, GFP_NOFS); 6202 cached_state = NULL;
6203 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
6204 GFP_NOFS);
5064 } 6205 }
5065 clear_extent_bit(tree, page_start, page_end, 6206 clear_extent_bit(tree, page_start, page_end,
5066 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 6207 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
5067 EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 6208 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
5068 __btrfs_releasepage(page, GFP_NOFS); 6209 __btrfs_releasepage(page, GFP_NOFS);
5069 6210
5070 ClearPageChecked(page); 6211 ClearPageChecked(page);
@@ -5097,6 +6238,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5097 struct btrfs_root *root = BTRFS_I(inode)->root; 6238 struct btrfs_root *root = BTRFS_I(inode)->root;
5098 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 6239 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
5099 struct btrfs_ordered_extent *ordered; 6240 struct btrfs_ordered_extent *ordered;
6241 struct extent_state *cached_state = NULL;
5100 char *kaddr; 6242 char *kaddr;
5101 unsigned long zero_start; 6243 unsigned long zero_start;
5102 loff_t size; 6244 loff_t size;
@@ -5104,7 +6246,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5104 u64 page_start; 6246 u64 page_start;
5105 u64 page_end; 6247 u64 page_end;
5106 6248
5107 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 6249 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
5108 if (ret) { 6250 if (ret) {
5109 if (ret == -ENOMEM) 6251 if (ret == -ENOMEM)
5110 ret = VM_FAULT_OOM; 6252 ret = VM_FAULT_OOM;
@@ -5113,13 +6255,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5113 goto out; 6255 goto out;
5114 } 6256 }
5115 6257
5116 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
5117 if (ret) {
5118 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5119 ret = VM_FAULT_SIGBUS;
5120 goto out;
5121 }
5122
5123 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ 6258 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
5124again: 6259again:
5125 lock_page(page); 6260 lock_page(page);
@@ -5129,13 +6264,13 @@ again:
5129 6264
5130 if ((page->mapping != inode->i_mapping) || 6265 if ((page->mapping != inode->i_mapping) ||
5131 (page_start >= size)) { 6266 (page_start >= size)) {
5132 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5133 /* page got truncated out from underneath us */ 6267 /* page got truncated out from underneath us */
5134 goto out_unlock; 6268 goto out_unlock;
5135 } 6269 }
5136 wait_on_page_writeback(page); 6270 wait_on_page_writeback(page);
5137 6271
5138 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 6272 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
6273 GFP_NOFS);
5139 set_page_extent_mapped(page); 6274 set_page_extent_mapped(page);
5140 6275
5141 /* 6276 /*
@@ -5144,7 +6279,8 @@ again:
5144 */ 6279 */
5145 ordered = btrfs_lookup_ordered_extent(inode, page_start); 6280 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5146 if (ordered) { 6281 if (ordered) {
5147 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 6282 unlock_extent_cached(io_tree, page_start, page_end,
6283 &cached_state, GFP_NOFS);
5148 unlock_page(page); 6284 unlock_page(page);
5149 btrfs_start_ordered_extent(inode, ordered, 1); 6285 btrfs_start_ordered_extent(inode, ordered, 1);
5150 btrfs_put_ordered_extent(ordered); 6286 btrfs_put_ordered_extent(ordered);
@@ -5158,15 +6294,16 @@ again:
5158 * is probably a better way to do this, but for now keep consistent with 6294 * is probably a better way to do this, but for now keep consistent with
5159 * prepare_pages in the normal write path. 6295 * prepare_pages in the normal write path.
5160 */ 6296 */
5161 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 6297 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
5162 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 6298 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
5163 GFP_NOFS); 6299 0, 0, &cached_state, GFP_NOFS);
5164 6300
5165 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 6301 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
6302 &cached_state);
5166 if (ret) { 6303 if (ret) {
5167 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 6304 unlock_extent_cached(io_tree, page_start, page_end,
6305 &cached_state, GFP_NOFS);
5168 ret = VM_FAULT_SIGBUS; 6306 ret = VM_FAULT_SIGBUS;
5169 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5170 goto out_unlock; 6307 goto out_unlock;
5171 } 6308 }
5172 ret = 0; 6309 ret = 0;
@@ -5190,13 +6327,13 @@ again:
5190 BTRFS_I(inode)->last_trans = root->fs_info->generation; 6327 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5191 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 6328 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5192 6329
5193 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 6330 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5194 6331
5195out_unlock: 6332out_unlock:
5196 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
5197 if (!ret) 6333 if (!ret)
5198 return VM_FAULT_LOCKED; 6334 return VM_FAULT_LOCKED;
5199 unlock_page(page); 6335 unlock_page(page);
6336 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
5200out: 6337out:
5201 return ret; 6338 return ret;
5202} 6339}
@@ -5221,8 +6358,10 @@ static void btrfs_truncate(struct inode *inode)
5221 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6358 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5222 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6359 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5223 6360
5224 trans = btrfs_start_transaction(root, 1); 6361 trans = btrfs_start_transaction(root, 0);
6362 BUG_ON(IS_ERR(trans));
5225 btrfs_set_trans_block_group(trans, inode); 6363 btrfs_set_trans_block_group(trans, inode);
6364 trans->block_rsv = root->orphan_block_rsv;
5226 6365
5227 /* 6366 /*
5228 * setattr is responsible for setting the ordered_data_close flag, 6367 * setattr is responsible for setting the ordered_data_close flag,
@@ -5245,6 +6384,23 @@ static void btrfs_truncate(struct inode *inode)
5245 btrfs_add_ordered_operation(trans, root, inode); 6384 btrfs_add_ordered_operation(trans, root, inode);
5246 6385
5247 while (1) { 6386 while (1) {
6387 if (!trans) {
6388 trans = btrfs_start_transaction(root, 0);
6389 BUG_ON(IS_ERR(trans));
6390 btrfs_set_trans_block_group(trans, inode);
6391 trans->block_rsv = root->orphan_block_rsv;
6392 }
6393
6394 ret = btrfs_block_rsv_check(trans, root,
6395 root->orphan_block_rsv, 0, 5);
6396 if (ret) {
6397 BUG_ON(ret != -EAGAIN);
6398 ret = btrfs_commit_transaction(trans, root);
6399 BUG_ON(ret);
6400 trans = NULL;
6401 continue;
6402 }
6403
5248 ret = btrfs_truncate_inode_items(trans, root, inode, 6404 ret = btrfs_truncate_inode_items(trans, root, inode,
5249 inode->i_size, 6405 inode->i_size,
5250 BTRFS_EXTENT_DATA_KEY); 6406 BTRFS_EXTENT_DATA_KEY);
@@ -5256,10 +6412,8 @@ static void btrfs_truncate(struct inode *inode)
5256 6412
5257 nr = trans->blocks_used; 6413 nr = trans->blocks_used;
5258 btrfs_end_transaction(trans, root); 6414 btrfs_end_transaction(trans, root);
6415 trans = NULL;
5259 btrfs_btree_balance_dirty(root, nr); 6416 btrfs_btree_balance_dirty(root, nr);
5260
5261 trans = btrfs_start_transaction(root, 1);
5262 btrfs_set_trans_block_group(trans, inode);
5263 } 6417 }
5264 6418
5265 if (ret == 0 && inode->i_nlink > 0) { 6419 if (ret == 0 && inode->i_nlink > 0) {
@@ -5320,21 +6474,54 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
5320struct inode *btrfs_alloc_inode(struct super_block *sb) 6474struct inode *btrfs_alloc_inode(struct super_block *sb)
5321{ 6475{
5322 struct btrfs_inode *ei; 6476 struct btrfs_inode *ei;
6477 struct inode *inode;
5323 6478
5324 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); 6479 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
5325 if (!ei) 6480 if (!ei)
5326 return NULL; 6481 return NULL;
6482
6483 ei->root = NULL;
6484 ei->space_info = NULL;
6485 ei->generation = 0;
6486 ei->sequence = 0;
5327 ei->last_trans = 0; 6487 ei->last_trans = 0;
5328 ei->last_sub_trans = 0; 6488 ei->last_sub_trans = 0;
5329 ei->logged_trans = 0; 6489 ei->logged_trans = 0;
5330 ei->outstanding_extents = 0; 6490 ei->delalloc_bytes = 0;
5331 ei->reserved_extents = 0; 6491 ei->reserved_bytes = 0;
5332 ei->root = NULL; 6492 ei->disk_i_size = 0;
6493 ei->flags = 0;
6494 ei->index_cnt = (u64)-1;
6495 ei->last_unlink_trans = 0;
6496
5333 spin_lock_init(&ei->accounting_lock); 6497 spin_lock_init(&ei->accounting_lock);
6498 atomic_set(&ei->outstanding_extents, 0);
6499 ei->reserved_extents = 0;
6500
6501 ei->ordered_data_close = 0;
6502 ei->orphan_meta_reserved = 0;
6503 ei->dummy_inode = 0;
6504 ei->force_compress = BTRFS_COMPRESS_NONE;
6505
6506 inode = &ei->vfs_inode;
6507 extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
6508 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
6509 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
6510 mutex_init(&ei->log_mutex);
5334 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6511 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
5335 INIT_LIST_HEAD(&ei->i_orphan); 6512 INIT_LIST_HEAD(&ei->i_orphan);
6513 INIT_LIST_HEAD(&ei->delalloc_inodes);
5336 INIT_LIST_HEAD(&ei->ordered_operations); 6514 INIT_LIST_HEAD(&ei->ordered_operations);
5337 return &ei->vfs_inode; 6515 RB_CLEAR_NODE(&ei->rb_node);
6516
6517 return inode;
6518}
6519
6520static void btrfs_i_callback(struct rcu_head *head)
6521{
6522 struct inode *inode = container_of(head, struct inode, i_rcu);
6523 INIT_LIST_HEAD(&inode->i_dentry);
6524 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
5338} 6525}
5339 6526
5340void btrfs_destroy_inode(struct inode *inode) 6527void btrfs_destroy_inode(struct inode *inode)
@@ -5344,6 +6531,8 @@ void btrfs_destroy_inode(struct inode *inode)
5344 6531
5345 WARN_ON(!list_empty(&inode->i_dentry)); 6532 WARN_ON(!list_empty(&inode->i_dentry));
5346 WARN_ON(inode->i_data.nrpages); 6533 WARN_ON(inode->i_data.nrpages);
6534 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
6535 WARN_ON(BTRFS_I(inode)->reserved_extents);
5347 6536
5348 /* 6537 /*
5349 * This can happen where we create an inode, but somebody else also 6538 * This can happen where we create an inode, but somebody else also
@@ -5364,13 +6553,28 @@ void btrfs_destroy_inode(struct inode *inode)
5364 spin_unlock(&root->fs_info->ordered_extent_lock); 6553 spin_unlock(&root->fs_info->ordered_extent_lock);
5365 } 6554 }
5366 6555
5367 spin_lock(&root->list_lock); 6556 if (root == root->fs_info->tree_root) {
6557 struct btrfs_block_group_cache *block_group;
6558
6559 block_group = btrfs_lookup_block_group(root->fs_info,
6560 BTRFS_I(inode)->block_group);
6561 if (block_group && block_group->inode == inode) {
6562 spin_lock(&block_group->lock);
6563 block_group->inode = NULL;
6564 spin_unlock(&block_group->lock);
6565 btrfs_put_block_group(block_group);
6566 } else if (block_group) {
6567 btrfs_put_block_group(block_group);
6568 }
6569 }
6570
6571 spin_lock(&root->orphan_lock);
5368 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6572 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5369 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6573 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5370 inode->i_ino); 6574 inode->i_ino);
5371 list_del_init(&BTRFS_I(inode)->i_orphan); 6575 list_del_init(&BTRFS_I(inode)->i_orphan);
5372 } 6576 }
5373 spin_unlock(&root->list_lock); 6577 spin_unlock(&root->orphan_lock);
5374 6578
5375 while (1) { 6579 while (1) {
5376 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6580 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -5389,17 +6593,18 @@ void btrfs_destroy_inode(struct inode *inode)
5389 inode_tree_del(inode); 6593 inode_tree_del(inode);
5390 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6594 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
5391free: 6595free:
5392 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6596 call_rcu(&inode->i_rcu, btrfs_i_callback);
5393} 6597}
5394 6598
5395void btrfs_drop_inode(struct inode *inode) 6599int btrfs_drop_inode(struct inode *inode)
5396{ 6600{
5397 struct btrfs_root *root = BTRFS_I(inode)->root; 6601 struct btrfs_root *root = BTRFS_I(inode)->root;
5398 6602
5399 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 6603 if (btrfs_root_refs(&root->root_item) == 0 &&
5400 generic_delete_inode(inode); 6604 root != root->fs_info->tree_root)
6605 return 1;
5401 else 6606 else
5402 generic_drop_inode(inode); 6607 return generic_drop_inode(inode);
5403} 6608}
5404 6609
5405static void init_once(void *foo) 6610static void init_once(void *foo)
@@ -5492,19 +6697,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5492 if (S_ISDIR(old_inode->i_mode) && new_inode && 6697 if (S_ISDIR(old_inode->i_mode) && new_inode &&
5493 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) 6698 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
5494 return -ENOTEMPTY; 6699 return -ENOTEMPTY;
5495
5496 /*
5497 * We want to reserve the absolute worst case amount of items. So if
5498 * both inodes are subvols and we need to unlink them then that would
5499 * require 4 item modifications, but if they are both normal inodes it
5500 * would require 5 item modifications, so we'll assume their normal
5501 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
5502 * should cover the worst case number of items we'll modify.
5503 */
5504 ret = btrfs_reserve_metadata_space(root, 11);
5505 if (ret)
5506 return ret;
5507
5508 /* 6700 /*
5509 * we're using rename to replace one file with another. 6701 * we're using rename to replace one file with another.
5510 * and the replacement file is large. Start IO on it now so 6702 * and the replacement file is large. Start IO on it now so
@@ -5517,8 +6709,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5517 /* close the racy window with snapshot create/destroy ioctl */ 6709 /* close the racy window with snapshot create/destroy ioctl */
5518 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6710 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5519 down_read(&root->fs_info->subvol_sem); 6711 down_read(&root->fs_info->subvol_sem);
6712 /*
6713 * We want to reserve the absolute worst case amount of items. So if
6714 * both inodes are subvols and we need to unlink them then that would
6715 * require 4 item modifications, but if they are both normal inodes it
6716 * would require 5 item modifications, so we'll assume their normal
6717 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
6718 * should cover the worst case number of items we'll modify.
6719 */
6720 trans = btrfs_start_transaction(root, 20);
6721 if (IS_ERR(trans))
6722 return PTR_ERR(trans);
5520 6723
5521 trans = btrfs_start_transaction(root, 1);
5522 btrfs_set_trans_block_group(trans, new_dir); 6724 btrfs_set_trans_block_group(trans, new_dir);
5523 6725
5524 if (dest != root) 6726 if (dest != root)
@@ -5607,8 +6809,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5607 BUG_ON(ret); 6809 BUG_ON(ret);
5608 6810
5609 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 6811 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
5610 btrfs_log_new_name(trans, old_inode, old_dir, 6812 struct dentry *parent = dget_parent(new_dentry);
5611 new_dentry->d_parent); 6813 btrfs_log_new_name(trans, old_inode, old_dir, parent);
6814 dput(parent);
5612 btrfs_end_log_trans(root); 6815 btrfs_end_log_trans(root);
5613 } 6816 }
5614out_fail: 6817out_fail:
@@ -5617,7 +6820,6 @@ out_fail:
5617 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6820 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5618 up_read(&root->fs_info->subvol_sem); 6821 up_read(&root->fs_info->subvol_sem);
5619 6822
5620 btrfs_unreserve_metadata_space(root, 11);
5621 return ret; 6823 return ret;
5622} 6824}
5623 6825
@@ -5669,6 +6871,58 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5669 return 0; 6871 return 0;
5670} 6872}
5671 6873
6874int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
6875 int sync)
6876{
6877 struct btrfs_inode *binode;
6878 struct inode *inode = NULL;
6879
6880 spin_lock(&root->fs_info->delalloc_lock);
6881 while (!list_empty(&root->fs_info->delalloc_inodes)) {
6882 binode = list_entry(root->fs_info->delalloc_inodes.next,
6883 struct btrfs_inode, delalloc_inodes);
6884 inode = igrab(&binode->vfs_inode);
6885 if (inode) {
6886 list_move_tail(&binode->delalloc_inodes,
6887 &root->fs_info->delalloc_inodes);
6888 break;
6889 }
6890
6891 list_del_init(&binode->delalloc_inodes);
6892 cond_resched_lock(&root->fs_info->delalloc_lock);
6893 }
6894 spin_unlock(&root->fs_info->delalloc_lock);
6895
6896 if (inode) {
6897 if (sync) {
6898 filemap_write_and_wait(inode->i_mapping);
6899 /*
6900 * We have to do this because compression doesn't
6901 * actually set PG_writeback until it submits the pages
6902 * for IO, which happens in an async thread, so we could
6903 * race and not actually wait for any writeback pages
6904 * because they've not been submitted yet. Technically
6905 * this could still be the case for the ordered stuff
6906 * since the async thread may not have started to do its
6907 * work yet. If this becomes the case then we need to
6908 * figure out a way to make sure that in writepage we
6909 * wait for any async pages to be submitted before
6910 * returning so that fdatawait does what its supposed to
6911 * do.
6912 */
6913 btrfs_wait_ordered_range(inode, 0, (u64)-1);
6914 } else {
6915 filemap_flush(inode->i_mapping);
6916 }
6917 if (delay_iput)
6918 btrfs_add_delayed_iput(inode);
6919 else
6920 iput(inode);
6921 return 1;
6922 }
6923 return 0;
6924}
6925
5672static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 6926static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5673 const char *symname) 6927 const char *symname)
5674{ 6928{
@@ -5692,29 +6946,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5692 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 6946 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
5693 return -ENAMETOOLONG; 6947 return -ENAMETOOLONG;
5694 6948
6949 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
6950 if (err)
6951 return err;
5695 /* 6952 /*
5696 * 2 items for inode item and ref 6953 * 2 items for inode item and ref
5697 * 2 items for dir items 6954 * 2 items for dir items
5698 * 1 item for xattr if selinux is on 6955 * 1 item for xattr if selinux is on
5699 */ 6956 */
5700 err = btrfs_reserve_metadata_space(root, 5); 6957 trans = btrfs_start_transaction(root, 5);
5701 if (err) 6958 if (IS_ERR(trans))
5702 return err; 6959 return PTR_ERR(trans);
5703 6960
5704 trans = btrfs_start_transaction(root, 1);
5705 if (!trans)
5706 goto out_fail;
5707 btrfs_set_trans_block_group(trans, dir); 6961 btrfs_set_trans_block_group(trans, dir);
5708 6962
5709 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
5710 if (err) {
5711 err = -ENOSPC;
5712 goto out_unlock;
5713 }
5714
5715 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 6963 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
5716 dentry->d_name.len, 6964 dentry->d_name.len, dir->i_ino, objectid,
5717 dentry->d_parent->d_inode->i_ino, objectid,
5718 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 6965 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
5719 &index); 6966 &index);
5720 err = PTR_ERR(inode); 6967 err = PTR_ERR(inode);
@@ -5728,7 +6975,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5728 } 6975 }
5729 6976
5730 btrfs_set_trans_block_group(trans, inode); 6977 btrfs_set_trans_block_group(trans, inode);
5731 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 6978 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
5732 if (err) 6979 if (err)
5733 drop_inode = 1; 6980 drop_inode = 1;
5734 else { 6981 else {
@@ -5783,8 +7030,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5783out_unlock: 7030out_unlock:
5784 nr = trans->blocks_used; 7031 nr = trans->blocks_used;
5785 btrfs_end_transaction_throttle(trans, root); 7032 btrfs_end_transaction_throttle(trans, root);
5786out_fail:
5787 btrfs_unreserve_metadata_space(root, 5);
5788 if (drop_inode) { 7033 if (drop_inode) {
5789 inode_dec_link_count(inode); 7034 inode_dec_link_count(inode);
5790 iput(inode); 7035 iput(inode);
@@ -5793,36 +7038,35 @@ out_fail:
5793 return err; 7038 return err;
5794} 7039}
5795 7040
5796static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 7041static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
5797 u64 alloc_hint, int mode, loff_t actual_len) 7042 u64 start, u64 num_bytes, u64 min_size,
7043 loff_t actual_len, u64 *alloc_hint,
7044 struct btrfs_trans_handle *trans)
5798{ 7045{
5799 struct btrfs_trans_handle *trans;
5800 struct btrfs_root *root = BTRFS_I(inode)->root; 7046 struct btrfs_root *root = BTRFS_I(inode)->root;
5801 struct btrfs_key ins; 7047 struct btrfs_key ins;
5802 u64 alloc_size;
5803 u64 cur_offset = start; 7048 u64 cur_offset = start;
5804 u64 num_bytes = end - start;
5805 int ret = 0;
5806 u64 i_size; 7049 u64 i_size;
7050 int ret = 0;
7051 bool own_trans = true;
5807 7052
7053 if (trans)
7054 own_trans = false;
5808 while (num_bytes > 0) { 7055 while (num_bytes > 0) {
5809 alloc_size = min(num_bytes, root->fs_info->max_extent); 7056 if (own_trans) {
5810 7057 trans = btrfs_start_transaction(root, 3);
5811 trans = btrfs_start_transaction(root, 1); 7058 if (IS_ERR(trans)) {
5812 7059 ret = PTR_ERR(trans);
5813 ret = btrfs_reserve_extent(trans, root, alloc_size, 7060 break;
5814 root->sectorsize, 0, alloc_hint, 7061 }
5815 (u64)-1, &ins, 1);
5816 if (ret) {
5817 WARN_ON(1);
5818 goto stop_trans;
5819 } 7062 }
5820 7063
5821 ret = btrfs_reserve_metadata_space(root, 3); 7064 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
7065 0, *alloc_hint, (u64)-1, &ins, 1);
5822 if (ret) { 7066 if (ret) {
5823 btrfs_free_reserved_extent(root, ins.objectid, 7067 if (own_trans)
5824 ins.offset); 7068 btrfs_end_transaction(trans, root);
5825 goto stop_trans; 7069 break;
5826 } 7070 }
5827 7071
5828 ret = insert_reserved_file_extent(trans, inode, 7072 ret = insert_reserved_file_extent(trans, inode,
@@ -5836,14 +7080,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5836 7080
5837 num_bytes -= ins.offset; 7081 num_bytes -= ins.offset;
5838 cur_offset += ins.offset; 7082 cur_offset += ins.offset;
5839 alloc_hint = ins.objectid + ins.offset; 7083 *alloc_hint = ins.objectid + ins.offset;
5840 7084
5841 inode->i_ctime = CURRENT_TIME; 7085 inode->i_ctime = CURRENT_TIME;
5842 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 7086 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5843 if (!(mode & FALLOC_FL_KEEP_SIZE) && 7087 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5844 cur_offset > inode->i_size) { 7088 (actual_len > inode->i_size) &&
7089 (cur_offset > inode->i_size)) {
5845 if (cur_offset > actual_len) 7090 if (cur_offset > actual_len)
5846 i_size = actual_len; 7091 i_size = actual_len;
5847 else 7092 else
5848 i_size = cur_offset; 7093 i_size = cur_offset;
5849 i_size_write(inode, i_size); 7094 i_size_write(inode, i_size);
@@ -5853,117 +7098,28 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5853 ret = btrfs_update_inode(trans, root, inode); 7098 ret = btrfs_update_inode(trans, root, inode);
5854 BUG_ON(ret); 7099 BUG_ON(ret);
5855 7100
5856 btrfs_end_transaction(trans, root); 7101 if (own_trans)
5857 btrfs_unreserve_metadata_space(root, 3); 7102 btrfs_end_transaction(trans, root);
5858 } 7103 }
5859 return ret; 7104 return ret;
5860
5861stop_trans:
5862 btrfs_end_transaction(trans, root);
5863 return ret;
5864
5865} 7105}
5866 7106
5867static long btrfs_fallocate(struct inode *inode, int mode, 7107int btrfs_prealloc_file_range(struct inode *inode, int mode,
5868 loff_t offset, loff_t len) 7108 u64 start, u64 num_bytes, u64 min_size,
7109 loff_t actual_len, u64 *alloc_hint)
5869{ 7110{
5870 u64 cur_offset; 7111 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
5871 u64 last_byte; 7112 min_size, actual_len, alloc_hint,
5872 u64 alloc_start; 7113 NULL);
5873 u64 alloc_end; 7114}
5874 u64 alloc_hint = 0;
5875 u64 locked_end;
5876 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5877 struct extent_map *em;
5878 int ret;
5879
5880 alloc_start = offset & ~mask;
5881 alloc_end = (offset + len + mask) & ~mask;
5882
5883 /*
5884 * wait for ordered IO before we have any locks. We'll loop again
5885 * below with the locks held.
5886 */
5887 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
5888
5889 mutex_lock(&inode->i_mutex);
5890 if (alloc_start > inode->i_size) {
5891 ret = btrfs_cont_expand(inode, alloc_start);
5892 if (ret)
5893 goto out;
5894 }
5895
5896 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
5897 alloc_end - alloc_start);
5898 if (ret)
5899 goto out;
5900
5901 locked_end = alloc_end - 1;
5902 while (1) {
5903 struct btrfs_ordered_extent *ordered;
5904
5905 /* the extent lock is ordered inside the running
5906 * transaction
5907 */
5908 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5909 GFP_NOFS);
5910 ordered = btrfs_lookup_first_ordered_extent(inode,
5911 alloc_end - 1);
5912 if (ordered &&
5913 ordered->file_offset + ordered->len > alloc_start &&
5914 ordered->file_offset < alloc_end) {
5915 btrfs_put_ordered_extent(ordered);
5916 unlock_extent(&BTRFS_I(inode)->io_tree,
5917 alloc_start, locked_end, GFP_NOFS);
5918 /*
5919 * we can't wait on the range with the transaction
5920 * running or with the extent lock held
5921 */
5922 btrfs_wait_ordered_range(inode, alloc_start,
5923 alloc_end - alloc_start);
5924 } else {
5925 if (ordered)
5926 btrfs_put_ordered_extent(ordered);
5927 break;
5928 }
5929 }
5930
5931 cur_offset = alloc_start;
5932 while (1) {
5933 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
5934 alloc_end - cur_offset, 0);
5935 BUG_ON(IS_ERR(em) || !em);
5936 last_byte = min(extent_map_end(em), alloc_end);
5937 last_byte = (last_byte + mask) & ~mask;
5938 if (em->block_start == EXTENT_MAP_HOLE ||
5939 (cur_offset >= inode->i_size &&
5940 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5941 ret = prealloc_file_range(inode,
5942 cur_offset, last_byte,
5943 alloc_hint, mode, offset+len);
5944 if (ret < 0) {
5945 free_extent_map(em);
5946 break;
5947 }
5948 }
5949 if (em->block_start <= EXTENT_MAP_LAST_BYTE)
5950 alloc_hint = em->block_start;
5951 free_extent_map(em);
5952
5953 cur_offset = last_byte;
5954 if (cur_offset >= alloc_end) {
5955 ret = 0;
5956 break;
5957 }
5958 }
5959 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5960 GFP_NOFS);
5961 7115
5962 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 7116int btrfs_prealloc_file_range_trans(struct inode *inode,
5963 alloc_end - alloc_start); 7117 struct btrfs_trans_handle *trans, int mode,
5964out: 7118 u64 start, u64 num_bytes, u64 min_size,
5965 mutex_unlock(&inode->i_mutex); 7119 loff_t actual_len, u64 *alloc_hint)
5966 return ret; 7120{
7121 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
7122 min_size, actual_len, alloc_hint, trans);
5967} 7123}
5968 7124
5969static int btrfs_set_page_dirty(struct page *page) 7125static int btrfs_set_page_dirty(struct page *page)
@@ -5971,11 +7127,15 @@ static int btrfs_set_page_dirty(struct page *page)
5971 return __set_page_dirty_nobuffers(page); 7127 return __set_page_dirty_nobuffers(page);
5972} 7128}
5973 7129
5974static int btrfs_permission(struct inode *inode, int mask) 7130static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
5975{ 7131{
7132 struct btrfs_root *root = BTRFS_I(inode)->root;
7133
7134 if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
7135 return -EROFS;
5976 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7136 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
5977 return -EACCES; 7137 return -EACCES;
5978 return generic_permission(inode, mask, btrfs_check_acl); 7138 return generic_permission(inode, mask, flags, btrfs_check_acl);
5979} 7139}
5980 7140
5981static const struct inode_operations btrfs_dir_inode_operations = { 7141static const struct inode_operations btrfs_dir_inode_operations = {
@@ -6068,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = {
6068 .listxattr = btrfs_listxattr, 7228 .listxattr = btrfs_listxattr,
6069 .removexattr = btrfs_removexattr, 7229 .removexattr = btrfs_removexattr,
6070 .permission = btrfs_permission, 7230 .permission = btrfs_permission,
6071 .fallocate = btrfs_fallocate,
6072 .fiemap = btrfs_fiemap, 7231 .fiemap = btrfs_fiemap,
6073}; 7232};
6074static const struct inode_operations btrfs_special_inode_operations = { 7233static const struct inode_operations btrfs_special_inode_operations = {
@@ -6084,6 +7243,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
6084 .readlink = generic_readlink, 7243 .readlink = generic_readlink,
6085 .follow_link = page_follow_link_light, 7244 .follow_link = page_follow_link_light,
6086 .put_link = page_put_link, 7245 .put_link = page_put_link,
7246 .getattr = btrfs_getattr,
6087 .permission = btrfs_permission, 7247 .permission = btrfs_permission,
6088 .setxattr = btrfs_setxattr, 7248 .setxattr = btrfs_setxattr,
6089 .getxattr = btrfs_getxattr, 7249 .getxattr = btrfs_getxattr,