path: root/fs/btrfs/inode.c
author		Chris Mason <chris.mason@fusionio.com>	2013-02-20 14:05:45 -0500
committer	Chris Mason <chris.mason@fusionio.com>	2013-02-20 14:05:45 -0500
commit		b2c6b3e0611c58fbeb6b9c0892b6249f7bdfaf6b (patch)
tree		de7cf0825605aa6acf33a8d107003efd7aedbe72 /fs/btrfs/inode.c
parent		19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff)
parent		272d26d0ad8c0e326689f2fa3cdc6a5fcc8e74e0 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into for-linus-3.9
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	fs/btrfs/disk-io.c
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--	fs/btrfs/inode.c	321
1 file changed, 204 insertions(+), 117 deletions(-)
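The delalloc accounting hunks below (in btrfs_set_bit_hook and btrfs_clear_bit_hook) drop the fs_info->delalloc_lock protected running total in favour of a per-cpu counter updated with __percpu_counter_add(). As a rough, self-contained sketch of that kernel percpu_counter pattern (illustrative only, not part of the patch; the example_stats wrapper and its helpers are hypothetical names):

/*
 * Illustrative sketch, not part of the patch: the percpu_counter pattern
 * the delalloc accounting hunks switch to.  Names are hypothetical.
 */
#include <linux/percpu_counter.h>

struct example_stats {
	struct percpu_counter bytes;	/* replaces a spinlock-protected u64 */
};

static int example_stats_init(struct example_stats *s)
{
	/* 3.9-era signature: percpu_counter_init(counter, initial_value) */
	return percpu_counter_init(&s->bytes, 0);
}

static void example_stats_add(struct example_stats *s, s64 len, s32 batch)
{
	/*
	 * Cheap per-cpu add: the local delta is only folded into the
	 * shared count once it exceeds 'batch', so hot paths avoid
	 * bouncing a global lock.
	 */
	__percpu_counter_add(&s->bytes, len, batch);
}

static s64 example_stats_read(struct example_stats *s)
{
	/* Exact (slower) sum across CPUs, for slow paths such as statfs. */
	return percpu_counter_sum(&s->bytes);
}

static void example_stats_destroy(struct example_stats *s)
{
	percpu_counter_destroy(&s->bytes);
}

The batch argument (root->fs_info->delalloc_batch in the patch) sets how far a per-cpu delta may drift before it is folded into the shared count, trading read-side accuracy for cheaper updates.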
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cc93b23ca352..1aa98be54ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,12 @@
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/mount.h>
+#include <linux/btrfs.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "ordered-data.h"
 #include "xattr.h"
@@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
 	if (list_empty(&async_cow->extents))
 		return 0;
 
-
+again:
 	while (!list_empty(&async_cow->extents)) {
 		async_extent = list_entry(async_cow->extents.next,
 					  struct async_extent, list);
@@ -648,6 +648,8 @@ retry:
 						  async_extent->ram_size - 1,
 						  btrfs_get_extent,
 						  WB_SYNC_ALL);
+			else if (ret)
+				unlock_page(async_cow->locked_page);
 			kfree(async_extent);
 			cond_resched();
 			continue;
@@ -672,6 +674,7 @@ retry:
 
 		if (ret) {
 			int i;
+
 			for (i = 0; i < async_extent->nr_pages; i++) {
 				WARN_ON(async_extent->pages[i]->mapping);
 				page_cache_release(async_extent->pages[i]);
@@ -679,12 +682,10 @@ retry:
 			kfree(async_extent->pages);
 			async_extent->nr_pages = 0;
 			async_extent->pages = NULL;
-			unlock_extent(io_tree, async_extent->start,
-				      async_extent->start +
-				      async_extent->ram_size - 1);
+
 			if (ret == -ENOSPC)
 				goto retry;
-			goto out_free; /* JDM: Requeue? */
+			goto out_free;
 		}
 
 		/*
@@ -696,10 +697,13 @@ retry:
 					async_extent->ram_size - 1, 0);
 
 		em = alloc_extent_map();
-		BUG_ON(!em); /* -ENOMEM */
+		if (!em)
+			goto out_free_reserve;
 		em->start = async_extent->start;
 		em->len = async_extent->ram_size;
 		em->orig_start = em->start;
+		em->mod_start = em->start;
+		em->mod_len = em->len;
 
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
@@ -726,6 +730,9 @@ retry:
 						async_extent->ram_size - 1, 0);
 		}
 
+		if (ret)
+			goto out_free_reserve;
+
 		ret = btrfs_add_ordered_extent_compress(inode,
 						async_extent->start,
 						ins.objectid,
@@ -733,7 +740,8 @@ retry:
 						ins.offset,
 						BTRFS_ORDERED_COMPRESSED,
 						async_extent->compress_type);
-		BUG_ON(ret); /* -ENOMEM */
+		if (ret)
+			goto out_free_reserve;
 
 		/*
 		 * clear dirty, set writeback and unlock the pages.
@@ -754,18 +762,30 @@ retry:
 				    ins.objectid,
 				    ins.offset, async_extent->pages,
 				    async_extent->nr_pages);
-
-		BUG_ON(ret); /* -ENOMEM */
 		alloc_hint = ins.objectid + ins.offset;
 		kfree(async_extent);
+		if (ret)
+			goto out;
 		cond_resched();
 	}
 	ret = 0;
 out:
 	return ret;
+out_free_reserve:
+	btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
 out_free:
+	extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+				     async_extent->start,
+				     async_extent->start +
+				     async_extent->ram_size - 1,
+				     NULL, EXTENT_CLEAR_UNLOCK_PAGE |
+				     EXTENT_CLEAR_UNLOCK |
+				     EXTENT_CLEAR_DELALLOC |
+				     EXTENT_CLEAR_DIRTY |
+				     EXTENT_SET_WRITEBACK |
+				     EXTENT_END_WRITEBACK);
 	kfree(async_extent);
-	goto out;
+	goto again;
 }
 
 static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
 		em->orig_start = em->start;
 		ram_size = ins.offset;
 		em->len = ins.offset;
+		em->mod_start = em->start;
+		em->mod_len = em->len;
 
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
@@ -1338,6 +1360,8 @@ out_check:
 			em->block_start = disk_bytenr;
 			em->orig_block_len = disk_num_bytes;
 			em->bdev = root->fs_info->fs_devices->latest_bdev;
+			em->mod_start = em->start;
+			em->mod_len = em->len;
 			set_bit(EXTENT_FLAG_PINNED, &em->flags);
 			set_bit(EXTENT_FLAG_FILLING, &em->flags);
 			em->generation = -1;
@@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
 			spin_unlock(&BTRFS_I(inode)->lock);
 		}
 
-		spin_lock(&root->fs_info->delalloc_lock);
+		__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
+				     root->fs_info->delalloc_batch);
+		spin_lock(&BTRFS_I(inode)->lock);
 		BTRFS_I(inode)->delalloc_bytes += len;
-		root->fs_info->delalloc_bytes += len;
-		if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
-				      &root->fs_info->delalloc_inodes);
+		if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+					 &BTRFS_I(inode)->runtime_flags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+				list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+					      &root->fs_info->delalloc_inodes);
+				set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+					&BTRFS_I(inode)->runtime_flags);
+			}
+			spin_unlock(&root->fs_info->delalloc_lock);
 		}
-		spin_unlock(&root->fs_info->delalloc_lock);
+		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }
 
@@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
 		    && do_list)
 			btrfs_free_reserved_data_space(inode, len);
 
-		spin_lock(&root->fs_info->delalloc_lock);
-		root->fs_info->delalloc_bytes -= len;
+		__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
+				     root->fs_info->delalloc_batch);
+		spin_lock(&BTRFS_I(inode)->lock);
 		BTRFS_I(inode)->delalloc_bytes -= len;
-
 		if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
-		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
-			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+		    test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+			     &BTRFS_I(inode)->runtime_flags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+				list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+				clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+					  &BTRFS_I(inode)->runtime_flags);
+			}
+			spin_unlock(&root->fs_info->delalloc_lock);
 		}
-		spin_unlock(&root->fs_info->delalloc_lock);
+		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }
 
@@ -2001,11 +2040,23 @@ out:
 	if (trans)
 		btrfs_end_transaction(trans, root);
 
-	if (ret)
+	if (ret) {
 		clear_extent_uptodate(io_tree, ordered_extent->file_offset,
 				      ordered_extent->file_offset +
 				      ordered_extent->len - 1, NULL, GFP_NOFS);
 
+		/*
+		 * If the ordered extent had an IOERR or something else went
+		 * wrong we need to return the space for this ordered extent
+		 * back to the allocator.
+		 */
+		if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
+			btrfs_free_reserved_extent(root, ordered_extent->start,
+						   ordered_extent->disk_len);
+	}
+
+
 	/*
 	 * This needs to be done to make sure anybody waiting knows we are done
 	 * updating everything for this ordered extent.
@@ -2062,7 +2113,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 			       struct extent_state *state, int mirror)
 {
-	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+	size_t offset = start - page_offset(page);
 	struct inode *inode = page->mapping->host;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	char *kaddr;
@@ -2167,11 +2218,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 	}
 }
 
-enum btrfs_orphan_cleanup_state {
-	ORPHAN_CLEANUP_STARTED	= 1,
-	ORPHAN_CLEANUP_DONE	= 2,
-};
-
 /*
  * This is called in transaction commit time. If there are no orphan
  * files in the subvolume, it removes orphan item and frees block_rsv
@@ -2469,6 +2515,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 		 */
 		set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 			&BTRFS_I(inode)->runtime_flags);
+		atomic_inc(&root->orphan_inodes);
 
 		/* if we have links, this was a truncate, lets do that */
 		if (inode->i_nlink) {
@@ -2491,6 +2538,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 				goto out;
 
 			ret = btrfs_truncate(inode);
+			if (ret)
+				btrfs_orphan_del(NULL, inode);
 		} else {
 			nr_unlink++;
 		}
@@ -2709,34 +2758,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode_item *item,
 			    struct inode *inode)
 {
-	btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
-	btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
-	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
-	btrfs_set_inode_mode(leaf, item, inode->i_mode);
-	btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+	struct btrfs_map_token token;
+
+	btrfs_init_map_token(&token);
 
-	btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
-			       inode->i_atime.tv_sec);
-	btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
-				inode->i_atime.tv_nsec);
+	btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+	btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+	btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
+				   &token);
+	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
 
-	btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
-			       inode->i_mtime.tv_sec);
-	btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
-				inode->i_mtime.tv_nsec);
+	btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+				     inode->i_atime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+				      inode->i_atime.tv_nsec, &token);
 
-	btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
-			       inode->i_ctime.tv_sec);
-	btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
-				inode->i_ctime.tv_nsec);
+	btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+				     inode->i_mtime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+				      inode->i_mtime.tv_nsec, &token);
 
-	btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
-	btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
-	btrfs_set_inode_sequence(leaf, item, inode->i_version);
-	btrfs_set_inode_transid(leaf, item, trans->transid);
-	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
-	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-	btrfs_set_inode_block_group(leaf, item, 0);
+	btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+				     inode->i_ctime.tv_sec, &token);
+	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+				      inode->i_ctime.tv_nsec, &token);
+
+	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+				     &token);
+	btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
+					 &token);
+	btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+	btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+	btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+	btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+	btrfs_set_token_inode_block_group(leaf, item, 0, &token);
 }
 
 /*
@@ -3832,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 
 		/* we don't support swapfiles, so vmtruncate shouldn't fail */
 		truncate_setsize(inode, newsize);
+
+		/* Disable nonlocked read DIO to avoid the end less truncate */
+		btrfs_inode_block_unlocked_dio(inode);
+		inode_dio_wait(inode);
+		btrfs_inode_resume_unlocked_dio(inode);
+
 		ret = btrfs_truncate(inode);
 		if (ret && inode->i_nlink)
 			btrfs_orphan_del(NULL, inode);
@@ -3904,6 +3966,12 @@ void btrfs_evict_inode(struct inode *inode)
 		goto no_delete;
 	}
 
+	ret = btrfs_commit_inode_delayed_inode(inode);
+	if (ret) {
+		btrfs_orphan_del(NULL, inode);
+		goto no_delete;
+	}
+
 	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
 	if (!rsv) {
 		btrfs_orphan_del(NULL, inode);
@@ -3941,7 +4009,7 @@ void btrfs_evict_inode(struct inode *inode)
 			goto no_delete;
 		}
 
-		trans = btrfs_start_transaction_lflush(root, 1);
+		trans = btrfs_join_transaction(root);
 		if (IS_ERR(trans)) {
 			btrfs_orphan_del(NULL, inode);
 			btrfs_free_block_rsv(root, rsv);
@@ -3955,9 +4023,6 @@ void btrfs_evict_inode(struct inode *inode)
 			break;
 
 		trans->block_rsv = &root->fs_info->trans_block_rsv;
-		ret = btrfs_update_inode(trans, root, inode);
-		BUG_ON(ret);
-
 		btrfs_end_transaction(trans, root);
 		trans = NULL;
 		btrfs_btree_balance_dirty(root);
@@ -5006,12 +5071,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 		goto out_unlock;
 	}
 
-	err = btrfs_update_inode(trans, root, inode);
-	if (err) {
-		drop_inode = 1;
-		goto out_unlock;
-	}
-
 	/*
 	 * If the active LSM wants to access the inode during
 	 * d_instantiate it needs these. Smack checks to see
@@ -5949,6 +6008,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
 	em->start = start;
 	em->orig_start = orig_start;
+	em->mod_start = start;
+	em->mod_len = len;
 	em->len = len;
 	em->block_len = block_len;
 	em->block_start = block_start;
@@ -5990,16 +6051,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	u64 len = bh_result->b_size;
 	struct btrfs_trans_handle *trans;
 	int unlock_bits = EXTENT_LOCKED;
-	int ret;
+	int ret = 0;
 
 	if (create) {
-		ret = btrfs_delalloc_reserve_space(inode, len);
-		if (ret)
-			return ret;
+		spin_lock(&BTRFS_I(inode)->lock);
+		BTRFS_I(inode)->outstanding_extents++;
+		spin_unlock(&BTRFS_I(inode)->lock);
 		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
-	} else {
+	} else
 		len = min_t(u64, len, root->sectorsize);
-	}
 
 	lockstart = start;
 	lockend = start + len - 1;
@@ -6011,14 +6071,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
 		return -ENOTBLK;
 
-	if (create) {
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-				     lockend, EXTENT_DELALLOC, NULL,
-				     &cached_state, GFP_NOFS);
-		if (ret)
-			goto unlock_err;
-	}
-
 	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
 	if (IS_ERR(em)) {
 		ret = PTR_ERR(em);
@@ -6050,7 +6102,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
 			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 		free_extent_map(em);
-		ret = 0;
 		goto unlock_err;
 	}
 
@@ -6148,6 +6199,11 @@ unlock:
 		 */
 		if (start + len > i_size_read(inode))
 			i_size_write(inode, start + len);
+
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockstart + len - 1, EXTENT_DELALLOC, NULL,
+				     &cached_state, GFP_NOFS);
+		BUG_ON(ret);
 	}
 
 	/*
@@ -6156,24 +6212,9 @@ unlock:
 	 * aren't using if there is any left over space.
 	 */
 	if (lockstart < lockend) {
-		if (create && len < lockend - lockstart) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockstart + len - 1,
-					 unlock_bits | EXTENT_DEFRAG, 1, 0,
-					 &cached_state, GFP_NOFS);
-			/*
-			 * Beside unlock, we also need to cleanup reserved space
-			 * for the left range by attaching EXTENT_DO_ACCOUNTING.
-			 */
-			clear_extent_bit(&BTRFS_I(inode)->io_tree,
-					 lockstart + len, lockend,
-					 unlock_bits | EXTENT_DO_ACCOUNTING |
-					 EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
-		} else {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockend, unlock_bits, 1, 0,
-					 &cached_state, GFP_NOFS);
-		}
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				 lockend, unlock_bits, 1, 0,
+				 &cached_state, GFP_NOFS);
 	} else {
 		free_extent_state(cached_state);
 	}
@@ -6183,9 +6224,6 @@ unlock:
 	return 0;
 
 unlock_err:
-	if (create)
-		unlock_bits |= EXTENT_DO_ACCOUNTING;
-
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
 	return ret;
@@ -6623,15 +6661,63 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	size_t count = 0;
+	int flags = 0;
+	bool wakeup = true;
+	bool relock = false;
+	ssize_t ret;
 
 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
 			    offset, nr_segs))
 		return 0;
 
-	return __blockdev_direct_IO(rw, iocb, inode,
-		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
-		   btrfs_submit_direct, 0);
+	atomic_inc(&inode->i_dio_count);
+	smp_mb__after_atomic_inc();
+
+	if (rw & WRITE) {
+		count = iov_length(iov, nr_segs);
+		/*
+		 * If the write DIO is beyond the EOF, we need update
+		 * the isize, but it is protected by i_mutex. So we can
+		 * not unlock the i_mutex at this case.
+		 */
+		if (offset + count <= inode->i_size) {
+			mutex_unlock(&inode->i_mutex);
+			relock = true;
+		}
+		ret = btrfs_delalloc_reserve_space(inode, count);
+		if (ret)
+			goto out;
+	} else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+				     &BTRFS_I(inode)->runtime_flags))) {
+		inode_dio_done(inode);
+		flags = DIO_LOCKING | DIO_SKIP_HOLES;
+		wakeup = false;
+	}
+
+	ret = __blockdev_direct_IO(rw, iocb, inode,
+			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+			btrfs_submit_direct, flags);
+	if (rw & WRITE) {
+		if (ret < 0 && ret != -EIOCBQUEUED)
+			btrfs_delalloc_release_space(inode, count);
+		else if (ret > 0 && (size_t)ret < count) {
+			spin_lock(&BTRFS_I(inode)->lock);
+			BTRFS_I(inode)->outstanding_extents++;
+			spin_unlock(&BTRFS_I(inode)->lock);
+			btrfs_delalloc_release_space(inode,
+						     count - (size_t)ret);
+		}
+		btrfs_delalloc_release_metadata(inode, 0);
+	}
+out:
+	if (wakeup)
+		inode_dio_done(inode);
+	if (relock)
+		mutex_lock(&inode->i_mutex);
+
+	return ret;
 }
 
 #define BTRFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC)
@@ -6735,8 +6821,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 		return;
 	}
 	lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-	ordered = btrfs_lookup_ordered_extent(inode,
-					   page_offset(page));
+	ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
 	if (ordered) {
 		/*
 		 * IO on this page will never be started, so we need
@@ -7216,8 +7301,9 @@ int btrfs_drop_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
+	/* the snap/subvol tree is on deleting */
 	if (btrfs_root_refs(&root->root_item) == 0 &&
-	    !btrfs_is_free_space_inode(inode))
+	    root != root->fs_info->tree_root)
 		return 1;
 	else
 		return generic_drop_inode(inode);
@@ -7299,14 +7385,19 @@ fail:
 static int btrfs_getattr(struct vfsmount *mnt,
 			 struct dentry *dentry, struct kstat *stat)
 {
+	u64 delalloc_bytes;
 	struct inode *inode = dentry->d_inode;
 	u32 blocksize = inode->i_sb->s_blocksize;
 
 	generic_fillattr(inode, stat);
 	stat->dev = BTRFS_I(inode)->root->anon_dev;
 	stat->blksize = PAGE_CACHE_SIZE;
+
+	spin_lock(&BTRFS_I(inode)->lock);
+	delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
+	spin_unlock(&BTRFS_I(inode)->lock);
 	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
-			ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
+			ALIGN(delalloc_bytes, blocksize)) >> 9;
 	return 0;
 }
 
@@ -7583,7 +7674,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 
 	INIT_LIST_HEAD(&works);
 	INIT_LIST_HEAD(&splice);
-again:
+
 	spin_lock(&root->fs_info->delalloc_lock);
 	list_splice_init(&root->fs_info->delalloc_inodes, &splice);
 	while (!list_empty(&splice)) {
@@ -7593,8 +7684,11 @@ again:
 		list_del_init(&binode->delalloc_inodes);
 
 		inode = igrab(&binode->vfs_inode);
-		if (!inode)
+		if (!inode) {
+			clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+				  &binode->runtime_flags);
 			continue;
+		}
 
 		list_add_tail(&binode->delalloc_inodes,
 			      &root->fs_info->delalloc_inodes);
@@ -7619,13 +7713,6 @@ again:
 		btrfs_wait_and_free_delalloc_work(work);
 	}
 
-	spin_lock(&root->fs_info->delalloc_lock);
-	if (!list_empty(&root->fs_info->delalloc_inodes)) {
-		spin_unlock(&root->fs_info->delalloc_lock);
-		goto again;
-	}
-	spin_unlock(&root->fs_info->delalloc_lock);
-
 	/* the filemap_flush will queue IO into the worker threads, but
 	 * we have to make sure the IO is actually started and that
 	 * ordered extents get created before we return