diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 13:49:22 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 13:49:22 -0400 |
commit | 07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch) | |
tree | e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs | |
parent | 63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff) | |
parent | c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff) |
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This has our merge window series of cleanups and fixes. These target
a wide range of issues, but do include some important fixes for
qgroups, O_DIRECT, and fsync handling. Jeff Mahoney moved around a
few definitions to make them easier for userland to consume.
Also whiteout support is included now that issues with overlayfs have
been cleared up.
I have one more fix pending for page faults during btrfs_copy_from_user,
but I wanted to get this bulk out the door first"
* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
btrfs: fix memory leak during RAID 5/6 device replacement
Btrfs: add semaphore to synchronize direct IO writes with fsync
Btrfs: fix race between block group relocation and nocow writes
Btrfs: fix race between fsync and direct IO writes for prealloc extents
Btrfs: fix number of transaction units for renames with whiteout
Btrfs: pin logs earlier when doing a rename exchange operation
Btrfs: unpin logs if rename exchange operation fails
Btrfs: fix inode leak on failure to setup whiteout inode in rename
btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
Btrfs: pin log earlier when renaming
Btrfs: unpin log if rename operation fails
Btrfs: don't do unnecessary delalloc flushes when relocating
Btrfs: don't wait for unrelated IO to finish before relocation
Btrfs: fix empty symlink after creating symlink and fsync parent dir
Btrfs: fix for incorrect directory entries after fsync log replay
btrfs: build fixup for qgroup_account_snapshot
btrfs: qgroup: Fix qgroup accounting when creating snapshot
Btrfs: fix fspath error deallocation
btrfs: make find_workspace warn if there are no workspaces
btrfs: make find_workspace always succeed
...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/backref.c | 2 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 10 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 85 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 6 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 1123 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.c | 2 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.c | 101 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.h | 4 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 130 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 167 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 82 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/file.c | 6 | ||||
-rw-r--r-- | fs/btrfs/inode-item.c | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 466 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 198 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 26 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 6 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 13 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 4 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 25 | ||||
-rw-r--r-- | fs/btrfs/send.c | 62 | ||||
-rw-r--r-- | fs/btrfs/super.c | 60 | ||||
-rw-r--r-- | fs/btrfs/sysfs.c | 14 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 138 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 74 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 454 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 57 |
28 files changed, 1530 insertions, 1788 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 80e8472d618b..d3090187fd76 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | |||
1991 | 1991 | ||
1992 | ifp = kmalloc(sizeof(*ifp), GFP_NOFS); | 1992 | ifp = kmalloc(sizeof(*ifp), GFP_NOFS); |
1993 | if (!ifp) { | 1993 | if (!ifp) { |
1994 | kfree(fspath); | 1994 | vfree(fspath); |
1995 | return ERR_PTR(-ENOMEM); | 1995 | return ERR_PTR(-ENOMEM); |
1996 | } | 1996 | } |
1997 | 1997 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 61205e3bbefa..1da5753d886d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -196,6 +196,16 @@ struct btrfs_inode { | |||
196 | struct list_head delayed_iput; | 196 | struct list_head delayed_iput; |
197 | long delayed_iput_count; | 197 | long delayed_iput_count; |
198 | 198 | ||
199 | /* | ||
200 | * To avoid races between lockless (i_mutex not held) direct IO writes | ||
201 | * and concurrent fsync requests. Direct IO writes must acquire read | ||
202 | * access on this semaphore for creating an extent map and its | ||
203 | * corresponding ordered extent. The fast fsync path must acquire write | ||
204 | * access on this semaphore before it collects ordered extents and | ||
205 | * extent maps. | ||
206 | */ | ||
207 | struct rw_semaphore dio_sem; | ||
208 | |||
199 | struct inode vfs_inode; | 209 | struct inode vfs_inode; |
200 | }; | 210 | }; |
201 | 211 | ||
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ff61a41ac90b..658c39b70fba 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -743,8 +743,11 @@ out: | |||
743 | static struct { | 743 | static struct { |
744 | struct list_head idle_ws; | 744 | struct list_head idle_ws; |
745 | spinlock_t ws_lock; | 745 | spinlock_t ws_lock; |
746 | int num_ws; | 746 | /* Number of free workspaces */ |
747 | atomic_t alloc_ws; | 747 | int free_ws; |
748 | /* Total number of allocated workspaces */ | ||
749 | atomic_t total_ws; | ||
750 | /* Waiters for a free workspace */ | ||
748 | wait_queue_head_t ws_wait; | 751 | wait_queue_head_t ws_wait; |
749 | } btrfs_comp_ws[BTRFS_COMPRESS_TYPES]; | 752 | } btrfs_comp_ws[BTRFS_COMPRESS_TYPES]; |
750 | 753 | ||
@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void) | |||
758 | int i; | 761 | int i; |
759 | 762 | ||
760 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | 763 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { |
764 | struct list_head *workspace; | ||
765 | |||
761 | INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws); | 766 | INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws); |
762 | spin_lock_init(&btrfs_comp_ws[i].ws_lock); | 767 | spin_lock_init(&btrfs_comp_ws[i].ws_lock); |
763 | atomic_set(&btrfs_comp_ws[i].alloc_ws, 0); | 768 | atomic_set(&btrfs_comp_ws[i].total_ws, 0); |
764 | init_waitqueue_head(&btrfs_comp_ws[i].ws_wait); | 769 | init_waitqueue_head(&btrfs_comp_ws[i].ws_wait); |
770 | |||
771 | /* | ||
772 | * Preallocate one workspace for each compression type so | ||
773 | * we can guarantee forward progress in the worst case | ||
774 | */ | ||
775 | workspace = btrfs_compress_op[i]->alloc_workspace(); | ||
776 | if (IS_ERR(workspace)) { | ||
777 | printk(KERN_WARNING | ||
778 | "BTRFS: cannot preallocate compression workspace, will try later"); | ||
779 | } else { | ||
780 | atomic_set(&btrfs_comp_ws[i].total_ws, 1); | ||
781 | btrfs_comp_ws[i].free_ws = 1; | ||
782 | list_add(workspace, &btrfs_comp_ws[i].idle_ws); | ||
783 | } | ||
765 | } | 784 | } |
766 | } | 785 | } |
767 | 786 | ||
768 | /* | 787 | /* |
769 | * this finds an available workspace or allocates a new one | 788 | * This finds an available workspace or allocates a new one. |
770 | * ERR_PTR is returned if things go bad. | 789 | * If it's not possible to allocate a new one, waits until there's one. |
790 | * Preallocation makes a forward progress guarantees and we do not return | ||
791 | * errors. | ||
771 | */ | 792 | */ |
772 | static struct list_head *find_workspace(int type) | 793 | static struct list_head *find_workspace(int type) |
773 | { | 794 | { |
@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type) | |||
777 | 798 | ||
778 | struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; | 799 | struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; |
779 | spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; | 800 | spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; |
780 | atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws; | 801 | atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws; |
781 | wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; | 802 | wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; |
782 | int *num_ws = &btrfs_comp_ws[idx].num_ws; | 803 | int *free_ws = &btrfs_comp_ws[idx].free_ws; |
783 | again: | 804 | again: |
784 | spin_lock(ws_lock); | 805 | spin_lock(ws_lock); |
785 | if (!list_empty(idle_ws)) { | 806 | if (!list_empty(idle_ws)) { |
786 | workspace = idle_ws->next; | 807 | workspace = idle_ws->next; |
787 | list_del(workspace); | 808 | list_del(workspace); |
788 | (*num_ws)--; | 809 | (*free_ws)--; |
789 | spin_unlock(ws_lock); | 810 | spin_unlock(ws_lock); |
790 | return workspace; | 811 | return workspace; |
791 | 812 | ||
792 | } | 813 | } |
793 | if (atomic_read(alloc_ws) > cpus) { | 814 | if (atomic_read(total_ws) > cpus) { |
794 | DEFINE_WAIT(wait); | 815 | DEFINE_WAIT(wait); |
795 | 816 | ||
796 | spin_unlock(ws_lock); | 817 | spin_unlock(ws_lock); |
797 | prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE); | 818 | prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE); |
798 | if (atomic_read(alloc_ws) > cpus && !*num_ws) | 819 | if (atomic_read(total_ws) > cpus && !*free_ws) |
799 | schedule(); | 820 | schedule(); |
800 | finish_wait(ws_wait, &wait); | 821 | finish_wait(ws_wait, &wait); |
801 | goto again; | 822 | goto again; |
802 | } | 823 | } |
803 | atomic_inc(alloc_ws); | 824 | atomic_inc(total_ws); |
804 | spin_unlock(ws_lock); | 825 | spin_unlock(ws_lock); |
805 | 826 | ||
806 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | 827 | workspace = btrfs_compress_op[idx]->alloc_workspace(); |
807 | if (IS_ERR(workspace)) { | 828 | if (IS_ERR(workspace)) { |
808 | atomic_dec(alloc_ws); | 829 | atomic_dec(total_ws); |
809 | wake_up(ws_wait); | 830 | wake_up(ws_wait); |
831 | |||
832 | /* | ||
833 | * Do not return the error but go back to waiting. There's a | ||
834 | * workspace preallocated for each type and the compression | ||
835 | * time is bounded so we get to a workspace eventually. This | ||
836 | * makes our caller's life easier. | ||
837 | * | ||
838 | * To prevent silent and low-probability deadlocks (when the | ||
839 | * initial preallocation fails), check if there are any | ||
840 | * workspaces at all. | ||
841 | */ | ||
842 | if (atomic_read(total_ws) == 0) { | ||
843 | static DEFINE_RATELIMIT_STATE(_rs, | ||
844 | /* once per minute */ 60 * HZ, | ||
845 | /* no burst */ 1); | ||
846 | |||
847 | if (__ratelimit(&_rs)) { | ||
848 | printk(KERN_WARNING | ||
849 | "no compression workspaces, low memory, retrying"); | ||
850 | } | ||
851 | } | ||
852 | goto again; | ||
810 | } | 853 | } |
811 | return workspace; | 854 | return workspace; |
812 | } | 855 | } |
@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace) | |||
820 | int idx = type - 1; | 863 | int idx = type - 1; |
821 | struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; | 864 | struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; |
822 | spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; | 865 | spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; |
823 | atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws; | 866 | atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws; |
824 | wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; | 867 | wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; |
825 | int *num_ws = &btrfs_comp_ws[idx].num_ws; | 868 | int *free_ws = &btrfs_comp_ws[idx].free_ws; |
826 | 869 | ||
827 | spin_lock(ws_lock); | 870 | spin_lock(ws_lock); |
828 | if (*num_ws < num_online_cpus()) { | 871 | if (*free_ws < num_online_cpus()) { |
829 | list_add(workspace, idle_ws); | 872 | list_add(workspace, idle_ws); |
830 | (*num_ws)++; | 873 | (*free_ws)++; |
831 | spin_unlock(ws_lock); | 874 | spin_unlock(ws_lock); |
832 | goto wake; | 875 | goto wake; |
833 | } | 876 | } |
834 | spin_unlock(ws_lock); | 877 | spin_unlock(ws_lock); |
835 | 878 | ||
836 | btrfs_compress_op[idx]->free_workspace(workspace); | 879 | btrfs_compress_op[idx]->free_workspace(workspace); |
837 | atomic_dec(alloc_ws); | 880 | atomic_dec(total_ws); |
838 | wake: | 881 | wake: |
839 | /* | 882 | /* |
840 | * Make sure counter is updated before we wake up waiters. | 883 | * Make sure counter is updated before we wake up waiters. |
@@ -857,7 +900,7 @@ static void free_workspaces(void) | |||
857 | workspace = btrfs_comp_ws[i].idle_ws.next; | 900 | workspace = btrfs_comp_ws[i].idle_ws.next; |
858 | list_del(workspace); | 901 | list_del(workspace); |
859 | btrfs_compress_op[i]->free_workspace(workspace); | 902 | btrfs_compress_op[i]->free_workspace(workspace); |
860 | atomic_dec(&btrfs_comp_ws[i].alloc_ws); | 903 | atomic_dec(&btrfs_comp_ws[i].total_ws); |
861 | } | 904 | } |
862 | } | 905 | } |
863 | } | 906 | } |
@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping, | |||
894 | int ret; | 937 | int ret; |
895 | 938 | ||
896 | workspace = find_workspace(type); | 939 | workspace = find_workspace(type); |
897 | if (IS_ERR(workspace)) | ||
898 | return PTR_ERR(workspace); | ||
899 | 940 | ||
900 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | 941 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, |
901 | start, len, pages, | 942 | start, len, pages, |
@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in, | |||
930 | int ret; | 971 | int ret; |
931 | 972 | ||
932 | workspace = find_workspace(type); | 973 | workspace = find_workspace(type); |
933 | if (IS_ERR(workspace)) | ||
934 | return PTR_ERR(workspace); | ||
935 | 974 | ||
936 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | 975 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, |
937 | disk_start, | 976 | disk_start, |
@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | |||
952 | int ret; | 991 | int ret; |
953 | 992 | ||
954 | workspace = find_workspace(type); | 993 | workspace = find_workspace(type); |
955 | if (IS_ERR(workspace)) | ||
956 | return PTR_ERR(workspace); | ||
957 | 994 | ||
958 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | 995 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, |
959 | dest_page, start_byte, | 996 | dest_page, start_byte, |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ec7928a27aaa..decd0a3f5d61 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
1011 | return ret; | 1011 | return ret; |
1012 | if (refs == 0) { | 1012 | if (refs == 0) { |
1013 | ret = -EROFS; | 1013 | ret = -EROFS; |
1014 | btrfs_std_error(root->fs_info, ret, NULL); | 1014 | btrfs_handle_fs_error(root->fs_info, ret, NULL); |
1015 | return ret; | 1015 | return ret; |
1016 | } | 1016 | } |
1017 | } else { | 1017 | } else { |
@@ -1928,7 +1928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1928 | child = read_node_slot(root, mid, 0); | 1928 | child = read_node_slot(root, mid, 0); |
1929 | if (!child) { | 1929 | if (!child) { |
1930 | ret = -EROFS; | 1930 | ret = -EROFS; |
1931 | btrfs_std_error(root->fs_info, ret, NULL); | 1931 | btrfs_handle_fs_error(root->fs_info, ret, NULL); |
1932 | goto enospc; | 1932 | goto enospc; |
1933 | } | 1933 | } |
1934 | 1934 | ||
@@ -2031,7 +2031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
2031 | */ | 2031 | */ |
2032 | if (!left) { | 2032 | if (!left) { |
2033 | ret = -EROFS; | 2033 | ret = -EROFS; |
2034 | btrfs_std_error(root->fs_info, ret, NULL); | 2034 | btrfs_handle_fs_error(root->fs_info, ret, NULL); |
2035 | goto enospc; | 2035 | goto enospc; |
2036 | } | 2036 | } |
2037 | wret = balance_node_right(trans, root, mid, left); | 2037 | wret = balance_node_right(trans, root, mid, left); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 84a6a5b3384a..ddcc58f03c79 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/kmap_types.h> | 33 | #include <asm/kmap_types.h> |
34 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
35 | #include <linux/btrfs.h> | 35 | #include <linux/btrfs.h> |
36 | #include <linux/btrfs_tree.h> | ||
36 | #include <linux/workqueue.h> | 37 | #include <linux/workqueue.h> |
37 | #include <linux/security.h> | 38 | #include <linux/security.h> |
38 | #include <linux/sizes.h> | 39 | #include <linux/sizes.h> |
@@ -64,98 +65,6 @@ struct btrfs_ordered_sum; | |||
64 | 65 | ||
65 | #define BTRFS_COMPAT_EXTENT_TREE_V0 | 66 | #define BTRFS_COMPAT_EXTENT_TREE_V0 |
66 | 67 | ||
67 | /* holds pointers to all of the tree roots */ | ||
68 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | ||
69 | |||
70 | /* stores information about which extents are in use, and reference counts */ | ||
71 | #define BTRFS_EXTENT_TREE_OBJECTID 2ULL | ||
72 | |||
73 | /* | ||
74 | * chunk tree stores translations from logical -> physical block numbering | ||
75 | * the super block points to the chunk tree | ||
76 | */ | ||
77 | #define BTRFS_CHUNK_TREE_OBJECTID 3ULL | ||
78 | |||
79 | /* | ||
80 | * stores information about which areas of a given device are in use. | ||
81 | * one per device. The tree of tree roots points to the device tree | ||
82 | */ | ||
83 | #define BTRFS_DEV_TREE_OBJECTID 4ULL | ||
84 | |||
85 | /* one per subvolume, storing files and directories */ | ||
86 | #define BTRFS_FS_TREE_OBJECTID 5ULL | ||
87 | |||
88 | /* directory objectid inside the root tree */ | ||
89 | #define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL | ||
90 | |||
91 | /* holds checksums of all the data extents */ | ||
92 | #define BTRFS_CSUM_TREE_OBJECTID 7ULL | ||
93 | |||
94 | /* holds quota configuration and tracking */ | ||
95 | #define BTRFS_QUOTA_TREE_OBJECTID 8ULL | ||
96 | |||
97 | /* for storing items that use the BTRFS_UUID_KEY* types */ | ||
98 | #define BTRFS_UUID_TREE_OBJECTID 9ULL | ||
99 | |||
100 | /* tracks free space in block groups. */ | ||
101 | #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL | ||
102 | |||
103 | /* device stats in the device tree */ | ||
104 | #define BTRFS_DEV_STATS_OBJECTID 0ULL | ||
105 | |||
106 | /* for storing balance parameters in the root tree */ | ||
107 | #define BTRFS_BALANCE_OBJECTID -4ULL | ||
108 | |||
109 | /* orhpan objectid for tracking unlinked/truncated files */ | ||
110 | #define BTRFS_ORPHAN_OBJECTID -5ULL | ||
111 | |||
112 | /* does write ahead logging to speed up fsyncs */ | ||
113 | #define BTRFS_TREE_LOG_OBJECTID -6ULL | ||
114 | #define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL | ||
115 | |||
116 | /* for space balancing */ | ||
117 | #define BTRFS_TREE_RELOC_OBJECTID -8ULL | ||
118 | #define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL | ||
119 | |||
120 | /* | ||
121 | * extent checksums all have this objectid | ||
122 | * this allows them to share the logging tree | ||
123 | * for fsyncs | ||
124 | */ | ||
125 | #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL | ||
126 | |||
127 | /* For storing free space cache */ | ||
128 | #define BTRFS_FREE_SPACE_OBJECTID -11ULL | ||
129 | |||
130 | /* | ||
131 | * The inode number assigned to the special inode for storing | ||
132 | * free ino cache | ||
133 | */ | ||
134 | #define BTRFS_FREE_INO_OBJECTID -12ULL | ||
135 | |||
136 | /* dummy objectid represents multiple objectids */ | ||
137 | #define BTRFS_MULTIPLE_OBJECTIDS -255ULL | ||
138 | |||
139 | /* | ||
140 | * All files have objectids in this range. | ||
141 | */ | ||
142 | #define BTRFS_FIRST_FREE_OBJECTID 256ULL | ||
143 | #define BTRFS_LAST_FREE_OBJECTID -256ULL | ||
144 | #define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL | ||
145 | |||
146 | |||
147 | /* | ||
148 | * the device items go into the chunk tree. The key is in the form | ||
149 | * [ 1 BTRFS_DEV_ITEM_KEY device_id ] | ||
150 | */ | ||
151 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL | ||
152 | |||
153 | #define BTRFS_BTREE_INODE_OBJECTID 1 | ||
154 | |||
155 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | ||
156 | |||
157 | #define BTRFS_DEV_REPLACE_DEVID 0ULL | ||
158 | |||
159 | /* | 68 | /* |
160 | * the max metadata block size. This limit is somewhat artificial, | 69 | * the max metadata block size. This limit is somewhat artificial, |
161 | * but the memmove costs go through the roof for larger blocks. | 70 | * but the memmove costs go through the roof for larger blocks. |
@@ -175,12 +84,6 @@ struct btrfs_ordered_sum; | |||
175 | */ | 84 | */ |
176 | #define BTRFS_LINK_MAX 65535U | 85 | #define BTRFS_LINK_MAX 65535U |
177 | 86 | ||
178 | /* 32 bytes in various csum fields */ | ||
179 | #define BTRFS_CSUM_SIZE 32 | ||
180 | |||
181 | /* csum types */ | ||
182 | #define BTRFS_CSUM_TYPE_CRC32 0 | ||
183 | |||
184 | static const int btrfs_csum_sizes[] = { 4 }; | 87 | static const int btrfs_csum_sizes[] = { 4 }; |
185 | 88 | ||
186 | /* four bytes for CRC32 */ | 89 | /* four bytes for CRC32 */ |
@@ -189,17 +92,6 @@ static const int btrfs_csum_sizes[] = { 4 }; | |||
189 | /* spefic to btrfs_map_block(), therefore not in include/linux/blk_types.h */ | 92 | /* spefic to btrfs_map_block(), therefore not in include/linux/blk_types.h */ |
190 | #define REQ_GET_READ_MIRRORS (1 << 30) | 93 | #define REQ_GET_READ_MIRRORS (1 << 30) |
191 | 94 | ||
192 | #define BTRFS_FT_UNKNOWN 0 | ||
193 | #define BTRFS_FT_REG_FILE 1 | ||
194 | #define BTRFS_FT_DIR 2 | ||
195 | #define BTRFS_FT_CHRDEV 3 | ||
196 | #define BTRFS_FT_BLKDEV 4 | ||
197 | #define BTRFS_FT_FIFO 5 | ||
198 | #define BTRFS_FT_SOCK 6 | ||
199 | #define BTRFS_FT_SYMLINK 7 | ||
200 | #define BTRFS_FT_XATTR 8 | ||
201 | #define BTRFS_FT_MAX 9 | ||
202 | |||
203 | /* ioprio of readahead is set to idle */ | 95 | /* ioprio of readahead is set to idle */ |
204 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) | 96 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) |
205 | 97 | ||
@@ -207,138 +99,10 @@ static const int btrfs_csum_sizes[] = { 4 }; | |||
207 | 99 | ||
208 | #define BTRFS_MAX_EXTENT_SIZE SZ_128M | 100 | #define BTRFS_MAX_EXTENT_SIZE SZ_128M |
209 | 101 | ||
210 | /* | ||
211 | * The key defines the order in the tree, and so it also defines (optimal) | ||
212 | * block layout. | ||
213 | * | ||
214 | * objectid corresponds to the inode number. | ||
215 | * | ||
216 | * type tells us things about the object, and is a kind of stream selector. | ||
217 | * so for a given inode, keys with type of 1 might refer to the inode data, | ||
218 | * type of 2 may point to file data in the btree and type == 3 may point to | ||
219 | * extents. | ||
220 | * | ||
221 | * offset is the starting byte offset for this key in the stream. | ||
222 | * | ||
223 | * btrfs_disk_key is in disk byte order. struct btrfs_key is always | ||
224 | * in cpu native order. Otherwise they are identical and their sizes | ||
225 | * should be the same (ie both packed) | ||
226 | */ | ||
227 | struct btrfs_disk_key { | ||
228 | __le64 objectid; | ||
229 | u8 type; | ||
230 | __le64 offset; | ||
231 | } __attribute__ ((__packed__)); | ||
232 | |||
233 | struct btrfs_key { | ||
234 | u64 objectid; | ||
235 | u8 type; | ||
236 | u64 offset; | ||
237 | } __attribute__ ((__packed__)); | ||
238 | |||
239 | struct btrfs_mapping_tree { | 102 | struct btrfs_mapping_tree { |
240 | struct extent_map_tree map_tree; | 103 | struct extent_map_tree map_tree; |
241 | }; | 104 | }; |
242 | 105 | ||
243 | struct btrfs_dev_item { | ||
244 | /* the internal btrfs device id */ | ||
245 | __le64 devid; | ||
246 | |||
247 | /* size of the device */ | ||
248 | __le64 total_bytes; | ||
249 | |||
250 | /* bytes used */ | ||
251 | __le64 bytes_used; | ||
252 | |||
253 | /* optimal io alignment for this device */ | ||
254 | __le32 io_align; | ||
255 | |||
256 | /* optimal io width for this device */ | ||
257 | __le32 io_width; | ||
258 | |||
259 | /* minimal io size for this device */ | ||
260 | __le32 sector_size; | ||
261 | |||
262 | /* type and info about this device */ | ||
263 | __le64 type; | ||
264 | |||
265 | /* expected generation for this device */ | ||
266 | __le64 generation; | ||
267 | |||
268 | /* | ||
269 | * starting byte of this partition on the device, | ||
270 | * to allow for stripe alignment in the future | ||
271 | */ | ||
272 | __le64 start_offset; | ||
273 | |||
274 | /* grouping information for allocation decisions */ | ||
275 | __le32 dev_group; | ||
276 | |||
277 | /* seek speed 0-100 where 100 is fastest */ | ||
278 | u8 seek_speed; | ||
279 | |||
280 | /* bandwidth 0-100 where 100 is fastest */ | ||
281 | u8 bandwidth; | ||
282 | |||
283 | /* btrfs generated uuid for this device */ | ||
284 | u8 uuid[BTRFS_UUID_SIZE]; | ||
285 | |||
286 | /* uuid of FS who owns this device */ | ||
287 | u8 fsid[BTRFS_UUID_SIZE]; | ||
288 | } __attribute__ ((__packed__)); | ||
289 | |||
290 | struct btrfs_stripe { | ||
291 | __le64 devid; | ||
292 | __le64 offset; | ||
293 | u8 dev_uuid[BTRFS_UUID_SIZE]; | ||
294 | } __attribute__ ((__packed__)); | ||
295 | |||
296 | struct btrfs_chunk { | ||
297 | /* size of this chunk in bytes */ | ||
298 | __le64 length; | ||
299 | |||
300 | /* objectid of the root referencing this chunk */ | ||
301 | __le64 owner; | ||
302 | |||
303 | __le64 stripe_len; | ||
304 | __le64 type; | ||
305 | |||
306 | /* optimal io alignment for this chunk */ | ||
307 | __le32 io_align; | ||
308 | |||
309 | /* optimal io width for this chunk */ | ||
310 | __le32 io_width; | ||
311 | |||
312 | /* minimal io size for this chunk */ | ||
313 | __le32 sector_size; | ||
314 | |||
315 | /* 2^16 stripes is quite a lot, a second limit is the size of a single | ||
316 | * item in the btree | ||
317 | */ | ||
318 | __le16 num_stripes; | ||
319 | |||
320 | /* sub stripes only matter for raid10 */ | ||
321 | __le16 sub_stripes; | ||
322 | struct btrfs_stripe stripe; | ||
323 | /* additional stripes go here */ | ||
324 | } __attribute__ ((__packed__)); | ||
325 | |||
326 | #define BTRFS_FREE_SPACE_EXTENT 1 | ||
327 | #define BTRFS_FREE_SPACE_BITMAP 2 | ||
328 | |||
329 | struct btrfs_free_space_entry { | ||
330 | __le64 offset; | ||
331 | __le64 bytes; | ||
332 | u8 type; | ||
333 | } __attribute__ ((__packed__)); | ||
334 | |||
335 | struct btrfs_free_space_header { | ||
336 | struct btrfs_disk_key location; | ||
337 | __le64 generation; | ||
338 | __le64 num_entries; | ||
339 | __le64 num_bitmaps; | ||
340 | } __attribute__ ((__packed__)); | ||
341 | |||
342 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) | 106 | static inline unsigned long btrfs_chunk_item_size(int num_stripes) |
343 | { | 107 | { |
344 | BUG_ON(num_stripes == 0); | 108 | BUG_ON(num_stripes == 0); |
@@ -346,9 +110,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
346 | sizeof(struct btrfs_stripe) * (num_stripes - 1); | 110 | sizeof(struct btrfs_stripe) * (num_stripes - 1); |
347 | } | 111 | } |
348 | 112 | ||
349 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | ||
350 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | ||
351 | |||
352 | /* | 113 | /* |
353 | * File system states | 114 | * File system states |
354 | */ | 115 | */ |
@@ -357,13 +118,6 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
357 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 | 118 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 |
358 | #define BTRFS_FS_STATE_DEV_REPLACING 3 | 119 | #define BTRFS_FS_STATE_DEV_REPLACING 3 |
359 | 120 | ||
360 | /* Super block flags */ | ||
361 | /* Errors detected */ | ||
362 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
363 | |||
364 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | ||
365 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | ||
366 | |||
367 | #define BTRFS_BACKREF_REV_MAX 256 | 121 | #define BTRFS_BACKREF_REV_MAX 256 |
368 | #define BTRFS_BACKREF_REV_SHIFT 56 | 122 | #define BTRFS_BACKREF_REV_SHIFT 56 |
369 | #define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ | 123 | #define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \ |
@@ -410,7 +164,6 @@ struct btrfs_header { | |||
410 | * room to translate 14 chunks with 3 stripes each. | 164 | * room to translate 14 chunks with 3 stripes each. |
411 | */ | 165 | */ |
412 | #define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 | 166 | #define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 |
413 | #define BTRFS_LABEL_SIZE 256 | ||
414 | 167 | ||
415 | /* | 168 | /* |
416 | * just in case we somehow lose the roots and are not able to mount, | 169 | * just in case we somehow lose the roots and are not able to mount, |
@@ -507,31 +260,6 @@ struct btrfs_super_block { | |||
507 | * Compat flags that we support. If any incompat flags are set other than the | 260 | * Compat flags that we support. If any incompat flags are set other than the |
508 | * ones specified below then we will fail to mount | 261 | * ones specified below then we will fail to mount |
509 | */ | 262 | */ |
510 | #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0) | ||
511 | |||
512 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | ||
513 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | ||
514 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | ||
515 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
516 | /* | ||
517 | * some patches floated around with a second compression method | ||
518 | * lets save that incompat here for when they do get in | ||
519 | * Note we don't actually support it, we're just reserving the | ||
520 | * number | ||
521 | */ | ||
522 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) | ||
523 | |||
524 | /* | ||
525 | * older kernels tried to do bigger metadata blocks, but the | ||
526 | * code was pretty buggy. Lets not let them try anymore. | ||
527 | */ | ||
528 | #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5) | ||
529 | |||
530 | #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) | ||
531 | #define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) | ||
532 | #define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) | ||
533 | #define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) | ||
534 | |||
535 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 263 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
536 | #define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL | 264 | #define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL |
537 | #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL | 265 | #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL |
@@ -624,357 +352,8 @@ struct btrfs_path { | |||
624 | unsigned int need_commit_sem:1; | 352 | unsigned int need_commit_sem:1; |
625 | unsigned int skip_release_on_error:1; | 353 | unsigned int skip_release_on_error:1; |
626 | }; | 354 | }; |
627 | |||
628 | /* | ||
629 | * items in the extent btree are used to record the objectid of the | ||
630 | * owner of the block and the number of references | ||
631 | */ | ||
632 | |||
633 | struct btrfs_extent_item { | ||
634 | __le64 refs; | ||
635 | __le64 generation; | ||
636 | __le64 flags; | ||
637 | } __attribute__ ((__packed__)); | ||
638 | |||
639 | struct btrfs_extent_item_v0 { | ||
640 | __le32 refs; | ||
641 | } __attribute__ ((__packed__)); | ||
642 | |||
643 | #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \ | 355 | #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \ |
644 | sizeof(struct btrfs_item)) | 356 | sizeof(struct btrfs_item)) |
645 | |||
646 | #define BTRFS_EXTENT_FLAG_DATA (1ULL << 0) | ||
647 | #define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1) | ||
648 | |||
649 | /* following flags only apply to tree blocks */ | ||
650 | |||
651 | /* use full backrefs for extent pointers in the block */ | ||
652 | #define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8) | ||
653 | |||
654 | /* | ||
655 | * this flag is only used internally by scrub and may be changed at any time | ||
656 | * it is only declared here to avoid collisions | ||
657 | */ | ||
658 | #define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48) | ||
659 | |||
660 | struct btrfs_tree_block_info { | ||
661 | struct btrfs_disk_key key; | ||
662 | u8 level; | ||
663 | } __attribute__ ((__packed__)); | ||
664 | |||
665 | struct btrfs_extent_data_ref { | ||
666 | __le64 root; | ||
667 | __le64 objectid; | ||
668 | __le64 offset; | ||
669 | __le32 count; | ||
670 | } __attribute__ ((__packed__)); | ||
671 | |||
672 | struct btrfs_shared_data_ref { | ||
673 | __le32 count; | ||
674 | } __attribute__ ((__packed__)); | ||
675 | |||
676 | struct btrfs_extent_inline_ref { | ||
677 | u8 type; | ||
678 | __le64 offset; | ||
679 | } __attribute__ ((__packed__)); | ||
680 | |||
681 | /* old style backrefs item */ | ||
682 | struct btrfs_extent_ref_v0 { | ||
683 | __le64 root; | ||
684 | __le64 generation; | ||
685 | __le64 objectid; | ||
686 | __le32 count; | ||
687 | } __attribute__ ((__packed__)); | ||
688 | |||
689 | |||
690 | /* dev extents record free space on individual devices. The owner | ||
691 | * field points back to the chunk allocation mapping tree that allocated | ||
692 | * the extent. The chunk tree uuid field is a way to double check the owner | ||
693 | */ | ||
694 | struct btrfs_dev_extent { | ||
695 | __le64 chunk_tree; | ||
696 | __le64 chunk_objectid; | ||
697 | __le64 chunk_offset; | ||
698 | __le64 length; | ||
699 | u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; | ||
700 | } __attribute__ ((__packed__)); | ||
701 | |||
702 | struct btrfs_inode_ref { | ||
703 | __le64 index; | ||
704 | __le16 name_len; | ||
705 | /* name goes here */ | ||
706 | } __attribute__ ((__packed__)); | ||
707 | |||
708 | struct btrfs_inode_extref { | ||
709 | __le64 parent_objectid; | ||
710 | __le64 index; | ||
711 | __le16 name_len; | ||
712 | __u8 name[0]; | ||
713 | /* name goes here */ | ||
714 | } __attribute__ ((__packed__)); | ||
715 | |||
716 | struct btrfs_timespec { | ||
717 | __le64 sec; | ||
718 | __le32 nsec; | ||
719 | } __attribute__ ((__packed__)); | ||
720 | |||
721 | struct btrfs_inode_item { | ||
722 | /* nfs style generation number */ | ||
723 | __le64 generation; | ||
724 | /* transid that last touched this inode */ | ||
725 | __le64 transid; | ||
726 | __le64 size; | ||
727 | __le64 nbytes; | ||
728 | __le64 block_group; | ||
729 | __le32 nlink; | ||
730 | __le32 uid; | ||
731 | __le32 gid; | ||
732 | __le32 mode; | ||
733 | __le64 rdev; | ||
734 | __le64 flags; | ||
735 | |||
736 | /* modification sequence number for NFS */ | ||
737 | __le64 sequence; | ||
738 | |||
739 | /* | ||
740 | * a little future expansion, for more than this we can | ||
741 | * just grow the inode item and version it | ||
742 | */ | ||
743 | __le64 reserved[4]; | ||
744 | struct btrfs_timespec atime; | ||
745 | struct btrfs_timespec ctime; | ||
746 | struct btrfs_timespec mtime; | ||
747 | struct btrfs_timespec otime; | ||
748 | } __attribute__ ((__packed__)); | ||
749 | |||
750 | struct btrfs_dir_log_item { | ||
751 | __le64 end; | ||
752 | } __attribute__ ((__packed__)); | ||
753 | |||
754 | struct btrfs_dir_item { | ||
755 | struct btrfs_disk_key location; | ||
756 | __le64 transid; | ||
757 | __le16 data_len; | ||
758 | __le16 name_len; | ||
759 | u8 type; | ||
760 | } __attribute__ ((__packed__)); | ||
761 | |||
762 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
763 | |||
764 | /* | ||
765 | * Internal in-memory flag that a subvolume has been marked for deletion but | ||
766 | * still visible as a directory | ||
767 | */ | ||
768 | #define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48) | ||
769 | |||
770 | struct btrfs_root_item { | ||
771 | struct btrfs_inode_item inode; | ||
772 | __le64 generation; | ||
773 | __le64 root_dirid; | ||
774 | __le64 bytenr; | ||
775 | __le64 byte_limit; | ||
776 | __le64 bytes_used; | ||
777 | __le64 last_snapshot; | ||
778 | __le64 flags; | ||
779 | __le32 refs; | ||
780 | struct btrfs_disk_key drop_progress; | ||
781 | u8 drop_level; | ||
782 | u8 level; | ||
783 | |||
784 | /* | ||
785 | * The following fields appear after subvol_uuids+subvol_times | ||
786 | * were introduced. | ||
787 | */ | ||
788 | |||
789 | /* | ||
790 | * This generation number is used to test if the new fields are valid | ||
791 | * and up to date while reading the root item. Every time the root item | ||
792 | * is written out, the "generation" field is copied into this field. If | ||
793 | * anyone ever mounted the fs with an older kernel, we will have | ||
794 | * mismatching generation values here and thus must invalidate the | ||
795 | * new fields. See btrfs_update_root and btrfs_find_last_root for | ||
796 | * details. | ||
797 | * the offset of generation_v2 is also used as the start for the memset | ||
798 | * when invalidating the fields. | ||
799 | */ | ||
800 | __le64 generation_v2; | ||
801 | u8 uuid[BTRFS_UUID_SIZE]; | ||
802 | u8 parent_uuid[BTRFS_UUID_SIZE]; | ||
803 | u8 received_uuid[BTRFS_UUID_SIZE]; | ||
804 | __le64 ctransid; /* updated when an inode changes */ | ||
805 | __le64 otransid; /* trans when created */ | ||
806 | __le64 stransid; /* trans when sent. non-zero for received subvol */ | ||
807 | __le64 rtransid; /* trans when received. non-zero for received subvol */ | ||
808 | struct btrfs_timespec ctime; | ||
809 | struct btrfs_timespec otime; | ||
810 | struct btrfs_timespec stime; | ||
811 | struct btrfs_timespec rtime; | ||
812 | __le64 reserved[8]; /* for future */ | ||
813 | } __attribute__ ((__packed__)); | ||
814 | |||
815 | /* | ||
816 | * this is used for both forward and backward root refs | ||
817 | */ | ||
818 | struct btrfs_root_ref { | ||
819 | __le64 dirid; | ||
820 | __le64 sequence; | ||
821 | __le16 name_len; | ||
822 | } __attribute__ ((__packed__)); | ||
823 | |||
824 | struct btrfs_disk_balance_args { | ||
825 | /* | ||
826 | * profiles to operate on, single is denoted by | ||
827 | * BTRFS_AVAIL_ALLOC_BIT_SINGLE | ||
828 | */ | ||
829 | __le64 profiles; | ||
830 | |||
831 | /* | ||
832 | * usage filter | ||
833 | * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N' | ||
834 | * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max | ||
835 | */ | ||
836 | union { | ||
837 | __le64 usage; | ||
838 | struct { | ||
839 | __le32 usage_min; | ||
840 | __le32 usage_max; | ||
841 | }; | ||
842 | }; | ||
843 | |||
844 | /* devid filter */ | ||
845 | __le64 devid; | ||
846 | |||
847 | /* devid subset filter [pstart..pend) */ | ||
848 | __le64 pstart; | ||
849 | __le64 pend; | ||
850 | |||
851 | /* btrfs virtual address space subset filter [vstart..vend) */ | ||
852 | __le64 vstart; | ||
853 | __le64 vend; | ||
854 | |||
855 | /* | ||
856 | * profile to convert to, single is denoted by | ||
857 | * BTRFS_AVAIL_ALLOC_BIT_SINGLE | ||
858 | */ | ||
859 | __le64 target; | ||
860 | |||
861 | /* BTRFS_BALANCE_ARGS_* */ | ||
862 | __le64 flags; | ||
863 | |||
864 | /* | ||
865 | * BTRFS_BALANCE_ARGS_LIMIT with value 'limit' | ||
866 | * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum | ||
867 | * and maximum | ||
868 | */ | ||
869 | union { | ||
870 | __le64 limit; | ||
871 | struct { | ||
872 | __le32 limit_min; | ||
873 | __le32 limit_max; | ||
874 | }; | ||
875 | }; | ||
876 | |||
877 | /* | ||
878 | * Process chunks that cross stripes_min..stripes_max devices, | ||
879 | * BTRFS_BALANCE_ARGS_STRIPES_RANGE | ||
880 | */ | ||
881 | __le32 stripes_min; | ||
882 | __le32 stripes_max; | ||
883 | |||
884 | __le64 unused[6]; | ||
885 | } __attribute__ ((__packed__)); | ||
886 | |||
887 | /* | ||
888 | * store balance parameters to disk so that balance can be properly | ||
889 | * resumed after crash or unmount | ||
890 | */ | ||
891 | struct btrfs_balance_item { | ||
892 | /* BTRFS_BALANCE_* */ | ||
893 | __le64 flags; | ||
894 | |||
895 | struct btrfs_disk_balance_args data; | ||
896 | struct btrfs_disk_balance_args meta; | ||
897 | struct btrfs_disk_balance_args sys; | ||
898 | |||
899 | __le64 unused[4]; | ||
900 | } __attribute__ ((__packed__)); | ||
901 | |||
902 | #define BTRFS_FILE_EXTENT_INLINE 0 | ||
903 | #define BTRFS_FILE_EXTENT_REG 1 | ||
904 | #define BTRFS_FILE_EXTENT_PREALLOC 2 | ||
905 | |||
906 | struct btrfs_file_extent_item { | ||
907 | /* | ||
908 | * transaction id that created this extent | ||
909 | */ | ||
910 | __le64 generation; | ||
911 | /* | ||
912 | * max number of bytes to hold this extent in ram | ||
913 | * when we split a compressed extent we can't know how big | ||
914 | * each of the resulting pieces will be. So, this is | ||
915 | * an upper limit on the size of the extent in ram instead of | ||
916 | * an exact limit. | ||
917 | */ | ||
918 | __le64 ram_bytes; | ||
919 | |||
920 | /* | ||
921 | * 32 bits for the various ways we might encode the data, | ||
922 | * including compression and encryption. If any of these | ||
923 | * are set to something a given disk format doesn't understand | ||
924 | * it is treated like an incompat flag for reading and writing, | ||
925 | * but not for stat. | ||
926 | */ | ||
927 | u8 compression; | ||
928 | u8 encryption; | ||
929 | __le16 other_encoding; /* spare for later use */ | ||
930 | |||
931 | /* are we inline data or a real extent? */ | ||
932 | u8 type; | ||
933 | |||
934 | /* | ||
935 | * disk space consumed by the extent, checksum blocks are included | ||
936 | * in these numbers | ||
937 | * | ||
938 | * At this offset in the structure, the inline extent data start. | ||
939 | */ | ||
940 | __le64 disk_bytenr; | ||
941 | __le64 disk_num_bytes; | ||
942 | /* | ||
943 | * the logical offset in file blocks (no csums) | ||
944 | * this extent record is for. This allows a file extent to point | ||
945 | * into the middle of an existing extent on disk, sharing it | ||
946 | * between two snapshots (useful if some bytes in the middle of the | ||
947 | * extent have changed | ||
948 | */ | ||
949 | __le64 offset; | ||
950 | /* | ||
951 | * the logical number of file blocks (no csums included). This | ||
952 | * always reflects the size uncompressed and without encoding. | ||
953 | */ | ||
954 | __le64 num_bytes; | ||
955 | |||
956 | } __attribute__ ((__packed__)); | ||
957 | |||
958 | struct btrfs_csum_item { | ||
959 | u8 csum; | ||
960 | } __attribute__ ((__packed__)); | ||
961 | |||
962 | struct btrfs_dev_stats_item { | ||
963 | /* | ||
964 | * grow this item struct at the end for future enhancements and keep | ||
965 | * the existing values unchanged | ||
966 | */ | ||
967 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
968 | } __attribute__ ((__packed__)); | ||
969 | |||
970 | #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 | ||
971 | #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 | ||
972 | #define BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED 0 | ||
973 | #define BTRFS_DEV_REPLACE_ITEM_STATE_STARTED 1 | ||
974 | #define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED 2 | ||
975 | #define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED 3 | ||
976 | #define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED 4 | ||
977 | |||
978 | struct btrfs_dev_replace { | 357 | struct btrfs_dev_replace { |
979 | u64 replace_state; /* see #define above */ | 358 | u64 replace_state; /* see #define above */ |
980 | u64 time_started; /* seconds since 1-Jan-1970 */ | 359 | u64 time_started; /* seconds since 1-Jan-1970 */ |
@@ -1005,175 +384,6 @@ struct btrfs_dev_replace { | |||
1005 | struct btrfs_scrub_progress scrub_progress; | 384 | struct btrfs_scrub_progress scrub_progress; |
1006 | }; | 385 | }; |
1007 | 386 | ||
1008 | struct btrfs_dev_replace_item { | ||
1009 | /* | ||
1010 | * grow this item struct at the end for future enhancements and keep | ||
1011 | * the existing values unchanged | ||
1012 | */ | ||
1013 | __le64 src_devid; | ||
1014 | __le64 cursor_left; | ||
1015 | __le64 cursor_right; | ||
1016 | __le64 cont_reading_from_srcdev_mode; | ||
1017 | |||
1018 | __le64 replace_state; | ||
1019 | __le64 time_started; | ||
1020 | __le64 time_stopped; | ||
1021 | __le64 num_write_errors; | ||
1022 | __le64 num_uncorrectable_read_errors; | ||
1023 | } __attribute__ ((__packed__)); | ||
1024 | |||
1025 | /* different types of block groups (and chunks) */ | ||
1026 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) | ||
1027 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) | ||
1028 | #define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2) | ||
1029 | #define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3) | ||
1030 | #define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4) | ||
1031 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) | ||
1032 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) | ||
1033 | #define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) | ||
1034 | #define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) | ||
1035 | #define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \ | ||
1036 | BTRFS_SPACE_INFO_GLOBAL_RSV) | ||
1037 | |||
1038 | enum btrfs_raid_types { | ||
1039 | BTRFS_RAID_RAID10, | ||
1040 | BTRFS_RAID_RAID1, | ||
1041 | BTRFS_RAID_DUP, | ||
1042 | BTRFS_RAID_RAID0, | ||
1043 | BTRFS_RAID_SINGLE, | ||
1044 | BTRFS_RAID_RAID5, | ||
1045 | BTRFS_RAID_RAID6, | ||
1046 | BTRFS_NR_RAID_TYPES | ||
1047 | }; | ||
1048 | |||
1049 | #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ | ||
1050 | BTRFS_BLOCK_GROUP_SYSTEM | \ | ||
1051 | BTRFS_BLOCK_GROUP_METADATA) | ||
1052 | |||
1053 | #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ | ||
1054 | BTRFS_BLOCK_GROUP_RAID1 | \ | ||
1055 | BTRFS_BLOCK_GROUP_RAID5 | \ | ||
1056 | BTRFS_BLOCK_GROUP_RAID6 | \ | ||
1057 | BTRFS_BLOCK_GROUP_DUP | \ | ||
1058 | BTRFS_BLOCK_GROUP_RAID10) | ||
1059 | #define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \ | ||
1060 | BTRFS_BLOCK_GROUP_RAID6) | ||
1061 | |||
1062 | /* | ||
1063 | * We need a bit for restriper to be able to tell when chunks of type | ||
1064 | * SINGLE are available. This "extended" profile format is used in | ||
1065 | * fs_info->avail_*_alloc_bits (in-memory) and balance item fields | ||
1066 | * (on-disk). The corresponding on-disk bit in chunk.type is reserved | ||
1067 | * to avoid remappings between two formats in future. | ||
1068 | */ | ||
1069 | #define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) | ||
1070 | |||
1071 | /* | ||
1072 | * A fake block group type that is used to communicate global block reserve | ||
1073 | * size to userspace via the SPACE_INFO ioctl. | ||
1074 | */ | ||
1075 | #define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49) | ||
1076 | |||
1077 | #define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ | ||
1078 | BTRFS_AVAIL_ALLOC_BIT_SINGLE) | ||
1079 | |||
1080 | static inline u64 chunk_to_extended(u64 flags) | ||
1081 | { | ||
1082 | if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0) | ||
1083 | flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
1084 | |||
1085 | return flags; | ||
1086 | } | ||
1087 | static inline u64 extended_to_chunk(u64 flags) | ||
1088 | { | ||
1089 | return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE; | ||
1090 | } | ||
1091 | |||
1092 | struct btrfs_block_group_item { | ||
1093 | __le64 used; | ||
1094 | __le64 chunk_objectid; | ||
1095 | __le64 flags; | ||
1096 | } __attribute__ ((__packed__)); | ||
1097 | |||
1098 | struct btrfs_free_space_info { | ||
1099 | __le32 extent_count; | ||
1100 | __le32 flags; | ||
1101 | } __attribute__ ((__packed__)); | ||
1102 | |||
1103 | #define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) | ||
1104 | |||
1105 | #define BTRFS_QGROUP_LEVEL_SHIFT 48 | ||
1106 | static inline u64 btrfs_qgroup_level(u64 qgroupid) | ||
1107 | { | ||
1108 | return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT; | ||
1109 | } | ||
1110 | |||
1111 | /* | ||
1112 | * is subvolume quota turned on? | ||
1113 | */ | ||
1114 | #define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) | ||
1115 | /* | ||
1116 | * RESCAN is set during the initialization phase | ||
1117 | */ | ||
1118 | #define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1) | ||
1119 | /* | ||
1120 | * Some qgroup entries are known to be out of date, | ||
1121 | * either because the configuration has changed in a way that | ||
1122 | * makes a rescan necessary, or because the fs has been mounted | ||
1123 | * with a non-qgroup-aware version. | ||
1124 | * Turning qouta off and on again makes it inconsistent, too. | ||
1125 | */ | ||
1126 | #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2) | ||
1127 | |||
1128 | #define BTRFS_QGROUP_STATUS_VERSION 1 | ||
1129 | |||
1130 | struct btrfs_qgroup_status_item { | ||
1131 | __le64 version; | ||
1132 | /* | ||
1133 | * the generation is updated during every commit. As older | ||
1134 | * versions of btrfs are not aware of qgroups, it will be | ||
1135 | * possible to detect inconsistencies by checking the | ||
1136 | * generation on mount time | ||
1137 | */ | ||
1138 | __le64 generation; | ||
1139 | |||
1140 | /* flag definitions see above */ | ||
1141 | __le64 flags; | ||
1142 | |||
1143 | /* | ||
1144 | * only used during scanning to record the progress | ||
1145 | * of the scan. It contains a logical address | ||
1146 | */ | ||
1147 | __le64 rescan; | ||
1148 | } __attribute__ ((__packed__)); | ||
1149 | |||
1150 | struct btrfs_qgroup_info_item { | ||
1151 | __le64 generation; | ||
1152 | __le64 rfer; | ||
1153 | __le64 rfer_cmpr; | ||
1154 | __le64 excl; | ||
1155 | __le64 excl_cmpr; | ||
1156 | } __attribute__ ((__packed__)); | ||
1157 | |||
1158 | /* flags definition for qgroup limits */ | ||
1159 | #define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0) | ||
1160 | #define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1) | ||
1161 | #define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2) | ||
1162 | #define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3) | ||
1163 | #define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4) | ||
1164 | #define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5) | ||
1165 | |||
1166 | struct btrfs_qgroup_limit_item { | ||
1167 | /* | ||
1168 | * only updated when any of the other values change | ||
1169 | */ | ||
1170 | __le64 flags; | ||
1171 | __le64 max_rfer; | ||
1172 | __le64 max_excl; | ||
1173 | __le64 rsv_rfer; | ||
1174 | __le64 rsv_excl; | ||
1175 | } __attribute__ ((__packed__)); | ||
1176 | |||
1177 | /* For raid type sysfs entries */ | 387 | /* For raid type sysfs entries */ |
1178 | struct raid_kobject { | 388 | struct raid_kobject { |
1179 | int raid_type; | 389 | int raid_type; |
@@ -1408,6 +618,27 @@ struct btrfs_block_group_cache { | |||
1408 | 618 | ||
1409 | struct btrfs_io_ctl io_ctl; | 619 | struct btrfs_io_ctl io_ctl; |
1410 | 620 | ||
621 | /* | ||
622 | * Incremented when doing extent allocations and holding a read lock | ||
623 | * on the space_info's groups_sem semaphore. | ||
624 | * Decremented when an ordered extent that represents an IO against this | ||
625 | * block group's range is created (after it's added to its inode's | ||
626 | * root's list of ordered extents) or immediately after the allocation | ||
627 | * if it's a metadata extent or fallocate extent (for these cases we | ||
628 | * don't create ordered extents). | ||
629 | */ | ||
630 | atomic_t reservations; | ||
631 | |||
632 | /* | ||
633 | * Incremented while holding the spinlock *lock* by a task checking if | ||
634 | * it can perform a nocow write (incremented if the value for the *ro* | ||
635 | * field is 0). Decremented by such tasks once they create an ordered | ||
636 | * extent or before that if some error happens before reaching that step. | ||
637 | * This is to prevent races between block group relocation and nocow | ||
638 | * writes through direct IO. | ||
639 | */ | ||
640 | atomic_t nocow_writers; | ||
641 | |||
1411 | /* Lock for free space tree operations. */ | 642 | /* Lock for free space tree operations. */ |
1412 | struct mutex free_space_lock; | 643 | struct mutex free_space_lock; |
1413 | 644 | ||
@@ -2026,228 +1257,6 @@ struct btrfs_root { | |||
2026 | atomic_t qgroup_meta_rsv; | 1257 | atomic_t qgroup_meta_rsv; |
2027 | }; | 1258 | }; |
2028 | 1259 | ||
2029 | struct btrfs_ioctl_defrag_range_args { | ||
2030 | /* start of the defrag operation */ | ||
2031 | __u64 start; | ||
2032 | |||
2033 | /* number of bytes to defrag, use (u64)-1 to say all */ | ||
2034 | __u64 len; | ||
2035 | |||
2036 | /* | ||
2037 | * flags for the operation, which can include turning | ||
2038 | * on compression for this one defrag | ||
2039 | */ | ||
2040 | __u64 flags; | ||
2041 | |||
2042 | /* | ||
2043 | * any extent bigger than this will be considered | ||
2044 | * already defragged. Use 0 to take the kernel default | ||
2045 | * Use 1 to say every single extent must be rewritten | ||
2046 | */ | ||
2047 | __u32 extent_thresh; | ||
2048 | |||
2049 | /* | ||
2050 | * which compression method to use if turning on compression | ||
2051 | * for this defrag operation. If unspecified, zlib will | ||
2052 | * be used | ||
2053 | */ | ||
2054 | __u32 compress_type; | ||
2055 | |||
2056 | /* spare for later */ | ||
2057 | __u32 unused[4]; | ||
2058 | }; | ||
2059 | |||
2060 | |||
2061 | /* | ||
2062 | * inode items have the data typically returned from stat and store other | ||
2063 | * info about object characteristics. There is one for every file and dir in | ||
2064 | * the FS | ||
2065 | */ | ||
2066 | #define BTRFS_INODE_ITEM_KEY 1 | ||
2067 | #define BTRFS_INODE_REF_KEY 12 | ||
2068 | #define BTRFS_INODE_EXTREF_KEY 13 | ||
2069 | #define BTRFS_XATTR_ITEM_KEY 24 | ||
2070 | #define BTRFS_ORPHAN_ITEM_KEY 48 | ||
2071 | /* reserve 2-15 close to the inode for later flexibility */ | ||
2072 | |||
2073 | /* | ||
2074 | * dir items are the name -> inode pointers in a directory. There is one | ||
2075 | * for every name in a directory. | ||
2076 | */ | ||
2077 | #define BTRFS_DIR_LOG_ITEM_KEY 60 | ||
2078 | #define BTRFS_DIR_LOG_INDEX_KEY 72 | ||
2079 | #define BTRFS_DIR_ITEM_KEY 84 | ||
2080 | #define BTRFS_DIR_INDEX_KEY 96 | ||
2081 | /* | ||
2082 | * extent data is for file data | ||
2083 | */ | ||
2084 | #define BTRFS_EXTENT_DATA_KEY 108 | ||
2085 | |||
2086 | /* | ||
2087 | * extent csums are stored in a separate tree and hold csums for | ||
2088 | * an entire extent on disk. | ||
2089 | */ | ||
2090 | #define BTRFS_EXTENT_CSUM_KEY 128 | ||
2091 | |||
2092 | /* | ||
2093 | * root items point to tree roots. They are typically in the root | ||
2094 | * tree used by the super block to find all the other trees | ||
2095 | */ | ||
2096 | #define BTRFS_ROOT_ITEM_KEY 132 | ||
2097 | |||
2098 | /* | ||
2099 | * root backrefs tie subvols and snapshots to the directory entries that | ||
2100 | * reference them | ||
2101 | */ | ||
2102 | #define BTRFS_ROOT_BACKREF_KEY 144 | ||
2103 | |||
2104 | /* | ||
2105 | * root refs make a fast index for listing all of the snapshots and | ||
2106 | * subvolumes referenced by a given root. They point directly to the | ||
2107 | * directory item in the root that references the subvol | ||
2108 | */ | ||
2109 | #define BTRFS_ROOT_REF_KEY 156 | ||
2110 | |||
2111 | /* | ||
2112 | * extent items are in the extent map tree. These record which blocks | ||
2113 | * are used, and how many references there are to each block | ||
2114 | */ | ||
2115 | #define BTRFS_EXTENT_ITEM_KEY 168 | ||
2116 | |||
2117 | /* | ||
2118 | * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know | ||
2119 | * the length, so we save the level in key->offset instead of the length. | ||
2120 | */ | ||
2121 | #define BTRFS_METADATA_ITEM_KEY 169 | ||
2122 | |||
2123 | #define BTRFS_TREE_BLOCK_REF_KEY 176 | ||
2124 | |||
2125 | #define BTRFS_EXTENT_DATA_REF_KEY 178 | ||
2126 | |||
2127 | #define BTRFS_EXTENT_REF_V0_KEY 180 | ||
2128 | |||
2129 | #define BTRFS_SHARED_BLOCK_REF_KEY 182 | ||
2130 | |||
2131 | #define BTRFS_SHARED_DATA_REF_KEY 184 | ||
2132 | |||
2133 | /* | ||
2134 | * block groups give us hints into the extent allocation trees. Which | ||
2135 | * blocks are free etc etc | ||
2136 | */ | ||
2137 | #define BTRFS_BLOCK_GROUP_ITEM_KEY 192 | ||
2138 | |||
2139 | /* | ||
2140 | * Every block group is represented in the free space tree by a free space info | ||
2141 | * item, which stores some accounting information. It is keyed on | ||
2142 | * (block_group_start, FREE_SPACE_INFO, block_group_length). | ||
2143 | */ | ||
2144 | #define BTRFS_FREE_SPACE_INFO_KEY 198 | ||
2145 | |||
2146 | /* | ||
2147 | * A free space extent tracks an extent of space that is free in a block group. | ||
2148 | * It is keyed on (start, FREE_SPACE_EXTENT, length). | ||
2149 | */ | ||
2150 | #define BTRFS_FREE_SPACE_EXTENT_KEY 199 | ||
2151 | |||
2152 | /* | ||
2153 | * When a block group becomes very fragmented, we convert it to use bitmaps | ||
2154 | * instead of extents. A free space bitmap is keyed on | ||
2155 | * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with | ||
2156 | * (length / sectorsize) bits. | ||
2157 | */ | ||
2158 | #define BTRFS_FREE_SPACE_BITMAP_KEY 200 | ||
2159 | |||
2160 | #define BTRFS_DEV_EXTENT_KEY 204 | ||
2161 | #define BTRFS_DEV_ITEM_KEY 216 | ||
2162 | #define BTRFS_CHUNK_ITEM_KEY 228 | ||
2163 | |||
2164 | /* | ||
2165 | * Records the overall state of the qgroups. | ||
2166 | * There's only one instance of this key present, | ||
2167 | * (0, BTRFS_QGROUP_STATUS_KEY, 0) | ||
2168 | */ | ||
2169 | #define BTRFS_QGROUP_STATUS_KEY 240 | ||
2170 | /* | ||
2171 | * Records the currently used space of the qgroup. | ||
2172 | * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid). | ||
2173 | */ | ||
2174 | #define BTRFS_QGROUP_INFO_KEY 242 | ||
2175 | /* | ||
2176 | * Contains the user configured limits for the qgroup. | ||
2177 | * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid). | ||
2178 | */ | ||
2179 | #define BTRFS_QGROUP_LIMIT_KEY 244 | ||
2180 | /* | ||
2181 | * Records the child-parent relationship of qgroups. For | ||
2182 | * each relation, 2 keys are present: | ||
2183 | * (childid, BTRFS_QGROUP_RELATION_KEY, parentid) | ||
2184 | * (parentid, BTRFS_QGROUP_RELATION_KEY, childid) | ||
2185 | */ | ||
2186 | #define BTRFS_QGROUP_RELATION_KEY 246 | ||
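A minimal sketch of how the two relation keys described above could be filled in; the helper name is made up for illustration, and struct btrfs_key is the usual (objectid, type, offset) triple from ctree.h:

/* Hypothetical helper, shown only to illustrate the key layout. */
static void fill_qgroup_relation_keys(struct btrfs_key *forward,
                                      struct btrfs_key *backward,
                                      u64 childid, u64 parentid)
{
        forward->objectid = childid;             /* (childid, RELATION, parentid) */
        forward->type = BTRFS_QGROUP_RELATION_KEY;
        forward->offset = parentid;

        backward->objectid = parentid;           /* (parentid, RELATION, childid) */
        backward->type = BTRFS_QGROUP_RELATION_KEY;
        backward->offset = childid;
}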
2187 | |||
2188 | /* | ||
2189 | * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. | ||
2190 | */ | ||
2191 | #define BTRFS_BALANCE_ITEM_KEY 248 | ||
2192 | |||
2193 | /* | ||
2194 | * The key type for tree items that are stored persistently, but do not need to | ||
2195 | * exist for an extended period of time. The items can exist in any tree. | ||
2196 | * | ||
2197 | * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data] | ||
2198 | * | ||
2199 | * Existing items: | ||
2200 | * | ||
2201 | * - balance status item | ||
2202 | * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0) | ||
2203 | */ | ||
2204 | #define BTRFS_TEMPORARY_ITEM_KEY 248 | ||
2205 | |||
2206 | /* | ||
2207 | * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY | ||
2208 | */ | ||
2209 | #define BTRFS_DEV_STATS_KEY 249 | ||
2210 | |||
2211 | /* | ||
2212 | * The key type for tree items that are stored persistently and usually exist | ||
2213 | * for a long period, e.g. the filesystem lifetime. The item kinds can be status | ||
2214 | * information, stats or preference values. The item can exist in any tree. | ||
2215 | * | ||
2216 | * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data] | ||
2217 | * | ||
2218 | * Existing items: | ||
2219 | * | ||
2220 | * - device statistics, store IO stats in the device tree, one key for all | ||
2221 | * stats | ||
2222 | * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0) | ||
2223 | */ | ||
2224 | #define BTRFS_PERSISTENT_ITEM_KEY 249 | ||
2225 | |||
2226 | /* | ||
2227 | * Persistently stores the device replace state in the device tree. | ||
2228 | * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). | ||
2229 | */ | ||
2230 | #define BTRFS_DEV_REPLACE_KEY 250 | ||
2231 | |||
2232 | /* | ||
2233 | * Stores items that allow UUIDs to be quickly mapped to something else. | ||
2234 | * These items are part of the filesystem UUID tree. | ||
2235 | * The key is built like this: | ||
2236 | * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits). | ||
2237 | */ | ||
2238 | #if BTRFS_UUID_SIZE != 16 | ||
2239 | #error "UUID items require BTRFS_UUID_SIZE == 16!" | ||
2240 | #endif | ||
2241 | #define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */ | ||
2242 | #define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to | ||
2243 | * received subvols */ | ||
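A hedged sketch of the key layout described above, splitting the 16-byte UUID into the two 64-bit key halves; the helper name is made up and the endianness handling is simplified to a plain le64_to_cpu() of each half:

/* Illustration of the (upper_64, BTRFS_UUID_KEY_*, lower_64) layout. */
static void example_uuid_to_key(const u8 uuid[BTRFS_UUID_SIZE], u8 type,
                                struct btrfs_key *key)
{
        __le64 halves[2];

        memcpy(halves, uuid, sizeof(halves));    /* 16 bytes -> two 64-bit halves */
        key->objectid = le64_to_cpu(halves[0]);
        key->type = type;                        /* _SUBVOL or _RECEIVED_SUBVOL */
        key->offset = le64_to_cpu(halves[1]);
}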
2244 | |||
2245 | /* | ||
2246 | * string items are for debugging. They just store a short string of | ||
2247 | * data in the FS | ||
2248 | */ | ||
2249 | #define BTRFS_STRING_ITEM_KEY 253 | ||
2250 | |||
2251 | /* | 1260 | /* |
2252 | * Flags for mount options. | 1261 | * Flags for mount options. |
2253 | * | 1262 | * |
@@ -3499,6 +2508,12 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | |||
3499 | struct btrfs_root *root); | 2508 | struct btrfs_root *root); |
3500 | int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | 2509 | int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, |
3501 | struct btrfs_root *root); | 2510 | struct btrfs_root *root); |
2511 | void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, | ||
2512 | const u64 start); | ||
2513 | void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg); | ||
2514 | bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr); | ||
2515 | void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr); | ||
2516 | void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg); | ||
3502 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 2517 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
3503 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 2518 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
3504 | struct btrfs_root *root, unsigned long count); | 2519 | struct btrfs_root *root, unsigned long count); |
@@ -4122,6 +3137,7 @@ void btrfs_test_inode_set_ops(struct inode *inode); | |||
4122 | 3137 | ||
4123 | /* ioctl.c */ | 3138 | /* ioctl.c */ |
4124 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 3139 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
3140 | long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | ||
4125 | int btrfs_ioctl_get_supported_features(void __user *arg); | 3141 | int btrfs_ioctl_get_supported_features(void __user *arg); |
4126 | void btrfs_update_iflags(struct inode *inode); | 3142 | void btrfs_update_iflags(struct inode *inode); |
4127 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 3143 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
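The btrfs_compat_ioctl() prototype added above is the 32-bit compat entry point; a hedged sketch of how it would be wired into a file_operations table (trimmed to the two relevant fields, not the full set btrfs registers):

/* Sketch only: hook the compat entry point for 32-bit callers. */
static const struct file_operations example_btrfs_fops = {
        .unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_compat_ioctl,
#endif
};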
@@ -4326,10 +3342,9 @@ static inline void assfail(char *expr, char *file, int line) | |||
4326 | #define ASSERT(expr) ((void)0) | 3342 | #define ASSERT(expr) ((void)0) |
4327 | #endif | 3343 | #endif |
4328 | 3344 | ||
4329 | #define btrfs_assert() | ||
4330 | __printf(5, 6) | 3345 | __printf(5, 6) |
4331 | __cold | 3346 | __cold |
4332 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | 3347 | void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, |
4333 | unsigned int line, int errno, const char *fmt, ...); | 3348 | unsigned int line, int errno, const char *fmt, ...); |
4334 | 3349 | ||
4335 | const char *btrfs_decode_error(int errno); | 3350 | const char *btrfs_decode_error(int errno); |
@@ -4339,6 +3354,46 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
4339 | struct btrfs_root *root, const char *function, | 3354 | struct btrfs_root *root, const char *function, |
4340 | unsigned int line, int errno); | 3355 | unsigned int line, int errno); |
4341 | 3356 | ||
3357 | /* | ||
3358 | * Call btrfs_abort_transaction as early as possible when an error condition is | ||
3359 | * detected, that way the exact line number is reported. | ||
3360 | */ | ||
3361 | #define btrfs_abort_transaction(trans, root, errno) \ | ||
3362 | do { \ | ||
3363 | /* Report first abort since mount */ \ | ||
3364 | if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ | ||
3365 | &((root)->fs_info->fs_state))) { \ | ||
3366 | WARN(1, KERN_DEBUG \ | ||
3367 | "BTRFS: Transaction aborted (error %d)\n", \ | ||
3368 | (errno)); \ | ||
3369 | } \ | ||
3370 | __btrfs_abort_transaction((trans), (root), __func__, \ | ||
3371 | __LINE__, (errno)); \ | ||
3372 | } while (0) | ||
3373 | |||
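A hedged usage sketch of the pattern the comment above asks for, aborting at the first failing step rather than in the caller; the wrapper function is made up, and btrfs_run_delayed_refs() merely stands in for any step that can fail inside a transaction:

static int example_update_step(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
{
        int ret;

        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret) {
                /* Abort here so the reported line points at the failure. */
                btrfs_abort_transaction(trans, root, ret);
                return ret;
        }
        return 0;
}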
3374 | #define btrfs_handle_fs_error(fs_info, errno, fmt, args...) \ | ||
3375 | do { \ | ||
3376 | __btrfs_handle_fs_error((fs_info), __func__, __LINE__, \ | ||
3377 | (errno), fmt, ##args); \ | ||
3378 | } while (0) | ||
3379 | |||
3380 | __printf(5, 6) | ||
3381 | __cold | ||
3382 | void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | ||
3383 | unsigned int line, int errno, const char *fmt, ...); | ||
3384 | /* | ||
3385 | * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic | ||
3386 | * will panic(). Otherwise we BUG() here. | ||
3387 | */ | ||
3388 | #define btrfs_panic(fs_info, errno, fmt, args...) \ | ||
3389 | do { \ | ||
3390 | __btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \ | ||
3391 | BUG(); \ | ||
3392 | } while (0) | ||
3393 | |||
3394 | |||
3395 | /* compatibility and incompatibility defines */ | ||
3396 | |||
4342 | #define btrfs_set_fs_incompat(__fs_info, opt) \ | 3397 | #define btrfs_set_fs_incompat(__fs_info, opt) \ |
4343 | __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) | 3398 | __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) |
4344 | 3399 | ||
@@ -4455,44 +3510,6 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) | |||
4455 | return !!(btrfs_super_compat_ro_flags(disk_super) & flag); | 3510 | return !!(btrfs_super_compat_ro_flags(disk_super) & flag); |
4456 | } | 3511 | } |
4457 | 3512 | ||
4458 | /* | ||
4459 | * Call btrfs_abort_transaction as early as possible when an error condition is | ||
4460 | * detected, that way the exact line number is reported. | ||
4461 | */ | ||
4462 | #define btrfs_abort_transaction(trans, root, errno) \ | ||
4463 | do { \ | ||
4464 | /* Report first abort since mount */ \ | ||
4465 | if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ | ||
4466 | &((root)->fs_info->fs_state))) { \ | ||
4467 | WARN(1, KERN_DEBUG \ | ||
4468 | "BTRFS: Transaction aborted (error %d)\n", \ | ||
4469 | (errno)); \ | ||
4470 | } \ | ||
4471 | __btrfs_abort_transaction((trans), (root), __func__, \ | ||
4472 | __LINE__, (errno)); \ | ||
4473 | } while (0) | ||
4474 | |||
4475 | #define btrfs_std_error(fs_info, errno, fmt, args...) \ | ||
4476 | do { \ | ||
4477 | __btrfs_std_error((fs_info), __func__, __LINE__, \ | ||
4478 | (errno), fmt, ##args); \ | ||
4479 | } while (0) | ||
4480 | |||
4481 | __printf(5, 6) | ||
4482 | __cold | ||
4483 | void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | ||
4484 | unsigned int line, int errno, const char *fmt, ...); | ||
4485 | |||
4486 | /* | ||
4487 | * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic | ||
4488 | * will panic(). Otherwise we BUG() here. | ||
4489 | */ | ||
4490 | #define btrfs_panic(fs_info, errno, fmt, args...) \ | ||
4491 | do { \ | ||
4492 | __btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \ | ||
4493 | BUG(); \ | ||
4494 | } while (0) | ||
4495 | |||
4496 | /* acl.c */ | 3513 | /* acl.c */ |
4497 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 3514 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
4498 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type); | 3515 | struct posix_acl *btrfs_get_acl(struct inode *inode, int type); |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 6cef0062f929..61561c2a3f96 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -134,7 +134,7 @@ again: | |||
134 | /* cached in the btrfs inode and can be accessed */ | 134 | /* cached in the btrfs inode and can be accessed */ |
135 | atomic_add(2, &node->refs); | 135 | atomic_add(2, &node->refs); |
136 | 136 | ||
137 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 137 | ret = radix_tree_preload(GFP_NOFS); |
138 | if (ret) { | 138 | if (ret) { |
139 | kmem_cache_free(delayed_node_cache, node); | 139 | kmem_cache_free(delayed_node_cache, node); |
140 | return ERR_PTR(ret); | 140 | return ERR_PTR(ret); |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 26bcb487f958..85f12e6e28d2 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -44,9 +44,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( | |||
44 | struct btrfs_fs_info *fs_info, | 44 | struct btrfs_fs_info *fs_info, |
45 | struct btrfs_device *srcdev, | 45 | struct btrfs_device *srcdev, |
46 | struct btrfs_device *tgtdev); | 46 | struct btrfs_device *tgtdev); |
47 | static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid, | ||
48 | char *srcdev_name, | ||
49 | struct btrfs_device **device); | ||
50 | static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info); | 47 | static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info); |
51 | static int btrfs_dev_replace_kthread(void *data); | 48 | static int btrfs_dev_replace_kthread(void *data); |
52 | static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info); | 49 | static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info); |
@@ -305,8 +302,8 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) | |||
305 | dev_replace->cursor_left_last_write_of_item; | 302 | dev_replace->cursor_left_last_write_of_item; |
306 | } | 303 | } |
307 | 304 | ||
308 | int btrfs_dev_replace_start(struct btrfs_root *root, | 305 | int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name, |
309 | struct btrfs_ioctl_dev_replace_args *args) | 306 | u64 srcdevid, char *srcdev_name, int read_src) |
310 | { | 307 | { |
311 | struct btrfs_trans_handle *trans; | 308 | struct btrfs_trans_handle *trans; |
312 | struct btrfs_fs_info *fs_info = root->fs_info; | 309 | struct btrfs_fs_info *fs_info = root->fs_info; |
@@ -315,29 +312,16 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
315 | struct btrfs_device *tgt_device = NULL; | 312 | struct btrfs_device *tgt_device = NULL; |
316 | struct btrfs_device *src_device = NULL; | 313 | struct btrfs_device *src_device = NULL; |
317 | 314 | ||
318 | switch (args->start.cont_reading_from_srcdev_mode) { | ||
319 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: | ||
320 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: | ||
321 | break; | ||
322 | default: | ||
323 | return -EINVAL; | ||
324 | } | ||
325 | |||
326 | if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || | ||
327 | args->start.tgtdev_name[0] == '\0') | ||
328 | return -EINVAL; | ||
329 | |||
330 | /* the disk copy procedure reuses the scrub code */ | 315 | /* the disk copy procedure reuses the scrub code */ |
331 | mutex_lock(&fs_info->volume_mutex); | 316 | mutex_lock(&fs_info->volume_mutex); |
332 | ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, | 317 | ret = btrfs_find_device_by_devspec(root, srcdevid, |
333 | args->start.srcdev_name, | 318 | srcdev_name, &src_device); |
334 | &src_device); | ||
335 | if (ret) { | 319 | if (ret) { |
336 | mutex_unlock(&fs_info->volume_mutex); | 320 | mutex_unlock(&fs_info->volume_mutex); |
337 | return ret; | 321 | return ret; |
338 | } | 322 | } |
339 | 323 | ||
340 | ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, | 324 | ret = btrfs_init_dev_replace_tgtdev(root, tgtdev_name, |
341 | src_device, &tgt_device); | 325 | src_device, &tgt_device); |
342 | mutex_unlock(&fs_info->volume_mutex); | 326 | mutex_unlock(&fs_info->volume_mutex); |
343 | if (ret) | 327 | if (ret) |
@@ -364,18 +348,17 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
364 | break; | 348 | break; |
365 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | 349 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: |
366 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | 350 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: |
367 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; | 351 | ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; |
368 | goto leave; | 352 | goto leave; |
369 | } | 353 | } |
370 | 354 | ||
371 | dev_replace->cont_reading_from_srcdev_mode = | 355 | dev_replace->cont_reading_from_srcdev_mode = read_src; |
372 | args->start.cont_reading_from_srcdev_mode; | ||
373 | WARN_ON(!src_device); | 356 | WARN_ON(!src_device); |
374 | dev_replace->srcdev = src_device; | 357 | dev_replace->srcdev = src_device; |
375 | WARN_ON(!tgt_device); | 358 | WARN_ON(!tgt_device); |
376 | dev_replace->tgtdev = tgt_device; | 359 | dev_replace->tgtdev = tgt_device; |
377 | 360 | ||
378 | btrfs_info_in_rcu(root->fs_info, | 361 | btrfs_info_in_rcu(fs_info, |
379 | "dev_replace from %s (devid %llu) to %s started", | 362 | "dev_replace from %s (devid %llu) to %s started", |
380 | src_device->missing ? "<missing disk>" : | 363 | src_device->missing ? "<missing disk>" : |
381 | rcu_str_deref(src_device->name), | 364 | rcu_str_deref(src_device->name), |
@@ -396,14 +379,13 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
396 | dev_replace->item_needs_writeback = 1; | 379 | dev_replace->item_needs_writeback = 1; |
397 | atomic64_set(&dev_replace->num_write_errors, 0); | 380 | atomic64_set(&dev_replace->num_write_errors, 0); |
398 | atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); | 381 | atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); |
399 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | ||
400 | btrfs_dev_replace_unlock(dev_replace, 1); | 382 | btrfs_dev_replace_unlock(dev_replace, 1); |
401 | 383 | ||
402 | ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); | 384 | ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); |
403 | if (ret) | 385 | if (ret) |
404 | btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); | 386 | btrfs_err(fs_info, "kobj add dev failed %d\n", ret); |
405 | 387 | ||
406 | btrfs_wait_ordered_roots(root->fs_info, -1); | 388 | btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1); |
407 | 389 | ||
408 | /* force writing the updated state information to disk */ | 390 | /* force writing the updated state information to disk */ |
409 | trans = btrfs_start_transaction(root, 0); | 391 | trans = btrfs_start_transaction(root, 0); |
@@ -421,11 +403,9 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
421 | btrfs_device_get_total_bytes(src_device), | 403 | btrfs_device_get_total_bytes(src_device), |
422 | &dev_replace->scrub_progress, 0, 1); | 404 | &dev_replace->scrub_progress, 0, 1); |
423 | 405 | ||
424 | ret = btrfs_dev_replace_finishing(root->fs_info, ret); | 406 | ret = btrfs_dev_replace_finishing(fs_info, ret); |
425 | /* don't warn if EINPROGRESS, someone else might be running scrub */ | ||
426 | if (ret == -EINPROGRESS) { | 407 | if (ret == -EINPROGRESS) { |
427 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; | 408 | ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; |
428 | ret = 0; | ||
429 | } else { | 409 | } else { |
430 | WARN_ON(ret); | 410 | WARN_ON(ret); |
431 | } | 411 | } |
@@ -440,6 +420,35 @@ leave: | |||
440 | return ret; | 420 | return ret; |
441 | } | 421 | } |
442 | 422 | ||
423 | int btrfs_dev_replace_by_ioctl(struct btrfs_root *root, | ||
424 | struct btrfs_ioctl_dev_replace_args *args) | ||
425 | { | ||
426 | int ret; | ||
427 | |||
428 | switch (args->start.cont_reading_from_srcdev_mode) { | ||
429 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: | ||
430 | case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: | ||
431 | break; | ||
432 | default: | ||
433 | return -EINVAL; | ||
434 | } | ||
435 | |||
436 | if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || | ||
437 | args->start.tgtdev_name[0] == '\0') | ||
438 | return -EINVAL; | ||
439 | |||
440 | ret = btrfs_dev_replace_start(root, args->start.tgtdev_name, | ||
441 | args->start.srcdevid, | ||
442 | args->start.srcdev_name, | ||
443 | args->start.cont_reading_from_srcdev_mode); | ||
444 | args->result = ret; | ||
445 | /* don't warn if EINPROGRESS, someone else might be running scrub */ | ||
446 | if (ret == BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS) | ||
447 | ret = 0; | ||
448 | |||
449 | return ret; | ||
450 | } | ||
451 | |||
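With the ioctl argument checking split out above, btrfs_dev_replace_start() can presumably also be reached without a btrfs_ioctl_dev_replace_args; a hedged sketch of such a direct call (the wrapper and device paths are made up; srcdevid 0 means the source is looked up by name):

static int example_start_replace(struct btrfs_root *root)
{
        /* Illustration only: replace /dev/sdc with /dev/sdd, reading from
         * the source device only when necessary. */
        return btrfs_dev_replace_start(root, "/dev/sdd", 0, "/dev/sdc",
                BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID);
}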
443 | /* | 452 | /* |
444 | * blocked until all in-flight bios are finished. | 453 | * blocked until all in-flight bios are finished. |
445 | */ | 454 | */ |
@@ -495,7 +504,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
495 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 504 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
496 | return ret; | 505 | return ret; |
497 | } | 506 | } |
498 | btrfs_wait_ordered_roots(root->fs_info, -1); | 507 | btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1); |
499 | 508 | ||
500 | trans = btrfs_start_transaction(root, 0); | 509 | trans = btrfs_start_transaction(root, 0); |
501 | if (IS_ERR(trans)) { | 510 | if (IS_ERR(trans)) { |
@@ -560,10 +569,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
560 | ASSERT(list_empty(&src_device->resized_list)); | 569 | ASSERT(list_empty(&src_device->resized_list)); |
561 | tgt_device->commit_total_bytes = src_device->commit_total_bytes; | 570 | tgt_device->commit_total_bytes = src_device->commit_total_bytes; |
562 | tgt_device->commit_bytes_used = src_device->bytes_used; | 571 | tgt_device->commit_bytes_used = src_device->bytes_used; |
563 | if (fs_info->sb->s_bdev == src_device->bdev) | 572 | |
564 | fs_info->sb->s_bdev = tgt_device->bdev; | 573 | btrfs_assign_next_active_device(fs_info, src_device, tgt_device); |
565 | if (fs_info->fs_devices->latest_bdev == src_device->bdev) | 574 | |
566 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | ||
567 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 575 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
568 | fs_info->fs_devices->rw_devices++; | 576 | fs_info->fs_devices->rw_devices++; |
569 | 577 | ||
@@ -626,25 +634,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( | |||
626 | write_unlock(&em_tree->lock); | 634 | write_unlock(&em_tree->lock); |
627 | } | 635 | } |
628 | 636 | ||
629 | static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid, | ||
630 | char *srcdev_name, | ||
631 | struct btrfs_device **device) | ||
632 | { | ||
633 | int ret; | ||
634 | |||
635 | if (srcdevid) { | ||
636 | ret = 0; | ||
637 | *device = btrfs_find_device(root->fs_info, srcdevid, NULL, | ||
638 | NULL); | ||
639 | if (!*device) | ||
640 | ret = -ENOENT; | ||
641 | } else { | ||
642 | ret = btrfs_find_device_missing_or_by_path(root, srcdev_name, | ||
643 | device); | ||
644 | } | ||
645 | return ret; | ||
646 | } | ||
647 | |||
648 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | 637 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, |
649 | struct btrfs_ioctl_dev_replace_args *args) | 638 | struct btrfs_ioctl_dev_replace_args *args) |
650 | { | 639 | { |
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 29e3ef5f96bd..e922b42d91df 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h | |||
@@ -25,8 +25,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info); | |||
25 | int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | 25 | int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, |
26 | struct btrfs_fs_info *fs_info); | 26 | struct btrfs_fs_info *fs_info); |
27 | void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); | 27 | void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); |
28 | int btrfs_dev_replace_start(struct btrfs_root *root, | 28 | int btrfs_dev_replace_by_ioctl(struct btrfs_root *root, |
29 | struct btrfs_ioctl_dev_replace_args *args); | 29 | struct btrfs_ioctl_dev_replace_args *args); |
30 | int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name, | ||
31 | u64 srcdevid, char *srcdev_name, int read_src); | ||
30 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | 32 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, |
31 | struct btrfs_ioctl_dev_replace_args *args); | 33 | struct btrfs_ioctl_dev_replace_args *args); |
32 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, | 34 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4e47849d7427..91d123938cef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1640,7 +1640,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, | |||
1640 | { | 1640 | { |
1641 | int ret; | 1641 | int ret; |
1642 | 1642 | ||
1643 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1643 | ret = radix_tree_preload(GFP_NOFS); |
1644 | if (ret) | 1644 | if (ret) |
1645 | return ret; | 1645 | return ret; |
1646 | 1646 | ||
@@ -2417,7 +2417,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, | |||
2417 | /* returns with log_tree_root freed on success */ | 2417 | /* returns with log_tree_root freed on success */ |
2418 | ret = btrfs_recover_log_trees(log_tree_root); | 2418 | ret = btrfs_recover_log_trees(log_tree_root); |
2419 | if (ret) { | 2419 | if (ret) { |
2420 | btrfs_std_error(tree_root->fs_info, ret, | 2420 | btrfs_handle_fs_error(tree_root->fs_info, ret, |
2421 | "Failed to recover log tree"); | 2421 | "Failed to recover log tree"); |
2422 | free_extent_buffer(log_tree_root->node); | 2422 | free_extent_buffer(log_tree_root->node); |
2423 | kfree(log_tree_root); | 2423 | kfree(log_tree_root); |
@@ -2517,6 +2517,7 @@ int open_ctree(struct super_block *sb, | |||
2517 | int num_backups_tried = 0; | 2517 | int num_backups_tried = 0; |
2518 | int backup_index = 0; | 2518 | int backup_index = 0; |
2519 | int max_active; | 2519 | int max_active; |
2520 | bool cleaner_mutex_locked = false; | ||
2520 | 2521 | ||
2521 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); | 2522 | tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); |
2522 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); | 2523 | chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); |
@@ -2713,7 +2714,7 @@ int open_ctree(struct super_block *sb, | |||
2713 | * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). | 2714 | * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). |
2714 | */ | 2715 | */ |
2715 | if (btrfs_check_super_csum(bh->b_data)) { | 2716 | if (btrfs_check_super_csum(bh->b_data)) { |
2716 | printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); | 2717 | btrfs_err(fs_info, "superblock checksum mismatch"); |
2717 | err = -EINVAL; | 2718 | err = -EINVAL; |
2718 | brelse(bh); | 2719 | brelse(bh); |
2719 | goto fail_alloc; | 2720 | goto fail_alloc; |
@@ -2733,7 +2734,7 @@ int open_ctree(struct super_block *sb, | |||
2733 | 2734 | ||
2734 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 2735 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
2735 | if (ret) { | 2736 | if (ret) { |
2736 | printk(KERN_ERR "BTRFS: superblock contains fatal errors\n"); | 2737 | btrfs_err(fs_info, "superblock contains fatal errors"); |
2737 | err = -EINVAL; | 2738 | err = -EINVAL; |
2738 | goto fail_alloc; | 2739 | goto fail_alloc; |
2739 | } | 2740 | } |
@@ -2768,9 +2769,9 @@ int open_ctree(struct super_block *sb, | |||
2768 | features = btrfs_super_incompat_flags(disk_super) & | 2769 | features = btrfs_super_incompat_flags(disk_super) & |
2769 | ~BTRFS_FEATURE_INCOMPAT_SUPP; | 2770 | ~BTRFS_FEATURE_INCOMPAT_SUPP; |
2770 | if (features) { | 2771 | if (features) { |
2771 | printk(KERN_ERR "BTRFS: couldn't mount because of " | 2772 | btrfs_err(fs_info, |
2772 | "unsupported optional features (%Lx).\n", | 2773 | "cannot mount because of unsupported optional features (%llx)", |
2773 | features); | 2774 | features); |
2774 | err = -EINVAL; | 2775 | err = -EINVAL; |
2775 | goto fail_alloc; | 2776 | goto fail_alloc; |
2776 | } | 2777 | } |
@@ -2781,7 +2782,7 @@ int open_ctree(struct super_block *sb, | |||
2781 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | 2782 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
2782 | 2783 | ||
2783 | if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) | 2784 | if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) |
2784 | printk(KERN_INFO "BTRFS: has skinny extents\n"); | 2785 | btrfs_info(fs_info, "has skinny extents"); |
2785 | 2786 | ||
2786 | /* | 2787 | /* |
2787 | * flag our filesystem as having big metadata blocks if | 2788 | * flag our filesystem as having big metadata blocks if |
@@ -2789,7 +2790,8 @@ int open_ctree(struct super_block *sb, | |||
2789 | */ | 2790 | */ |
2790 | if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { | 2791 | if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { |
2791 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) | 2792 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) |
2792 | printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); | 2793 | btrfs_info(fs_info, |
2794 | "flagging fs with big metadata feature"); | ||
2793 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; | 2795 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; |
2794 | } | 2796 | } |
2795 | 2797 | ||
@@ -2805,9 +2807,9 @@ int open_ctree(struct super_block *sb, | |||
2805 | */ | 2807 | */ |
2806 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && | 2808 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && |
2807 | (sectorsize != nodesize)) { | 2809 | (sectorsize != nodesize)) { |
2808 | printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes " | 2810 | btrfs_err(fs_info, |
2809 | "are not allowed for mixed block groups on %s\n", | 2811 | "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", |
2810 | sb->s_id); | 2812 | nodesize, sectorsize); |
2811 | goto fail_alloc; | 2813 | goto fail_alloc; |
2812 | } | 2814 | } |
2813 | 2815 | ||
@@ -2820,8 +2822,8 @@ int open_ctree(struct super_block *sb, | |||
2820 | features = btrfs_super_compat_ro_flags(disk_super) & | 2822 | features = btrfs_super_compat_ro_flags(disk_super) & |
2821 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 2823 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
2822 | if (!(sb->s_flags & MS_RDONLY) && features) { | 2824 | if (!(sb->s_flags & MS_RDONLY) && features) { |
2823 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " | 2825 | btrfs_err(fs_info, |
2824 | "unsupported option features (%Lx).\n", | 2826 | "cannot mount read-write because of unsupported optional features (%llx)", |
2825 | features); | 2827 | features); |
2826 | err = -EINVAL; | 2828 | err = -EINVAL; |
2827 | goto fail_alloc; | 2829 | goto fail_alloc; |
@@ -2850,8 +2852,7 @@ int open_ctree(struct super_block *sb, | |||
2850 | ret = btrfs_read_sys_array(tree_root); | 2852 | ret = btrfs_read_sys_array(tree_root); |
2851 | mutex_unlock(&fs_info->chunk_mutex); | 2853 | mutex_unlock(&fs_info->chunk_mutex); |
2852 | if (ret) { | 2854 | if (ret) { |
2853 | printk(KERN_ERR "BTRFS: failed to read the system " | 2855 | btrfs_err(fs_info, "failed to read the system array: %d", ret); |
2854 | "array on %s\n", sb->s_id); | ||
2855 | goto fail_sb_buffer; | 2856 | goto fail_sb_buffer; |
2856 | } | 2857 | } |
2857 | 2858 | ||
@@ -2865,8 +2866,7 @@ int open_ctree(struct super_block *sb, | |||
2865 | generation); | 2866 | generation); |
2866 | if (IS_ERR(chunk_root->node) || | 2867 | if (IS_ERR(chunk_root->node) || |
2867 | !extent_buffer_uptodate(chunk_root->node)) { | 2868 | !extent_buffer_uptodate(chunk_root->node)) { |
2868 | printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", | 2869 | btrfs_err(fs_info, "failed to read chunk root"); |
2869 | sb->s_id); | ||
2870 | if (!IS_ERR(chunk_root->node)) | 2870 | if (!IS_ERR(chunk_root->node)) |
2871 | free_extent_buffer(chunk_root->node); | 2871 | free_extent_buffer(chunk_root->node); |
2872 | chunk_root->node = NULL; | 2872 | chunk_root->node = NULL; |
@@ -2880,8 +2880,7 @@ int open_ctree(struct super_block *sb, | |||
2880 | 2880 | ||
2881 | ret = btrfs_read_chunk_tree(chunk_root); | 2881 | ret = btrfs_read_chunk_tree(chunk_root); |
2882 | if (ret) { | 2882 | if (ret) { |
2883 | printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n", | 2883 | btrfs_err(fs_info, "failed to read chunk tree: %d", ret); |
2884 | sb->s_id); | ||
2885 | goto fail_tree_roots; | 2884 | goto fail_tree_roots; |
2886 | } | 2885 | } |
2887 | 2886 | ||
@@ -2892,8 +2891,7 @@ int open_ctree(struct super_block *sb, | |||
2892 | btrfs_close_extra_devices(fs_devices, 0); | 2891 | btrfs_close_extra_devices(fs_devices, 0); |
2893 | 2892 | ||
2894 | if (!fs_devices->latest_bdev) { | 2893 | if (!fs_devices->latest_bdev) { |
2895 | printk(KERN_ERR "BTRFS: failed to read devices on %s\n", | 2894 | btrfs_err(fs_info, "failed to read devices"); |
2896 | sb->s_id); | ||
2897 | goto fail_tree_roots; | 2895 | goto fail_tree_roots; |
2898 | } | 2896 | } |
2899 | 2897 | ||
@@ -2905,8 +2903,7 @@ retry_root_backup: | |||
2905 | generation); | 2903 | generation); |
2906 | if (IS_ERR(tree_root->node) || | 2904 | if (IS_ERR(tree_root->node) || |
2907 | !extent_buffer_uptodate(tree_root->node)) { | 2905 | !extent_buffer_uptodate(tree_root->node)) { |
2908 | printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", | 2906 | btrfs_warn(fs_info, "failed to read tree root"); |
2909 | sb->s_id); | ||
2910 | if (!IS_ERR(tree_root->node)) | 2907 | if (!IS_ERR(tree_root->node)) |
2911 | free_extent_buffer(tree_root->node); | 2908 | free_extent_buffer(tree_root->node); |
2912 | tree_root->node = NULL; | 2909 | tree_root->node = NULL; |
@@ -2938,20 +2935,19 @@ retry_root_backup: | |||
2938 | 2935 | ||
2939 | ret = btrfs_recover_balance(fs_info); | 2936 | ret = btrfs_recover_balance(fs_info); |
2940 | if (ret) { | 2937 | if (ret) { |
2941 | printk(KERN_ERR "BTRFS: failed to recover balance\n"); | 2938 | btrfs_err(fs_info, "failed to recover balance: %d", ret); |
2942 | goto fail_block_groups; | 2939 | goto fail_block_groups; |
2943 | } | 2940 | } |
2944 | 2941 | ||
2945 | ret = btrfs_init_dev_stats(fs_info); | 2942 | ret = btrfs_init_dev_stats(fs_info); |
2946 | if (ret) { | 2943 | if (ret) { |
2947 | printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n", | 2944 | btrfs_err(fs_info, "failed to init dev_stats: %d", ret); |
2948 | ret); | ||
2949 | goto fail_block_groups; | 2945 | goto fail_block_groups; |
2950 | } | 2946 | } |
2951 | 2947 | ||
2952 | ret = btrfs_init_dev_replace(fs_info); | 2948 | ret = btrfs_init_dev_replace(fs_info); |
2953 | if (ret) { | 2949 | if (ret) { |
2954 | pr_err("BTRFS: failed to init dev_replace: %d\n", ret); | 2950 | btrfs_err(fs_info, "failed to init dev_replace: %d", ret); |
2955 | goto fail_block_groups; | 2951 | goto fail_block_groups; |
2956 | } | 2952 | } |
2957 | 2953 | ||
@@ -2959,31 +2955,33 @@ retry_root_backup: | |||
2959 | 2955 | ||
2960 | ret = btrfs_sysfs_add_fsid(fs_devices, NULL); | 2956 | ret = btrfs_sysfs_add_fsid(fs_devices, NULL); |
2961 | if (ret) { | 2957 | if (ret) { |
2962 | pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret); | 2958 | btrfs_err(fs_info, "failed to init sysfs fsid interface: %d", |
2959 | ret); | ||
2963 | goto fail_block_groups; | 2960 | goto fail_block_groups; |
2964 | } | 2961 | } |
2965 | 2962 | ||
2966 | ret = btrfs_sysfs_add_device(fs_devices); | 2963 | ret = btrfs_sysfs_add_device(fs_devices); |
2967 | if (ret) { | 2964 | if (ret) { |
2968 | pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret); | 2965 | btrfs_err(fs_info, "failed to init sysfs device interface: %d", |
2966 | ret); | ||
2969 | goto fail_fsdev_sysfs; | 2967 | goto fail_fsdev_sysfs; |
2970 | } | 2968 | } |
2971 | 2969 | ||
2972 | ret = btrfs_sysfs_add_mounted(fs_info); | 2970 | ret = btrfs_sysfs_add_mounted(fs_info); |
2973 | if (ret) { | 2971 | if (ret) { |
2974 | pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); | 2972 | btrfs_err(fs_info, "failed to init sysfs interface: %d", ret); |
2975 | goto fail_fsdev_sysfs; | 2973 | goto fail_fsdev_sysfs; |
2976 | } | 2974 | } |
2977 | 2975 | ||
2978 | ret = btrfs_init_space_info(fs_info); | 2976 | ret = btrfs_init_space_info(fs_info); |
2979 | if (ret) { | 2977 | if (ret) { |
2980 | printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret); | 2978 | btrfs_err(fs_info, "failed to initialize space info: %d", ret); |
2981 | goto fail_sysfs; | 2979 | goto fail_sysfs; |
2982 | } | 2980 | } |
2983 | 2981 | ||
2984 | ret = btrfs_read_block_groups(fs_info->extent_root); | 2982 | ret = btrfs_read_block_groups(fs_info->extent_root); |
2985 | if (ret) { | 2983 | if (ret) { |
2986 | printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); | 2984 | btrfs_err(fs_info, "failed to read block groups: %d", ret); |
2987 | goto fail_sysfs; | 2985 | goto fail_sysfs; |
2988 | } | 2986 | } |
2989 | fs_info->num_tolerated_disk_barrier_failures = | 2987 | fs_info->num_tolerated_disk_barrier_failures = |
@@ -2991,12 +2989,20 @@ retry_root_backup: | |||
2991 | if (fs_info->fs_devices->missing_devices > | 2989 | if (fs_info->fs_devices->missing_devices > |
2992 | fs_info->num_tolerated_disk_barrier_failures && | 2990 | fs_info->num_tolerated_disk_barrier_failures && |
2993 | !(sb->s_flags & MS_RDONLY)) { | 2991 | !(sb->s_flags & MS_RDONLY)) { |
2994 | pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n", | 2992 | btrfs_warn(fs_info, |
2993 | "missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed", | ||
2995 | fs_info->fs_devices->missing_devices, | 2994 | fs_info->fs_devices->missing_devices, |
2996 | fs_info->num_tolerated_disk_barrier_failures); | 2995 | fs_info->num_tolerated_disk_barrier_failures); |
2997 | goto fail_sysfs; | 2996 | goto fail_sysfs; |
2998 | } | 2997 | } |
2999 | 2998 | ||
2999 | /* | ||
3000 | * Hold the cleaner_mutex here so that we don't block | ||
3001 | * for a long time on btrfs_recover_relocation. cleaner_kthread | ||
3002 | * will wait for us to finish mounting the filesystem. | ||
3003 | */ | ||
3004 | mutex_lock(&fs_info->cleaner_mutex); | ||
3005 | cleaner_mutex_locked = true; | ||
3000 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 3006 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
3001 | "btrfs-cleaner"); | 3007 | "btrfs-cleaner"); |
3002 | if (IS_ERR(fs_info->cleaner_kthread)) | 3008 | if (IS_ERR(fs_info->cleaner_kthread)) |
@@ -3011,8 +3017,7 @@ retry_root_backup: | |||
3011 | if (!btrfs_test_opt(tree_root, SSD) && | 3017 | if (!btrfs_test_opt(tree_root, SSD) && |
3012 | !btrfs_test_opt(tree_root, NOSSD) && | 3018 | !btrfs_test_opt(tree_root, NOSSD) && |
3013 | !fs_info->fs_devices->rotating) { | 3019 | !fs_info->fs_devices->rotating) { |
3014 | printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD " | 3020 | btrfs_info(fs_info, "detected SSD devices, enabling SSD mode"); |
3015 | "mode\n"); | ||
3016 | btrfs_set_opt(fs_info->mount_opt, SSD); | 3021 | btrfs_set_opt(fs_info->mount_opt, SSD); |
3017 | } | 3022 | } |
3018 | 3023 | ||
@@ -3030,8 +3035,9 @@ retry_root_backup: | |||
3030 | 1 : 0, | 3035 | 1 : 0, |
3031 | fs_info->check_integrity_print_mask); | 3036 | fs_info->check_integrity_print_mask); |
3032 | if (ret) | 3037 | if (ret) |
3033 | printk(KERN_WARNING "BTRFS: failed to initialize" | 3038 | btrfs_warn(fs_info, |
3034 | " integrity check module %s\n", sb->s_id); | 3039 | "failed to initialize integrity check module: %d", |
3040 | ret); | ||
3035 | } | 3041 | } |
3036 | #endif | 3042 | #endif |
3037 | ret = btrfs_read_qgroup_config(fs_info); | 3043 | ret = btrfs_read_qgroup_config(fs_info); |
@@ -3056,17 +3062,17 @@ retry_root_backup: | |||
3056 | ret = btrfs_cleanup_fs_roots(fs_info); | 3062 | ret = btrfs_cleanup_fs_roots(fs_info); |
3057 | if (ret) | 3063 | if (ret) |
3058 | goto fail_qgroup; | 3064 | goto fail_qgroup; |
3059 | 3065 | /* We locked cleaner_mutex before creating cleaner_kthread. */ | |
3060 | mutex_lock(&fs_info->cleaner_mutex); | ||
3061 | ret = btrfs_recover_relocation(tree_root); | 3066 | ret = btrfs_recover_relocation(tree_root); |
3062 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3063 | if (ret < 0) { | 3067 | if (ret < 0) { |
3064 | printk(KERN_WARNING | 3068 | btrfs_warn(fs_info, "failed to recover relocation: %d", |
3065 | "BTRFS: failed to recover relocation\n"); | 3069 | ret); |
3066 | err = -EINVAL; | 3070 | err = -EINVAL; |
3067 | goto fail_qgroup; | 3071 | goto fail_qgroup; |
3068 | } | 3072 | } |
3069 | } | 3073 | } |
3074 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3075 | cleaner_mutex_locked = false; | ||
3070 | 3076 | ||
3071 | location.objectid = BTRFS_FS_TREE_OBJECTID; | 3077 | location.objectid = BTRFS_FS_TREE_OBJECTID; |
3072 | location.type = BTRFS_ROOT_ITEM_KEY; | 3078 | location.type = BTRFS_ROOT_ITEM_KEY; |
@@ -3083,11 +3089,11 @@ retry_root_backup: | |||
3083 | 3089 | ||
3084 | if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && | 3090 | if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && |
3085 | !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { | 3091 | !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { |
3086 | pr_info("BTRFS: creating free space tree\n"); | 3092 | btrfs_info(fs_info, "creating free space tree"); |
3087 | ret = btrfs_create_free_space_tree(fs_info); | 3093 | ret = btrfs_create_free_space_tree(fs_info); |
3088 | if (ret) { | 3094 | if (ret) { |
3089 | pr_warn("BTRFS: failed to create free space tree %d\n", | 3095 | btrfs_warn(fs_info, |
3090 | ret); | 3096 | "failed to create free space tree: %d", ret); |
3091 | close_ctree(tree_root); | 3097 | close_ctree(tree_root); |
3092 | return ret; | 3098 | return ret; |
3093 | } | 3099 | } |
@@ -3104,14 +3110,14 @@ retry_root_backup: | |||
3104 | 3110 | ||
3105 | ret = btrfs_resume_balance_async(fs_info); | 3111 | ret = btrfs_resume_balance_async(fs_info); |
3106 | if (ret) { | 3112 | if (ret) { |
3107 | printk(KERN_WARNING "BTRFS: failed to resume balance\n"); | 3113 | btrfs_warn(fs_info, "failed to resume balance: %d", ret); |
3108 | close_ctree(tree_root); | 3114 | close_ctree(tree_root); |
3109 | return ret; | 3115 | return ret; |
3110 | } | 3116 | } |
3111 | 3117 | ||
3112 | ret = btrfs_resume_dev_replace_async(fs_info); | 3118 | ret = btrfs_resume_dev_replace_async(fs_info); |
3113 | if (ret) { | 3119 | if (ret) { |
3114 | pr_warn("BTRFS: failed to resume dev_replace\n"); | 3120 | btrfs_warn(fs_info, "failed to resume device replace: %d", ret); |
3115 | close_ctree(tree_root); | 3121 | close_ctree(tree_root); |
3116 | return ret; | 3122 | return ret; |
3117 | } | 3123 | } |
@@ -3120,33 +3126,33 @@ retry_root_backup: | |||
3120 | 3126 | ||
3121 | if (btrfs_test_opt(tree_root, CLEAR_CACHE) && | 3127 | if (btrfs_test_opt(tree_root, CLEAR_CACHE) && |
3122 | btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { | 3128 | btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { |
3123 | pr_info("BTRFS: clearing free space tree\n"); | 3129 | btrfs_info(fs_info, "clearing free space tree"); |
3124 | ret = btrfs_clear_free_space_tree(fs_info); | 3130 | ret = btrfs_clear_free_space_tree(fs_info); |
3125 | if (ret) { | 3131 | if (ret) { |
3126 | pr_warn("BTRFS: failed to clear free space tree %d\n", | 3132 | btrfs_warn(fs_info, |
3127 | ret); | 3133 | "failed to clear free space tree: %d", ret); |
3128 | close_ctree(tree_root); | 3134 | close_ctree(tree_root); |
3129 | return ret; | 3135 | return ret; |
3130 | } | 3136 | } |
3131 | } | 3137 | } |
3132 | 3138 | ||
3133 | if (!fs_info->uuid_root) { | 3139 | if (!fs_info->uuid_root) { |
3134 | pr_info("BTRFS: creating UUID tree\n"); | 3140 | btrfs_info(fs_info, "creating UUID tree"); |
3135 | ret = btrfs_create_uuid_tree(fs_info); | 3141 | ret = btrfs_create_uuid_tree(fs_info); |
3136 | if (ret) { | 3142 | if (ret) { |
3137 | pr_warn("BTRFS: failed to create the UUID tree %d\n", | 3143 | btrfs_warn(fs_info, |
3138 | ret); | 3144 | "failed to create the UUID tree: %d", ret); |
3139 | close_ctree(tree_root); | 3145 | close_ctree(tree_root); |
3140 | return ret; | 3146 | return ret; |
3141 | } | 3147 | } |
3142 | } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || | 3148 | } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || |
3143 | fs_info->generation != | 3149 | fs_info->generation != |
3144 | btrfs_super_uuid_tree_generation(disk_super)) { | 3150 | btrfs_super_uuid_tree_generation(disk_super)) { |
3145 | pr_info("BTRFS: checking UUID tree\n"); | 3151 | btrfs_info(fs_info, "checking UUID tree"); |
3146 | ret = btrfs_check_uuid_tree(fs_info); | 3152 | ret = btrfs_check_uuid_tree(fs_info); |
3147 | if (ret) { | 3153 | if (ret) { |
3148 | pr_warn("BTRFS: failed to check the UUID tree %d\n", | 3154 | btrfs_warn(fs_info, |
3149 | ret); | 3155 | "failed to check the UUID tree: %d", ret); |
3150 | close_ctree(tree_root); | 3156 | close_ctree(tree_root); |
3151 | return ret; | 3157 | return ret; |
3152 | } | 3158 | } |
@@ -3180,6 +3186,10 @@ fail_cleaner: | |||
3180 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | 3186 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); |
3181 | 3187 | ||
3182 | fail_sysfs: | 3188 | fail_sysfs: |
3189 | if (cleaner_mutex_locked) { | ||
3190 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3191 | cleaner_mutex_locked = false; | ||
3192 | } | ||
3183 | btrfs_sysfs_remove_mounted(fs_info); | 3193 | btrfs_sysfs_remove_mounted(fs_info); |
3184 | 3194 | ||
3185 | fail_fsdev_sysfs: | 3195 | fail_fsdev_sysfs: |
@@ -3646,7 +3656,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
3646 | if (ret) { | 3656 | if (ret) { |
3647 | mutex_unlock( | 3657 | mutex_unlock( |
3648 | &root->fs_info->fs_devices->device_list_mutex); | 3658 | &root->fs_info->fs_devices->device_list_mutex); |
3649 | btrfs_std_error(root->fs_info, ret, | 3659 | btrfs_handle_fs_error(root->fs_info, ret, |
3650 | "errors while submitting device barriers."); | 3660 | "errors while submitting device barriers."); |
3651 | return ret; | 3661 | return ret; |
3652 | } | 3662 | } |
@@ -3686,7 +3696,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
3686 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 3696 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
3687 | 3697 | ||
3688 | /* FUA is masked off if unsupported and can't be the reason */ | 3698 | /* FUA is masked off if unsupported and can't be the reason */ |
3689 | btrfs_std_error(root->fs_info, -EIO, | 3699 | btrfs_handle_fs_error(root->fs_info, -EIO, |
3690 | "%d errors while writing supers", total_errors); | 3700 | "%d errors while writing supers", total_errors); |
3691 | return -EIO; | 3701 | return -EIO; |
3692 | } | 3702 | } |
@@ -3704,7 +3714,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
3704 | } | 3714 | } |
3705 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 3715 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
3706 | if (total_errors > max_errors) { | 3716 | if (total_errors > max_errors) { |
3707 | btrfs_std_error(root->fs_info, -EIO, | 3717 | btrfs_handle_fs_error(root->fs_info, -EIO, |
3708 | "%d errors while writing supers", total_errors); | 3718 | "%d errors while writing supers", total_errors); |
3709 | return -EIO; | 3719 | return -EIO; |
3710 | } | 3720 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 84e060eb0de8..9424864fd01a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | |||
3824 | return readonly; | 3824 | return readonly; |
3825 | } | 3825 | } |
3826 | 3826 | ||
3827 | bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) | ||
3828 | { | ||
3829 | struct btrfs_block_group_cache *bg; | ||
3830 | bool ret = true; | ||
3831 | |||
3832 | bg = btrfs_lookup_block_group(fs_info, bytenr); | ||
3833 | if (!bg) | ||
3834 | return false; | ||
3835 | |||
3836 | spin_lock(&bg->lock); | ||
3837 | if (bg->ro) | ||
3838 | ret = false; | ||
3839 | else | ||
3840 | atomic_inc(&bg->nocow_writers); | ||
3841 | spin_unlock(&bg->lock); | ||
3842 | |||
3843 | /* no put on block group, done by btrfs_dec_nocow_writers */ | ||
3844 | if (!ret) | ||
3845 | btrfs_put_block_group(bg); | ||
3846 | |||
3847 | return ret; | ||
3848 | |||
3849 | } | ||
3850 | |||
3851 | void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) | ||
3852 | { | ||
3853 | struct btrfs_block_group_cache *bg; | ||
3854 | |||
3855 | bg = btrfs_lookup_block_group(fs_info, bytenr); | ||
3856 | ASSERT(bg); | ||
3857 | if (atomic_dec_and_test(&bg->nocow_writers)) | ||
3858 | wake_up_atomic_t(&bg->nocow_writers); | ||
3859 | /* | ||
3860 | * Once for our lookup and once for the lookup done by a previous call | ||
3861 | * to btrfs_inc_nocow_writers() | ||
3862 | */ | ||
3863 | btrfs_put_block_group(bg); | ||
3864 | btrfs_put_block_group(bg); | ||
3865 | } | ||
3866 | |||
3867 | static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a) | ||
3868 | { | ||
3869 | schedule(); | ||
3870 | return 0; | ||
3871 | } | ||
3872 | |||
3873 | void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) | ||
3874 | { | ||
3875 | wait_on_atomic_t(&bg->nocow_writers, | ||
3876 | btrfs_wait_nocow_writers_atomic_t, | ||
3877 | TASK_UNINTERRUPTIBLE); | ||
3878 | } | ||
3879 | |||
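A hedged sketch of how the new counter is meant to be paired by a nocow writer; the wrapper is made up and the write itself is elided, but the inc/dec bracket follows the helpers above:

static void example_nocow_write(struct btrfs_fs_info *fs_info, u64 bytenr)
{
        /* Block group is (or just became) read-only: caller falls back to CoW. */
        if (!btrfs_inc_nocow_writers(fs_info, bytenr))
                return;

        /* ... submit the nocow write against this extent ... */

        btrfs_dec_nocow_writers(fs_info, bytenr);
}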
3827 | static const char *alloc_name(u64 flags) | 3880 | static const char *alloc_name(u64 flags) |
3828 | { | 3881 | { |
3829 | switch (flags) { | 3882 | switch (flags) { |
@@ -4141,7 +4194,7 @@ commit_trans: | |||
4141 | 4194 | ||
4142 | if (need_commit > 0) { | 4195 | if (need_commit > 0) { |
4143 | btrfs_start_delalloc_roots(fs_info, 0, -1); | 4196 | btrfs_start_delalloc_roots(fs_info, 0, -1); |
4144 | btrfs_wait_ordered_roots(fs_info, -1); | 4197 | btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); |
4145 | } | 4198 | } |
4146 | 4199 | ||
4147 | trans = btrfs_join_transaction(root); | 4200 | trans = btrfs_join_transaction(root); |
@@ -4583,7 +4636,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
4583 | */ | 4636 | */ |
4584 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); | 4637 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
4585 | if (!current->journal_info) | 4638 | if (!current->journal_info) |
4586 | btrfs_wait_ordered_roots(root->fs_info, nr_items); | 4639 | btrfs_wait_ordered_roots(root->fs_info, nr_items, |
4640 | 0, (u64)-1); | ||
4587 | } | 4641 | } |
4588 | } | 4642 | } |
4589 | 4643 | ||
@@ -4620,7 +4674,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
4620 | 4674 | ||
4621 | /* Calc the number of the pages we need flush for space reservation */ | 4675 | /* Calc the number of the pages we need flush for space reservation */ |
4622 | items = calc_reclaim_items_nr(root, to_reclaim); | 4676 | items = calc_reclaim_items_nr(root, to_reclaim); |
4623 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; | 4677 | to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM; |
4624 | 4678 | ||
4625 | trans = (struct btrfs_trans_handle *)current->journal_info; | 4679 | trans = (struct btrfs_trans_handle *)current->journal_info; |
4626 | block_rsv = &root->fs_info->delalloc_block_rsv; | 4680 | block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -4632,7 +4686,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
4632 | if (trans) | 4686 | if (trans) |
4633 | return; | 4687 | return; |
4634 | if (wait_ordered) | 4688 | if (wait_ordered) |
4635 | btrfs_wait_ordered_roots(root->fs_info, items); | 4689 | btrfs_wait_ordered_roots(root->fs_info, items, |
4690 | 0, (u64)-1); | ||
4636 | return; | 4691 | return; |
4637 | } | 4692 | } |
4638 | 4693 | ||
@@ -4671,7 +4726,8 @@ skip_async: | |||
4671 | 4726 | ||
4672 | loops++; | 4727 | loops++; |
4673 | if (wait_ordered && !trans) { | 4728 | if (wait_ordered && !trans) { |
4674 | btrfs_wait_ordered_roots(root->fs_info, items); | 4729 | btrfs_wait_ordered_roots(root->fs_info, items, |
4730 | 0, (u64)-1); | ||
4675 | } else { | 4731 | } else { |
4676 | time_left = schedule_timeout_killable(1); | 4732 | time_left = schedule_timeout_killable(1); |
4677 | if (time_left) | 4733 | if (time_left) |
@@ -6172,6 +6228,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, | |||
6172 | return 0; | 6228 | return 0; |
6173 | } | 6229 | } |
6174 | 6230 | ||
6231 | static void | ||
6232 | btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg) | ||
6233 | { | ||
6234 | atomic_inc(&bg->reservations); | ||
6235 | } | ||
6236 | |||
6237 | void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, | ||
6238 | const u64 start) | ||
6239 | { | ||
6240 | struct btrfs_block_group_cache *bg; | ||
6241 | |||
6242 | bg = btrfs_lookup_block_group(fs_info, start); | ||
6243 | ASSERT(bg); | ||
6244 | if (atomic_dec_and_test(&bg->reservations)) | ||
6245 | wake_up_atomic_t(&bg->reservations); | ||
6246 | btrfs_put_block_group(bg); | ||
6247 | } | ||
6248 | |||
6249 | static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a) | ||
6250 | { | ||
6251 | schedule(); | ||
6252 | return 0; | ||
6253 | } | ||
6254 | |||
6255 | void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) | ||
6256 | { | ||
6257 | struct btrfs_space_info *space_info = bg->space_info; | ||
6258 | |||
6259 | ASSERT(bg->ro); | ||
6260 | |||
6261 | if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA)) | ||
6262 | return; | ||
6263 | |||
6264 | /* | ||
6265 | * Our block group is read-only, but before we set it to read-only | ||
6266 | * some task might have allocated an extent from it already, but it | ||
6267 | * has not yet created a respective ordered extent (and added it to a | ||
6268 | * root's list of ordered extents). | ||
6269 | * Therefore wait for any task currently allocating extents, since the | ||
6270 | * block group's reservations counter is incremented while a read lock | ||
6271 | * on the groups' semaphore is held and decremented after releasing | ||
6272 | * the read access on that semaphore and creating the ordered extent. | ||
6273 | */ | ||
6274 | down_write(&space_info->groups_sem); | ||
6275 | up_write(&space_info->groups_sem); | ||
6276 | |||
6277 | wait_on_atomic_t(&bg->reservations, | ||
6278 | btrfs_wait_bg_reservations_atomic_t, | ||
6279 | TASK_UNINTERRUPTIBLE); | ||
6280 | } | ||
6281 | |||
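The reservations counter is bumped by the allocator when it hands out an extent (see the find_free_extent hunk further down) and, per the comment above, is meant to be dropped once the ordered extent exists; a hedged sketch of the data-write side, with the surrounding steps elided:

static void example_after_data_alloc(struct btrfs_fs_info *fs_info,
                                     struct btrfs_key *ins)
{
        /* ... btrfs_reserve_extent() filled 'ins' and bumped bg->reservations ... */
        /* ... create the ordered extent covering ins->objectid / ins->offset ... */

        /* Now anyone blocked in btrfs_wait_block_group_reservations() can proceed. */
        btrfs_dec_block_group_reservations(fs_info, ins->objectid);
}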
6175 | /** | 6282 | /** |
6176 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 6283 | * btrfs_update_reserved_bytes - update the block_group and space info counters |
6177 | * @cache: The cache we are manipulating | 6284 | * @cache: The cache we are manipulating |
@@ -7025,36 +7132,35 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, | |||
7025 | int delalloc) | 7132 | int delalloc) |
7026 | { | 7133 | { |
7027 | struct btrfs_block_group_cache *used_bg = NULL; | 7134 | struct btrfs_block_group_cache *used_bg = NULL; |
7028 | bool locked = false; | 7135 | |
7029 | again: | ||
7030 | spin_lock(&cluster->refill_lock); | 7136 | spin_lock(&cluster->refill_lock); |
7031 | if (locked) { | 7137 | while (1) { |
7032 | if (used_bg == cluster->block_group) | 7138 | used_bg = cluster->block_group; |
7139 | if (!used_bg) | ||
7140 | return NULL; | ||
7141 | |||
7142 | if (used_bg == block_group) | ||
7033 | return used_bg; | 7143 | return used_bg; |
7034 | 7144 | ||
7035 | up_read(&used_bg->data_rwsem); | 7145 | btrfs_get_block_group(used_bg); |
7036 | btrfs_put_block_group(used_bg); | ||
7037 | } | ||
7038 | 7146 | ||
7039 | used_bg = cluster->block_group; | 7147 | if (!delalloc) |
7040 | if (!used_bg) | 7148 | return used_bg; |
7041 | return NULL; | ||
7042 | 7149 | ||
7043 | if (used_bg == block_group) | 7150 | if (down_read_trylock(&used_bg->data_rwsem)) |
7044 | return used_bg; | 7151 | return used_bg; |
7045 | 7152 | ||
7046 | btrfs_get_block_group(used_bg); | 7153 | spin_unlock(&cluster->refill_lock); |
7047 | 7154 | ||
7048 | if (!delalloc) | 7155 | down_read(&used_bg->data_rwsem); |
7049 | return used_bg; | ||
7050 | 7156 | ||
7051 | if (down_read_trylock(&used_bg->data_rwsem)) | 7157 | spin_lock(&cluster->refill_lock); |
7052 | return used_bg; | 7158 | if (used_bg == cluster->block_group) |
7159 | return used_bg; | ||
7053 | 7160 | ||
7054 | spin_unlock(&cluster->refill_lock); | 7161 | up_read(&used_bg->data_rwsem); |
7055 | down_read(&used_bg->data_rwsem); | 7162 | btrfs_put_block_group(used_bg); |
7056 | locked = true; | 7163 | } |
7057 | goto again; | ||
7058 | } | 7164 | } |
7059 | 7165 | ||
7060 | static inline void | 7166 | static inline void |
@@ -7431,6 +7537,7 @@ checks: | |||
7431 | btrfs_add_free_space(block_group, offset, num_bytes); | 7537 | btrfs_add_free_space(block_group, offset, num_bytes); |
7432 | goto loop; | 7538 | goto loop; |
7433 | } | 7539 | } |
7540 | btrfs_inc_block_group_reservations(block_group); | ||
7434 | 7541 | ||
7435 | /* we are all good, lets return */ | 7542 | /* we are all good, lets return */ |
7436 | ins->objectid = search_start; | 7543 | ins->objectid = search_start; |
@@ -7612,8 +7719,10 @@ again: | |||
7612 | WARN_ON(num_bytes < root->sectorsize); | 7719 | WARN_ON(num_bytes < root->sectorsize); |
7613 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, | 7720 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, |
7614 | flags, delalloc); | 7721 | flags, delalloc); |
7615 | 7722 | if (!ret && !is_data) { | |
7616 | if (ret == -ENOSPC) { | 7723 | btrfs_dec_block_group_reservations(root->fs_info, |
7724 | ins->objectid); | ||
7725 | } else if (ret == -ENOSPC) { | ||
7617 | if (!final_tried && ins->offset) { | 7726 | if (!final_tried && ins->offset) { |
7618 | num_bytes = min(num_bytes >> 1, ins->offset); | 7727 | num_bytes = min(num_bytes >> 1, ins->offset); |
7619 | num_bytes = round_down(num_bytes, root->sectorsize); | 7728 | num_bytes = round_down(num_bytes, root->sectorsize); |
@@ -9058,7 +9167,7 @@ out: | |||
9058 | if (!for_reloc && root_dropped == false) | 9167 | if (!for_reloc && root_dropped == false) |
9059 | btrfs_add_dead_root(root); | 9168 | btrfs_add_dead_root(root); |
9060 | if (err && err != -EAGAIN) | 9169 | if (err && err != -EAGAIN) |
9061 | btrfs_std_error(root->fs_info, err, NULL); | 9170 | btrfs_handle_fs_error(root->fs_info, err, NULL); |
9062 | return err; | 9171 | return err; |
9063 | } | 9172 | } |
9064 | 9173 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d247fc0eea19..2f83448d34fe 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3200,14 +3200,10 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
3200 | return ret; | 3200 | return ret; |
3201 | } | 3201 | } |
3202 | 3202 | ||
3203 | static noinline void update_nr_written(struct page *page, | 3203 | static void update_nr_written(struct page *page, struct writeback_control *wbc, |
3204 | struct writeback_control *wbc, | 3204 | unsigned long nr_written) |
3205 | unsigned long nr_written) | ||
3206 | { | 3205 | { |
3207 | wbc->nr_to_write -= nr_written; | 3206 | wbc->nr_to_write -= nr_written; |
3208 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
3209 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
3210 | page->mapping->writeback_index = page->index + nr_written; | ||
3211 | } | 3207 | } |
3212 | 3208 | ||
3213 | /* | 3209 | /* |
@@ -3368,6 +3364,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3368 | 3364 | ||
3369 | while (cur <= end) { | 3365 | while (cur <= end) { |
3370 | u64 em_end; | 3366 | u64 em_end; |
3367 | unsigned long max_nr; | ||
3368 | |||
3371 | if (cur >= i_size) { | 3369 | if (cur >= i_size) { |
3372 | if (tree->ops && tree->ops->writepage_end_io_hook) | 3370 | if (tree->ops && tree->ops->writepage_end_io_hook) |
3373 | tree->ops->writepage_end_io_hook(page, cur, | 3371 | tree->ops->writepage_end_io_hook(page, cur, |
@@ -3423,32 +3421,23 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3423 | continue; | 3421 | continue; |
3424 | } | 3422 | } |
3425 | 3423 | ||
3426 | if (tree->ops && tree->ops->writepage_io_hook) { | 3424 | max_nr = (i_size >> PAGE_SHIFT) + 1; |
3427 | ret = tree->ops->writepage_io_hook(page, cur, | 3425 | |
3428 | cur + iosize - 1); | 3426 | set_range_writeback(tree, cur, cur + iosize - 1); |
3429 | } else { | 3427 | if (!PageWriteback(page)) { |
3430 | ret = 0; | 3428 | btrfs_err(BTRFS_I(inode)->root->fs_info, |
3429 | "page %lu not writeback, cur %llu end %llu", | ||
3430 | page->index, cur, end); | ||
3431 | } | 3431 | } |
3432 | if (ret) { | ||
3433 | SetPageError(page); | ||
3434 | } else { | ||
3435 | unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1; | ||
3436 | 3432 | ||
3437 | set_range_writeback(tree, cur, cur + iosize - 1); | 3433 | ret = submit_extent_page(write_flags, tree, wbc, page, |
3438 | if (!PageWriteback(page)) { | 3434 | sector, iosize, pg_offset, |
3439 | btrfs_err(BTRFS_I(inode)->root->fs_info, | 3435 | bdev, &epd->bio, max_nr, |
3440 | "page %lu not writeback, cur %llu end %llu", | 3436 | end_bio_extent_writepage, |
3441 | page->index, cur, end); | 3437 | 0, 0, 0, false); |
3442 | } | 3438 | if (ret) |
3439 | SetPageError(page); | ||
3443 | 3440 | ||
3444 | ret = submit_extent_page(write_flags, tree, wbc, page, | ||
3445 | sector, iosize, pg_offset, | ||
3446 | bdev, &epd->bio, max_nr, | ||
3447 | end_bio_extent_writepage, | ||
3448 | 0, 0, 0, false); | ||
3449 | if (ret) | ||
3450 | SetPageError(page); | ||
3451 | } | ||
3452 | cur = cur + iosize; | 3441 | cur = cur + iosize; |
3453 | pg_offset += iosize; | 3442 | pg_offset += iosize; |
3454 | nr++; | 3443 | nr++; |
@@ -3920,12 +3909,13 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
3920 | struct inode *inode = mapping->host; | 3909 | struct inode *inode = mapping->host; |
3921 | int ret = 0; | 3910 | int ret = 0; |
3922 | int done = 0; | 3911 | int done = 0; |
3923 | int err = 0; | ||
3924 | int nr_to_write_done = 0; | 3912 | int nr_to_write_done = 0; |
3925 | struct pagevec pvec; | 3913 | struct pagevec pvec; |
3926 | int nr_pages; | 3914 | int nr_pages; |
3927 | pgoff_t index; | 3915 | pgoff_t index; |
3928 | pgoff_t end; /* Inclusive */ | 3916 | pgoff_t end; /* Inclusive */ |
3917 | pgoff_t done_index; | ||
3918 | int range_whole = 0; | ||
3929 | int scanned = 0; | 3919 | int scanned = 0; |
3930 | int tag; | 3920 | int tag; |
3931 | 3921 | ||
@@ -3948,6 +3938,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
3948 | } else { | 3938 | } else { |
3949 | index = wbc->range_start >> PAGE_SHIFT; | 3939 | index = wbc->range_start >> PAGE_SHIFT; |
3950 | end = wbc->range_end >> PAGE_SHIFT; | 3940 | end = wbc->range_end >> PAGE_SHIFT; |
3941 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
3942 | range_whole = 1; | ||
3951 | scanned = 1; | 3943 | scanned = 1; |
3952 | } | 3944 | } |
3953 | if (wbc->sync_mode == WB_SYNC_ALL) | 3945 | if (wbc->sync_mode == WB_SYNC_ALL) |
@@ -3957,6 +3949,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
3957 | retry: | 3949 | retry: |
3958 | if (wbc->sync_mode == WB_SYNC_ALL) | 3950 | if (wbc->sync_mode == WB_SYNC_ALL) |
3959 | tag_pages_for_writeback(mapping, index, end); | 3951 | tag_pages_for_writeback(mapping, index, end); |
3952 | done_index = index; | ||
3960 | while (!done && !nr_to_write_done && (index <= end) && | 3953 | while (!done && !nr_to_write_done && (index <= end) && |
3961 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 3954 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
3962 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 3955 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
@@ -3966,6 +3959,7 @@ retry: | |||
3966 | for (i = 0; i < nr_pages; i++) { | 3959 | for (i = 0; i < nr_pages; i++) { |
3967 | struct page *page = pvec.pages[i]; | 3960 | struct page *page = pvec.pages[i]; |
3968 | 3961 | ||
3962 | done_index = page->index; | ||
3969 | /* | 3963 | /* |
3970 | * At this point we hold neither mapping->tree_lock nor | 3964 | * At this point we hold neither mapping->tree_lock nor |
3971 | * lock on the page itself: the page may be truncated or | 3965 | * lock on the page itself: the page may be truncated or |
@@ -4007,8 +4001,20 @@ retry: | |||
4007 | unlock_page(page); | 4001 | unlock_page(page); |
4008 | ret = 0; | 4002 | ret = 0; |
4009 | } | 4003 | } |
4010 | if (!err && ret < 0) | 4004 | if (ret < 0) { |
4011 | err = ret; | 4005 | /* |
4006 | * done_index is set past this page, | ||
4007 | * so media errors will not choke | ||
4008 | * background writeout for the entire | ||
4009 | * file. This has consequences for | ||
4010 | * range_cyclic semantics (ie. it may | ||
4011 | * not be suitable for data integrity | ||
4012 | * writeout). | ||
4013 | */ | ||
4014 | done_index = page->index + 1; | ||
4015 | done = 1; | ||
4016 | break; | ||
4017 | } | ||
4012 | 4018 | ||
4013 | /* | 4019 | /* |
4014 | * the filesystem may choose to bump up nr_to_write. | 4020 | * the filesystem may choose to bump up nr_to_write. |
@@ -4020,7 +4026,7 @@ retry: | |||
4020 | pagevec_release(&pvec); | 4026 | pagevec_release(&pvec); |
4021 | cond_resched(); | 4027 | cond_resched(); |
4022 | } | 4028 | } |
4023 | if (!scanned && !done && !err) { | 4029 | if (!scanned && !done) { |
4024 | /* | 4030 | /* |
4025 | * We hit the last page and there is more work to be done: wrap | 4031 | * We hit the last page and there is more work to be done: wrap |
4026 | * back to the start of the file | 4032 | * back to the start of the file |
@@ -4029,8 +4035,12 @@ retry: | |||
4029 | index = 0; | 4035 | index = 0; |
4030 | goto retry; | 4036 | goto retry; |
4031 | } | 4037 | } |
4038 | |||
4039 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole)) | ||
4040 | mapping->writeback_index = done_index; | ||
4041 | |||
4032 | btrfs_add_delayed_iput(inode); | 4042 | btrfs_add_delayed_iput(inode); |
4033 | return err; | 4043 | return ret; |
4034 | } | 4044 | } |
4035 | 4045 | ||
4036 | static void flush_epd_write_bio(struct extent_page_data *epd) | 4046 | static void flush_epd_write_bio(struct extent_page_data *epd) |
@@ -4822,7 +4832,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4822 | return NULL; | 4832 | return NULL; |
4823 | eb->fs_info = fs_info; | 4833 | eb->fs_info = fs_info; |
4824 | again: | 4834 | again: |
4825 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 4835 | ret = radix_tree_preload(GFP_NOFS); |
4826 | if (ret) | 4836 | if (ret) |
4827 | goto free_eb; | 4837 | goto free_eb; |
4828 | spin_lock(&fs_info->buffer_lock); | 4838 | spin_lock(&fs_info->buffer_lock); |
@@ -4923,7 +4933,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, | |||
4923 | if (uptodate) | 4933 | if (uptodate) |
4924 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 4934 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
4925 | again: | 4935 | again: |
4926 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 4936 | ret = radix_tree_preload(GFP_NOFS); |
4927 | if (ret) | 4937 | if (ret) |
4928 | goto free_eb; | 4938 | goto free_eb; |
4929 | 4939 | ||
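The extent_io.c hunks above stop touching mapping->writeback_index from update_nr_written() and instead track a done_index in extent_write_cache_pages(), mirroring the generic write_cache_pages(): remember how far the scan got, skip past a failing page so background writeout is not wedged on it, and save the resume point only for cyclic or whole-file writeback. A small standalone model of that bookkeeping (toy page array, not kernel code):

    #include <errno.h>
    #include <stdio.h>

    struct page { unsigned long index; int bad; };

    static unsigned long writeback_index;   /* models mapping->writeback_index */

    static int write_one(const struct page *p)
    {
        return p->bad ? -EIO : 0;
    }

    static int write_cache_pages(struct page *pages, int nr, int range_cyclic)
    {
        unsigned long done_index = writeback_index;
        int i, ret = 0, done = 0;

        for (i = 0; i < nr && !done; i++) {
            done_index = pages[i].index;
            ret = write_one(&pages[i]);
            if (ret < 0) {
                /* record the page after the failure so the next
                 * cyclic pass does not retry it forever */
                done_index = pages[i].index + 1;
                done = 1;
            }
        }

        if (range_cyclic)
            writeback_index = done_index;
        return ret;
    }

    int main(void)
    {
        struct page pages[] = { {10, 0}, {11, 1}, {12, 0} };
        int ret = write_cache_pages(pages, 3, 1);

        printf("ret=%d, resume at index %lu\n", ret, writeback_index);
        return 0;
    }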
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index b5e0ade90e88..981f402bf754 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -71,7 +71,6 @@ struct extent_io_ops { | |||
71 | u64 start, u64 end, int *page_started, | 71 | u64 start, u64 end, int *page_started, |
72 | unsigned long *nr_written); | 72 | unsigned long *nr_written); |
73 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); | 73 | int (*writepage_start_hook)(struct page *page, u64 start, u64 end); |
74 | int (*writepage_io_hook)(struct page *page, u64 start, u64 end); | ||
75 | extent_submit_bio_hook_t *submit_bio_hook; | 74 | extent_submit_bio_hook_t *submit_bio_hook; |
76 | int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, | 75 | int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, |
77 | size_t size, struct bio *bio, | 76 | size_t size, struct bio *bio, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ea9f10bb089c..c98805c35bab 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1696,7 +1696,9 @@ again: | |||
1696 | btrfs_end_write_no_snapshoting(root); | 1696 | btrfs_end_write_no_snapshoting(root); |
1697 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1697 | btrfs_delalloc_release_metadata(inode, release_bytes); |
1698 | } else { | 1698 | } else { |
1699 | btrfs_delalloc_release_space(inode, pos, release_bytes); | 1699 | btrfs_delalloc_release_space(inode, |
1700 | round_down(pos, root->sectorsize), | ||
1701 | release_bytes); | ||
1700 | } | 1702 | } |
1701 | } | 1703 | } |
1702 | 1704 | ||
@@ -2952,7 +2954,7 @@ const struct file_operations btrfs_file_operations = { | |||
2952 | .fallocate = btrfs_fallocate, | 2954 | .fallocate = btrfs_fallocate, |
2953 | .unlocked_ioctl = btrfs_ioctl, | 2955 | .unlocked_ioctl = btrfs_ioctl, |
2954 | #ifdef CONFIG_COMPAT | 2956 | #ifdef CONFIG_COMPAT |
2955 | .compat_ioctl = btrfs_ioctl, | 2957 | .compat_ioctl = btrfs_compat_ioctl, |
2956 | #endif | 2958 | #endif |
2957 | .copy_file_range = btrfs_copy_file_range, | 2959 | .copy_file_range = btrfs_copy_file_range, |
2958 | .clone_file_range = btrfs_clone_file_range, | 2960 | .clone_file_range = btrfs_clone_file_range, |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index be4d22a5022f..b8acc07ac6c2 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -157,7 +157,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, | |||
157 | */ | 157 | */ |
158 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, | 158 | if (!btrfs_find_name_in_ext_backref(path, ref_objectid, |
159 | name, name_len, &extref)) { | 159 | name, name_len, &extref)) { |
160 | btrfs_std_error(root->fs_info, -ENOENT, NULL); | 160 | btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL); |
161 | ret = -EROFS; | 161 | ret = -EROFS; |
162 | goto out; | 162 | goto out; |
163 | } | 163 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6b7fe291a174..91419ef79b00 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -824,6 +824,7 @@ retry: | |||
824 | async_extent->ram_size - 1, 0); | 824 | async_extent->ram_size - 1, 0); |
825 | goto out_free_reserve; | 825 | goto out_free_reserve; |
826 | } | 826 | } |
827 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
827 | 828 | ||
828 | /* | 829 | /* |
829 | * clear dirty, set writeback and unlock the pages. | 830 | * clear dirty, set writeback and unlock the pages. |
@@ -861,6 +862,7 @@ retry: | |||
861 | } | 862 | } |
862 | return; | 863 | return; |
863 | out_free_reserve: | 864 | out_free_reserve: |
865 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
864 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 866 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
865 | out_free: | 867 | out_free: |
866 | extent_clear_unlock_delalloc(inode, async_extent->start, | 868 | extent_clear_unlock_delalloc(inode, async_extent->start, |
@@ -1038,6 +1040,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
1038 | goto out_drop_extent_cache; | 1040 | goto out_drop_extent_cache; |
1039 | } | 1041 | } |
1040 | 1042 | ||
1043 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
1044 | |||
1041 | if (disk_num_bytes < cur_alloc_size) | 1045 | if (disk_num_bytes < cur_alloc_size) |
1042 | break; | 1046 | break; |
1043 | 1047 | ||
@@ -1066,6 +1070,7 @@ out: | |||
1066 | out_drop_extent_cache: | 1070 | out_drop_extent_cache: |
1067 | btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); | 1071 | btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); |
1068 | out_reserve: | 1072 | out_reserve: |
1073 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
1069 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 1074 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
1070 | out_unlock: | 1075 | out_unlock: |
1071 | extent_clear_unlock_delalloc(inode, start, end, locked_page, | 1076 | extent_clear_unlock_delalloc(inode, start, end, locked_page, |
@@ -1377,6 +1382,9 @@ next_slot: | |||
1377 | */ | 1382 | */ |
1378 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 1383 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
1379 | goto out_check; | 1384 | goto out_check; |
1385 | if (!btrfs_inc_nocow_writers(root->fs_info, | ||
1386 | disk_bytenr)) | ||
1387 | goto out_check; | ||
1380 | nocow = 1; | 1388 | nocow = 1; |
1381 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 1389 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
1382 | extent_end = found_key.offset + | 1390 | extent_end = found_key.offset + |
@@ -1391,6 +1399,9 @@ out_check: | |||
1391 | path->slots[0]++; | 1399 | path->slots[0]++; |
1392 | if (!nolock && nocow) | 1400 | if (!nolock && nocow) |
1393 | btrfs_end_write_no_snapshoting(root); | 1401 | btrfs_end_write_no_snapshoting(root); |
1402 | if (nocow) | ||
1403 | btrfs_dec_nocow_writers(root->fs_info, | ||
1404 | disk_bytenr); | ||
1394 | goto next_slot; | 1405 | goto next_slot; |
1395 | } | 1406 | } |
1396 | if (!nocow) { | 1407 | if (!nocow) { |
@@ -1411,6 +1422,9 @@ out_check: | |||
1411 | if (ret) { | 1422 | if (ret) { |
1412 | if (!nolock && nocow) | 1423 | if (!nolock && nocow) |
1413 | btrfs_end_write_no_snapshoting(root); | 1424 | btrfs_end_write_no_snapshoting(root); |
1425 | if (nocow) | ||
1426 | btrfs_dec_nocow_writers(root->fs_info, | ||
1427 | disk_bytenr); | ||
1414 | goto error; | 1428 | goto error; |
1415 | } | 1429 | } |
1416 | cow_start = (u64)-1; | 1430 | cow_start = (u64)-1; |
@@ -1453,6 +1467,8 @@ out_check: | |||
1453 | 1467 | ||
1454 | ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, | 1468 | ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, |
1455 | num_bytes, num_bytes, type); | 1469 | num_bytes, num_bytes, type); |
1470 | if (nocow) | ||
1471 | btrfs_dec_nocow_writers(root->fs_info, disk_bytenr); | ||
1456 | BUG_ON(ret); /* -ENOMEM */ | 1472 | BUG_ON(ret); /* -ENOMEM */ |
1457 | 1473 | ||
1458 | if (root->root_key.objectid == | 1474 | if (root->root_key.objectid == |
@@ -7129,6 +7145,43 @@ out: | |||
7129 | return em; | 7145 | return em; |
7130 | } | 7146 | } |
7131 | 7147 | ||
7148 | static struct extent_map *btrfs_create_dio_extent(struct inode *inode, | ||
7149 | const u64 start, | ||
7150 | const u64 len, | ||
7151 | const u64 orig_start, | ||
7152 | const u64 block_start, | ||
7153 | const u64 block_len, | ||
7154 | const u64 orig_block_len, | ||
7155 | const u64 ram_bytes, | ||
7156 | const int type) | ||
7157 | { | ||
7158 | struct extent_map *em = NULL; | ||
7159 | int ret; | ||
7160 | |||
7161 | down_read(&BTRFS_I(inode)->dio_sem); | ||
7162 | if (type != BTRFS_ORDERED_NOCOW) { | ||
7163 | em = create_pinned_em(inode, start, len, orig_start, | ||
7164 | block_start, block_len, orig_block_len, | ||
7165 | ram_bytes, type); | ||
7166 | if (IS_ERR(em)) | ||
7167 | goto out; | ||
7168 | } | ||
7169 | ret = btrfs_add_ordered_extent_dio(inode, start, block_start, | ||
7170 | len, block_len, type); | ||
7171 | if (ret) { | ||
7172 | if (em) { | ||
7173 | free_extent_map(em); | ||
7174 | btrfs_drop_extent_cache(inode, start, | ||
7175 | start + len - 1, 0); | ||
7176 | } | ||
7177 | em = ERR_PTR(ret); | ||
7178 | } | ||
7179 | out: | ||
7180 | up_read(&BTRFS_I(inode)->dio_sem); | ||
7181 | |||
7182 | return em; | ||
7183 | } | ||
7184 | |||
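btrfs_create_dio_extent() above follows the usual kernel convention of returning either a valid pointer or an errno packed into the pointer with ERR_PTR(), and of undoing step one (the pinned extent map) when step two (the ordered extent) fails. A userspace sketch of that convention and unwind order, with toy types standing in for the btrfs structures:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* minimal ERR_PTR/IS_ERR analogue, same encoding idea as the kernel's */
    #define ERR_PTR(err)  ((void *)(long)(err))
    #define PTR_ERR(ptr)  ((long)(ptr))
    #define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-4095)

    struct map { long start, len; };

    static int add_ordered_extent(long start, long len)
    {
        return len ? 0 : -EINVAL;   /* pretend step two can fail */
    }

    static struct map *create_dio_extent(long start, long len)
    {
        struct map *em = malloc(sizeof(*em));
        int ret;

        if (!em)
            return ERR_PTR(-ENOMEM);
        em->start = start;
        em->len = len;

        ret = add_ordered_extent(start, len);
        if (ret) {
            free(em);               /* undo step one on failure */
            return ERR_PTR(ret);
        }
        return em;
    }

    int main(void)
    {
        struct map *em = create_dio_extent(0, 4096);

        if (IS_ERR(em)) {
            fprintf(stderr, "failed: %ld\n", PTR_ERR(em));
            return 1;
        }
        printf("extent %ld+%ld\n", em->start, em->len);
        free(em);
        return 0;
    }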
7132 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 7185 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
7133 | u64 start, u64 len) | 7186 | u64 start, u64 len) |
7134 | { | 7187 | { |
@@ -7144,41 +7197,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7144 | if (ret) | 7197 | if (ret) |
7145 | return ERR_PTR(ret); | 7198 | return ERR_PTR(ret); |
7146 | 7199 | ||
7147 | /* | 7200 | em = btrfs_create_dio_extent(inode, start, ins.offset, start, |
7148 | * Create the ordered extent before the extent map. This is to avoid | 7201 | ins.objectid, ins.offset, ins.offset, |
7149 | * races with the fast fsync path that would lead to it logging file | 7202 | ins.offset, 0); |
7150 | * extent items that point to disk extents that were not yet written to. | 7203 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); |
7151 | * The fast fsync path collects ordered extents into a local list and | 7204 | if (IS_ERR(em)) |
7152 | * then collects all the new extent maps, so we must create the ordered | ||
7153 | * extent first and make sure the fast fsync path collects any new | ||
7154 | * ordered extents after collecting new extent maps as well. | ||
7155 | * The fsync path simply can not rely on inode_dio_wait() because it | ||
7156 | * causes deadlock with AIO. | ||
7157 | */ | ||
7158 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
7159 | ins.offset, ins.offset, 0); | ||
7160 | if (ret) { | ||
7161 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 7205 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
7162 | return ERR_PTR(ret); | ||
7163 | } | ||
7164 | |||
7165 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, | ||
7166 | ins.offset, ins.offset, ins.offset, 0); | ||
7167 | if (IS_ERR(em)) { | ||
7168 | struct btrfs_ordered_extent *oe; | ||
7169 | 7206 | ||
7170 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | ||
7171 | oe = btrfs_lookup_ordered_extent(inode, start); | ||
7172 | ASSERT(oe); | ||
7173 | if (WARN_ON(!oe)) | ||
7174 | return em; | ||
7175 | set_bit(BTRFS_ORDERED_IOERR, &oe->flags); | ||
7176 | set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags); | ||
7177 | btrfs_remove_ordered_extent(inode, oe); | ||
7178 | /* Once for our lookup and once for the ordered extents tree. */ | ||
7179 | btrfs_put_ordered_extent(oe); | ||
7180 | btrfs_put_ordered_extent(oe); | ||
7181 | } | ||
7182 | return em; | 7207 | return em; |
7183 | } | 7208 | } |
7184 | 7209 | ||
@@ -7650,24 +7675,21 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7650 | block_start = em->block_start + (start - em->start); | 7675 | block_start = em->block_start + (start - em->start); |
7651 | 7676 | ||
7652 | if (can_nocow_extent(inode, start, &len, &orig_start, | 7677 | if (can_nocow_extent(inode, start, &len, &orig_start, |
7653 | &orig_block_len, &ram_bytes) == 1) { | 7678 | &orig_block_len, &ram_bytes) == 1 && |
7679 | btrfs_inc_nocow_writers(root->fs_info, block_start)) { | ||
7680 | struct extent_map *em2; | ||
7681 | |||
7682 | em2 = btrfs_create_dio_extent(inode, start, len, | ||
7683 | orig_start, block_start, | ||
7684 | len, orig_block_len, | ||
7685 | ram_bytes, type); | ||
7686 | btrfs_dec_nocow_writers(root->fs_info, block_start); | ||
7654 | if (type == BTRFS_ORDERED_PREALLOC) { | 7687 | if (type == BTRFS_ORDERED_PREALLOC) { |
7655 | free_extent_map(em); | 7688 | free_extent_map(em); |
7656 | em = create_pinned_em(inode, start, len, | 7689 | em = em2; |
7657 | orig_start, | ||
7658 | block_start, len, | ||
7659 | orig_block_len, | ||
7660 | ram_bytes, type); | ||
7661 | if (IS_ERR(em)) { | ||
7662 | ret = PTR_ERR(em); | ||
7663 | goto unlock_err; | ||
7664 | } | ||
7665 | } | 7690 | } |
7666 | 7691 | if (em2 && IS_ERR(em2)) { | |
7667 | ret = btrfs_add_ordered_extent_dio(inode, start, | 7692 | ret = PTR_ERR(em2); |
7668 | block_start, len, len, type); | ||
7669 | if (ret) { | ||
7670 | free_extent_map(em); | ||
7671 | goto unlock_err; | 7693 | goto unlock_err; |
7672 | } | 7694 | } |
7673 | goto unlock; | 7695 | goto unlock; |
@@ -9230,6 +9252,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
9230 | INIT_LIST_HEAD(&ei->delalloc_inodes); | 9252 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
9231 | INIT_LIST_HEAD(&ei->delayed_iput); | 9253 | INIT_LIST_HEAD(&ei->delayed_iput); |
9232 | RB_CLEAR_NODE(&ei->rb_node); | 9254 | RB_CLEAR_NODE(&ei->rb_node); |
9255 | init_rwsem(&ei->dio_sem); | ||
9233 | 9256 | ||
9234 | return inode; | 9257 | return inode; |
9235 | } | 9258 | } |
@@ -9387,10 +9410,281 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
9387 | return 0; | 9410 | return 0; |
9388 | } | 9411 | } |
9389 | 9412 | ||
9413 | static int btrfs_rename_exchange(struct inode *old_dir, | ||
9414 | struct dentry *old_dentry, | ||
9415 | struct inode *new_dir, | ||
9416 | struct dentry *new_dentry) | ||
9417 | { | ||
9418 | struct btrfs_trans_handle *trans; | ||
9419 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | ||
9420 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | ||
9421 | struct inode *new_inode = new_dentry->d_inode; | ||
9422 | struct inode *old_inode = old_dentry->d_inode; | ||
9423 | struct timespec ctime = CURRENT_TIME; | ||
9424 | struct dentry *parent; | ||
9425 | u64 old_ino = btrfs_ino(old_inode); | ||
9426 | u64 new_ino = btrfs_ino(new_inode); | ||
9427 | u64 old_idx = 0; | ||
9428 | u64 new_idx = 0; | ||
9429 | u64 root_objectid; | ||
9430 | int ret; | ||
9431 | bool root_log_pinned = false; | ||
9432 | bool dest_log_pinned = false; | ||
9433 | |||
9434 | /* we only allow rename subvolume link between subvolumes */ | ||
9435 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | ||
9436 | return -EXDEV; | ||
9437 | |||
9438 | /* close the race window with snapshot create/destroy ioctl */ | ||
9439 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9440 | down_read(&root->fs_info->subvol_sem); | ||
9441 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9442 | down_read(&dest->fs_info->subvol_sem); | ||
9443 | |||
9444 | /* | ||
9445 | * We want to reserve the absolute worst case amount of items. So if | ||
9446 | * both inodes are subvols and we need to unlink them then that would | ||
9447 | * require 4 item modifications, but if they are both normal inodes it | ||
9448 | * would require 5 item modifications, so we'll assume their normal | ||
9449 | * inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items | ||
9450 | * should cover the worst case number of items we'll modify. | ||
9451 | */ | ||
9452 | trans = btrfs_start_transaction(root, 12); | ||
9453 | if (IS_ERR(trans)) { | ||
9454 | ret = PTR_ERR(trans); | ||
9455 | goto out_notrans; | ||
9456 | } | ||
9457 | |||
9458 | /* | ||
9459 | * We need to find a free sequence number both in the source and | ||
9460 | * in the destination directory for the exchange. | ||
9461 | */ | ||
9462 | ret = btrfs_set_inode_index(new_dir, &old_idx); | ||
9463 | if (ret) | ||
9464 | goto out_fail; | ||
9465 | ret = btrfs_set_inode_index(old_dir, &new_idx); | ||
9466 | if (ret) | ||
9467 | goto out_fail; | ||
9468 | |||
9469 | BTRFS_I(old_inode)->dir_index = 0ULL; | ||
9470 | BTRFS_I(new_inode)->dir_index = 0ULL; | ||
9471 | |||
9472 | /* Reference for the source. */ | ||
9473 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9474 | /* force full log commit if subvolume involved. */ | ||
9475 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
9476 | } else { | ||
9477 | btrfs_pin_log_trans(root); | ||
9478 | root_log_pinned = true; | ||
9479 | ret = btrfs_insert_inode_ref(trans, dest, | ||
9480 | new_dentry->d_name.name, | ||
9481 | new_dentry->d_name.len, | ||
9482 | old_ino, | ||
9483 | btrfs_ino(new_dir), old_idx); | ||
9484 | if (ret) | ||
9485 | goto out_fail; | ||
9486 | } | ||
9487 | |||
9488 | /* And now for the dest. */ | ||
9489 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9490 | /* force full log commit if subvolume involved. */ | ||
9491 | btrfs_set_log_full_commit(dest->fs_info, trans); | ||
9492 | } else { | ||
9493 | btrfs_pin_log_trans(dest); | ||
9494 | dest_log_pinned = true; | ||
9495 | ret = btrfs_insert_inode_ref(trans, root, | ||
9496 | old_dentry->d_name.name, | ||
9497 | old_dentry->d_name.len, | ||
9498 | new_ino, | ||
9499 | btrfs_ino(old_dir), new_idx); | ||
9500 | if (ret) | ||
9501 | goto out_fail; | ||
9502 | } | ||
9503 | |||
9504 | /* Update inode version and ctime/mtime. */ | ||
9505 | inode_inc_iversion(old_dir); | ||
9506 | inode_inc_iversion(new_dir); | ||
9507 | inode_inc_iversion(old_inode); | ||
9508 | inode_inc_iversion(new_inode); | ||
9509 | old_dir->i_ctime = old_dir->i_mtime = ctime; | ||
9510 | new_dir->i_ctime = new_dir->i_mtime = ctime; | ||
9511 | old_inode->i_ctime = ctime; | ||
9512 | new_inode->i_ctime = ctime; | ||
9513 | |||
9514 | if (old_dentry->d_parent != new_dentry->d_parent) { | ||
9515 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | ||
9516 | btrfs_record_unlink_dir(trans, new_dir, new_inode, 1); | ||
9517 | } | ||
9518 | |||
9519 | /* src is a subvolume */ | ||
9520 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9521 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; | ||
9522 | ret = btrfs_unlink_subvol(trans, root, old_dir, | ||
9523 | root_objectid, | ||
9524 | old_dentry->d_name.name, | ||
9525 | old_dentry->d_name.len); | ||
9526 | } else { /* src is an inode */ | ||
9527 | ret = __btrfs_unlink_inode(trans, root, old_dir, | ||
9528 | old_dentry->d_inode, | ||
9529 | old_dentry->d_name.name, | ||
9530 | old_dentry->d_name.len); | ||
9531 | if (!ret) | ||
9532 | ret = btrfs_update_inode(trans, root, old_inode); | ||
9533 | } | ||
9534 | if (ret) { | ||
9535 | btrfs_abort_transaction(trans, root, ret); | ||
9536 | goto out_fail; | ||
9537 | } | ||
9538 | |||
9539 | /* dest is a subvolume */ | ||
9540 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
9541 | root_objectid = BTRFS_I(new_inode)->root->root_key.objectid; | ||
9542 | ret = btrfs_unlink_subvol(trans, dest, new_dir, | ||
9543 | root_objectid, | ||
9544 | new_dentry->d_name.name, | ||
9545 | new_dentry->d_name.len); | ||
9546 | } else { /* dest is an inode */ | ||
9547 | ret = __btrfs_unlink_inode(trans, dest, new_dir, | ||
9548 | new_dentry->d_inode, | ||
9549 | new_dentry->d_name.name, | ||
9550 | new_dentry->d_name.len); | ||
9551 | if (!ret) | ||
9552 | ret = btrfs_update_inode(trans, dest, new_inode); | ||
9553 | } | ||
9554 | if (ret) { | ||
9555 | btrfs_abort_transaction(trans, root, ret); | ||
9556 | goto out_fail; | ||
9557 | } | ||
9558 | |||
9559 | ret = btrfs_add_link(trans, new_dir, old_inode, | ||
9560 | new_dentry->d_name.name, | ||
9561 | new_dentry->d_name.len, 0, old_idx); | ||
9562 | if (ret) { | ||
9563 | btrfs_abort_transaction(trans, root, ret); | ||
9564 | goto out_fail; | ||
9565 | } | ||
9566 | |||
9567 | ret = btrfs_add_link(trans, old_dir, new_inode, | ||
9568 | old_dentry->d_name.name, | ||
9569 | old_dentry->d_name.len, 0, new_idx); | ||
9570 | if (ret) { | ||
9571 | btrfs_abort_transaction(trans, root, ret); | ||
9572 | goto out_fail; | ||
9573 | } | ||
9574 | |||
9575 | if (old_inode->i_nlink == 1) | ||
9576 | BTRFS_I(old_inode)->dir_index = old_idx; | ||
9577 | if (new_inode->i_nlink == 1) | ||
9578 | BTRFS_I(new_inode)->dir_index = new_idx; | ||
9579 | |||
9580 | if (root_log_pinned) { | ||
9581 | parent = new_dentry->d_parent; | ||
9582 | btrfs_log_new_name(trans, old_inode, old_dir, parent); | ||
9583 | btrfs_end_log_trans(root); | ||
9584 | root_log_pinned = false; | ||
9585 | } | ||
9586 | if (dest_log_pinned) { | ||
9587 | parent = old_dentry->d_parent; | ||
9588 | btrfs_log_new_name(trans, new_inode, new_dir, parent); | ||
9589 | btrfs_end_log_trans(dest); | ||
9590 | dest_log_pinned = false; | ||
9591 | } | ||
9592 | out_fail: | ||
9593 | /* | ||
9594 | * If we have pinned a log and an error happened, we unpin tasks | ||
9595 | * trying to sync the log and force them to fallback to a transaction | ||
9596 | * commit if the log currently contains any of the inodes involved in | ||
9597 | * this rename operation (to ensure we do not persist a log with an | ||
9598 | * inconsistent state for any of these inodes or leading to any | ||
9599 | * inconsistencies when replayed). If the transaction was aborted, the | ||
9600 | * abortion reason is propagated to userspace when attempting to commit | ||
9601 | * the transaction. If the log does not contain any of these inodes, we | ||
9602 | * allow the tasks to sync it. | ||
9603 | */ | ||
9604 | if (ret && (root_log_pinned || dest_log_pinned)) { | ||
9605 | if (btrfs_inode_in_log(old_dir, root->fs_info->generation) || | ||
9606 | btrfs_inode_in_log(new_dir, root->fs_info->generation) || | ||
9607 | btrfs_inode_in_log(old_inode, root->fs_info->generation) || | ||
9608 | (new_inode && | ||
9609 | btrfs_inode_in_log(new_inode, root->fs_info->generation))) | ||
9610 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
9611 | |||
9612 | if (root_log_pinned) { | ||
9613 | btrfs_end_log_trans(root); | ||
9614 | root_log_pinned = false; | ||
9615 | } | ||
9616 | if (dest_log_pinned) { | ||
9617 | btrfs_end_log_trans(dest); | ||
9618 | dest_log_pinned = false; | ||
9619 | } | ||
9620 | } | ||
9621 | ret = btrfs_end_transaction(trans, root); | ||
9622 | out_notrans: | ||
9623 | if (new_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9624 | up_read(&dest->fs_info->subvol_sem); | ||
9625 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
9626 | up_read(&root->fs_info->subvol_sem); | ||
9627 | |||
9628 | return ret; | ||
9629 | } | ||
9630 | |||
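btrfs_rename_exchange() above is reached from userspace through renameat2() with RENAME_EXCHANGE, which atomically swaps two existing paths (the syscall has been in Linux since 3.15; btrfs support arrives with this series). A hedged usage sketch that calls the raw syscall so it also builds on glibc versions without a renameat2() wrapper; the flag value is copied defensively in case the installed headers predate it:

    #define _GNU_SOURCE
    #include <fcntl.h>          /* AT_FDCWD */
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef RENAME_EXCHANGE
    #define RENAME_EXCHANGE (1 << 1)    /* from include/uapi/linux/fs.h */
    #endif

    int main(int argc, char **argv)
    {
        if (argc != 3) {
            fprintf(stderr, "usage: %s <path-a> <path-b>\n", argv[0]);
            return 2;
        }

        /* both paths must already exist; the swap is atomic */
        if (syscall(SYS_renameat2, AT_FDCWD, argv[1],
                    AT_FDCWD, argv[2], RENAME_EXCHANGE) < 0) {
            perror("renameat2");
            return 1;
        }
        return 0;
    }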
9631 | static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans, | ||
9632 | struct btrfs_root *root, | ||
9633 | struct inode *dir, | ||
9634 | struct dentry *dentry) | ||
9635 | { | ||
9636 | int ret; | ||
9637 | struct inode *inode; | ||
9638 | u64 objectid; | ||
9639 | u64 index; | ||
9640 | |||
9641 | ret = btrfs_find_free_ino(root, &objectid); | ||
9642 | if (ret) | ||
9643 | return ret; | ||
9644 | |||
9645 | inode = btrfs_new_inode(trans, root, dir, | ||
9646 | dentry->d_name.name, | ||
9647 | dentry->d_name.len, | ||
9648 | btrfs_ino(dir), | ||
9649 | objectid, | ||
9650 | S_IFCHR | WHITEOUT_MODE, | ||
9651 | &index); | ||
9652 | |||
9653 | if (IS_ERR(inode)) { | ||
9654 | ret = PTR_ERR(inode); | ||
9655 | return ret; | ||
9656 | } | ||
9657 | |||
9658 | inode->i_op = &btrfs_special_inode_operations; | ||
9659 | init_special_inode(inode, inode->i_mode, | ||
9660 | WHITEOUT_DEV); | ||
9661 | |||
9662 | ret = btrfs_init_inode_security(trans, inode, dir, | ||
9663 | &dentry->d_name); | ||
9664 | if (ret) | ||
9665 | goto out; | ||
9666 | |||
9667 | ret = btrfs_add_nondir(trans, dir, dentry, | ||
9668 | inode, 0, index); | ||
9669 | if (ret) | ||
9670 | goto out; | ||
9671 | |||
9672 | ret = btrfs_update_inode(trans, root, inode); | ||
9673 | out: | ||
9674 | unlock_new_inode(inode); | ||
9675 | if (ret) | ||
9676 | inode_dec_link_count(inode); | ||
9677 | iput(inode); | ||
9678 | |||
9679 | return ret; | ||
9680 | } | ||
9681 | |||
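The whiteout inode created above follows the overlayfs convention: a character device node with device number 0:0 (WHITEOUT_DEV). A quick way to recognise one from userspace, for example when inspecting an overlayfs upper directory directly (illustrative helper, not part of btrfs):

    #include <stdio.h>
    #include <sys/stat.h>
    #include <sys/sysmacros.h>

    int main(int argc, char **argv)
    {
        struct stat st;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <path>\n", argv[0]);
            return 2;
        }
        if (lstat(argv[1], &st) != 0) {
            perror("lstat");
            return 1;
        }
        if (S_ISCHR(st.st_mode) && st.st_rdev == makedev(0, 0))
            printf("%s is a whiteout\n", argv[1]);
        else
            printf("%s is not a whiteout\n", argv[1]);
        return 0;
    }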
9390 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 9682 | static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
9391 | struct inode *new_dir, struct dentry *new_dentry) | 9683 | struct inode *new_dir, struct dentry *new_dentry, |
9684 | unsigned int flags) | ||
9392 | { | 9685 | { |
9393 | struct btrfs_trans_handle *trans; | 9686 | struct btrfs_trans_handle *trans; |
9687 | unsigned int trans_num_items; | ||
9394 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | 9688 | struct btrfs_root *root = BTRFS_I(old_dir)->root; |
9395 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | 9689 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; |
9396 | struct inode *new_inode = d_inode(new_dentry); | 9690 | struct inode *new_inode = d_inode(new_dentry); |
@@ -9399,6 +9693,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9399 | u64 root_objectid; | 9693 | u64 root_objectid; |
9400 | int ret; | 9694 | int ret; |
9401 | u64 old_ino = btrfs_ino(old_inode); | 9695 | u64 old_ino = btrfs_ino(old_inode); |
9696 | bool log_pinned = false; | ||
9402 | 9697 | ||
9403 | if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) | 9698 | if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
9404 | return -EPERM; | 9699 | return -EPERM; |
@@ -9449,15 +9744,21 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9449 | * We want to reserve the absolute worst case amount of items. So if | 9744 | * We want to reserve the absolute worst case amount of items. So if |
9450 | * both inodes are subvols and we need to unlink them then that would | 9745 | * both inodes are subvols and we need to unlink them then that would |
9451 | * require 4 item modifications, but if they are both normal inodes it | 9746 | * require 4 item modifications, but if they are both normal inodes it |
9452 | * would require 5 item modifications, so we'll assume their normal | 9747 | * would require 5 item modifications, so we'll assume they are normal |
9453 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | 9748 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items |
9454 | * should cover the worst case number of items we'll modify. | 9749 | * should cover the worst case number of items we'll modify. |
9750 | * If our rename has the whiteout flag, we need more 5 units for the | ||
9751 | * new inode (1 inode item, 1 inode ref, 2 dir items and 1 xattr item | ||
9752 | * when selinux is enabled). | ||
9455 | */ | 9753 | */ |
9456 | trans = btrfs_start_transaction(root, 11); | 9754 | trans_num_items = 11; |
9755 | if (flags & RENAME_WHITEOUT) | ||
9756 | trans_num_items += 5; | ||
9757 | trans = btrfs_start_transaction(root, trans_num_items); | ||
9457 | if (IS_ERR(trans)) { | 9758 | if (IS_ERR(trans)) { |
9458 | ret = PTR_ERR(trans); | 9759 | ret = PTR_ERR(trans); |
9459 | goto out_notrans; | 9760 | goto out_notrans; |
9460 | } | 9761 | } |
9461 | 9762 | ||
9462 | if (dest != root) | 9763 | if (dest != root) |
9463 | btrfs_record_root_in_trans(trans, dest); | 9764 | btrfs_record_root_in_trans(trans, dest); |
@@ -9471,6 +9772,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9471 | /* force full log commit if subvolume involved. */ | 9772 | /* force full log commit if subvolume involved. */ |
9472 | btrfs_set_log_full_commit(root->fs_info, trans); | 9773 | btrfs_set_log_full_commit(root->fs_info, trans); |
9473 | } else { | 9774 | } else { |
9775 | btrfs_pin_log_trans(root); | ||
9776 | log_pinned = true; | ||
9474 | ret = btrfs_insert_inode_ref(trans, dest, | 9777 | ret = btrfs_insert_inode_ref(trans, dest, |
9475 | new_dentry->d_name.name, | 9778 | new_dentry->d_name.name, |
9476 | new_dentry->d_name.len, | 9779 | new_dentry->d_name.len, |
@@ -9478,14 +9781,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9478 | btrfs_ino(new_dir), index); | 9781 | btrfs_ino(new_dir), index); |
9479 | if (ret) | 9782 | if (ret) |
9480 | goto out_fail; | 9783 | goto out_fail; |
9481 | /* | ||
9482 | * this is an ugly little race, but the rename is required | ||
9483 | * to make sure that if we crash, the inode is either at the | ||
9484 | * old name or the new one. pinning the log transaction lets | ||
9485 | * us make sure we don't allow a log commit to come in after | ||
9486 | * we unlink the name but before we add the new name back in. | ||
9487 | */ | ||
9488 | btrfs_pin_log_trans(root); | ||
9489 | } | 9784 | } |
9490 | 9785 | ||
9491 | inode_inc_iversion(old_dir); | 9786 | inode_inc_iversion(old_dir); |
@@ -9552,12 +9847,46 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
9552 | if (old_inode->i_nlink == 1) | 9847 | if (old_inode->i_nlink == 1) |
9553 | BTRFS_I(old_inode)->dir_index = index; | 9848 | BTRFS_I(old_inode)->dir_index = index; |
9554 | 9849 | ||
9555 | if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { | 9850 | if (log_pinned) { |
9556 | struct dentry *parent = new_dentry->d_parent; | 9851 | struct dentry *parent = new_dentry->d_parent; |
9852 | |||
9557 | btrfs_log_new_name(trans, old_inode, old_dir, parent); | 9853 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
9558 | btrfs_end_log_trans(root); | 9854 | btrfs_end_log_trans(root); |
9855 | log_pinned = false; | ||
9856 | } | ||
9857 | |||
9858 | if (flags & RENAME_WHITEOUT) { | ||
9859 | ret = btrfs_whiteout_for_rename(trans, root, old_dir, | ||
9860 | old_dentry); | ||
9861 | |||
9862 | if (ret) { | ||
9863 | btrfs_abort_transaction(trans, root, ret); | ||
9864 | goto out_fail; | ||
9865 | } | ||
9559 | } | 9866 | } |
9560 | out_fail: | 9867 | out_fail: |
9868 | /* | ||
9869 | * If we have pinned the log and an error happened, we unpin tasks | ||
9870 | * trying to sync the log and force them to fallback to a transaction | ||
9871 | * commit if the log currently contains any of the inodes involved in | ||
9872 | * this rename operation (to ensure we do not persist a log with an | ||
9873 | * inconsistent state for any of these inodes or leading to any | ||
9874 | * inconsistencies when replayed). If the transaction was aborted, the | ||
9875 | * abortion reason is propagated to userspace when attempting to commit | ||
9876 | * the transaction. If the log does not contain any of these inodes, we | ||
9877 | * allow the tasks to sync it. | ||
9878 | */ | ||
9879 | if (ret && log_pinned) { | ||
9880 | if (btrfs_inode_in_log(old_dir, root->fs_info->generation) || | ||
9881 | btrfs_inode_in_log(new_dir, root->fs_info->generation) || | ||
9882 | btrfs_inode_in_log(old_inode, root->fs_info->generation) || | ||
9883 | (new_inode && | ||
9884 | btrfs_inode_in_log(new_inode, root->fs_info->generation))) | ||
9885 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
9886 | |||
9887 | btrfs_end_log_trans(root); | ||
9888 | log_pinned = false; | ||
9889 | } | ||
9561 | btrfs_end_transaction(trans, root); | 9890 | btrfs_end_transaction(trans, root); |
9562 | out_notrans: | 9891 | out_notrans: |
9563 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) | 9892 | if (old_ino == BTRFS_FIRST_FREE_OBJECTID) |
@@ -9570,10 +9899,14 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry, | |||
9570 | struct inode *new_dir, struct dentry *new_dentry, | 9899 | struct inode *new_dir, struct dentry *new_dentry, |
9571 | unsigned int flags) | 9900 | unsigned int flags) |
9572 | { | 9901 | { |
9573 | if (flags & ~RENAME_NOREPLACE) | 9902 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) |
9574 | return -EINVAL; | 9903 | return -EINVAL; |
9575 | 9904 | ||
9576 | return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry); | 9905 | if (flags & RENAME_EXCHANGE) |
9906 | return btrfs_rename_exchange(old_dir, old_dentry, new_dir, | ||
9907 | new_dentry); | ||
9908 | |||
9909 | return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); | ||
9577 | } | 9910 | } |
9578 | 9911 | ||
9579 | static void btrfs_run_delalloc_work(struct btrfs_work *work) | 9912 | static void btrfs_run_delalloc_work(struct btrfs_work *work) |
@@ -9942,6 +10275,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
9942 | btrfs_end_transaction(trans, root); | 10275 | btrfs_end_transaction(trans, root); |
9943 | break; | 10276 | break; |
9944 | } | 10277 | } |
10278 | btrfs_dec_block_group_reservations(root->fs_info, ins.objectid); | ||
9945 | 10279 | ||
9946 | last_alloc = ins.offset; | 10280 | last_alloc = ins.offset; |
9947 | ret = insert_reserved_file_extent(trans, inode, | 10281 | ret = insert_reserved_file_extent(trans, inode, |
@@ -10184,7 +10518,7 @@ static const struct file_operations btrfs_dir_file_operations = { | |||
10184 | .iterate = btrfs_real_readdir, | 10518 | .iterate = btrfs_real_readdir, |
10185 | .unlocked_ioctl = btrfs_ioctl, | 10519 | .unlocked_ioctl = btrfs_ioctl, |
10186 | #ifdef CONFIG_COMPAT | 10520 | #ifdef CONFIG_COMPAT |
10187 | .compat_ioctl = btrfs_ioctl, | 10521 | .compat_ioctl = btrfs_compat_ioctl, |
10188 | #endif | 10522 | #endif |
10189 | .release = btrfs_release_file, | 10523 | .release = btrfs_release_file, |
10190 | .fsync = btrfs_sync_file, | 10524 | .fsync = btrfs_sync_file, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0b8ba717175b..4e700694b741 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -125,10 +125,10 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags) | |||
125 | if (flags & BTRFS_INODE_NODATACOW) | 125 | if (flags & BTRFS_INODE_NODATACOW) |
126 | iflags |= FS_NOCOW_FL; | 126 | iflags |= FS_NOCOW_FL; |
127 | 127 | ||
128 | if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) | 128 | if (flags & BTRFS_INODE_NOCOMPRESS) |
129 | iflags |= FS_COMPR_FL; | ||
130 | else if (flags & BTRFS_INODE_NOCOMPRESS) | ||
131 | iflags |= FS_NOCOMP_FL; | 129 | iflags |= FS_NOCOMP_FL; |
130 | else if (flags & BTRFS_INODE_COMPRESS) | ||
131 | iflags |= FS_COMPR_FL; | ||
132 | 132 | ||
133 | return iflags; | 133 | return iflags; |
134 | } | 134 | } |
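The reordering above changes the precedence when an inode has both compression bits set: NOCOMPRESS now wins and FS_NOCOMP_FL is reported instead of FS_COMPR_FL. A standalone model of the mapping (flag values here are local to the sketch, not the kernel's definitions):

    #include <stdio.h>

    #define IN_COMPRESS    (1u << 0)
    #define IN_NOCOMPRESS  (1u << 1)
    #define OUT_COMPR_FL   (1u << 0)
    #define OUT_NOCOMP_FL  (1u << 1)

    static unsigned int flags_to_ioctl(unsigned int flags)
    {
        unsigned int iflags = 0;

        if (flags & IN_NOCOMPRESS)      /* "no compression" takes precedence */
            iflags |= OUT_NOCOMP_FL;
        else if (flags & IN_COMPRESS)
            iflags |= OUT_COMPR_FL;
        return iflags;
    }

    int main(void)
    {
        /* both bits set: reports NOCOMP, matching the new kernel behaviour */
        printf("%u\n", flags_to_ioctl(IN_COMPRESS | IN_NOCOMPRESS));
        return 0;
    }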
@@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir, | |||
439 | { | 439 | { |
440 | struct btrfs_trans_handle *trans; | 440 | struct btrfs_trans_handle *trans; |
441 | struct btrfs_key key; | 441 | struct btrfs_key key; |
442 | struct btrfs_root_item root_item; | 442 | struct btrfs_root_item *root_item; |
443 | struct btrfs_inode_item *inode_item; | 443 | struct btrfs_inode_item *inode_item; |
444 | struct extent_buffer *leaf; | 444 | struct extent_buffer *leaf; |
445 | struct btrfs_root *root = BTRFS_I(dir)->root; | 445 | struct btrfs_root *root = BTRFS_I(dir)->root; |
@@ -455,16 +455,22 @@ static noinline int create_subvol(struct inode *dir, | |||
455 | u64 qgroup_reserved; | 455 | u64 qgroup_reserved; |
456 | uuid_le new_uuid; | 456 | uuid_le new_uuid; |
457 | 457 | ||
458 | root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); | ||
459 | if (!root_item) | ||
460 | return -ENOMEM; | ||
461 | |||
458 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); | 462 | ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); |
459 | if (ret) | 463 | if (ret) |
460 | return ret; | 464 | goto fail_free; |
461 | 465 | ||
462 | /* | 466 | /* |
463 | * Don't create subvolume whose level is not zero. Or qgroup will be | 467 | * Don't create subvolume whose level is not zero. Or qgroup will be |
464 | * screwed up since it assume subvolme qgroup's level to be 0. | 468 | * screwed up since it assume subvolme qgroup's level to be 0. |
465 | */ | 469 | */ |
466 | if (btrfs_qgroup_level(objectid)) | 470 | if (btrfs_qgroup_level(objectid)) { |
467 | return -ENOSPC; | 471 | ret = -ENOSPC; |
472 | goto fail_free; | ||
473 | } | ||
468 | 474 | ||
469 | btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); | 475 | btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); |
470 | /* | 476 | /* |
@@ -474,14 +480,14 @@ static noinline int create_subvol(struct inode *dir, | |||
474 | ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, | 480 | ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, |
475 | 8, &qgroup_reserved, false); | 481 | 8, &qgroup_reserved, false); |
476 | if (ret) | 482 | if (ret) |
477 | return ret; | 483 | goto fail_free; |
478 | 484 | ||
479 | trans = btrfs_start_transaction(root, 0); | 485 | trans = btrfs_start_transaction(root, 0); |
480 | if (IS_ERR(trans)) { | 486 | if (IS_ERR(trans)) { |
481 | ret = PTR_ERR(trans); | 487 | ret = PTR_ERR(trans); |
482 | btrfs_subvolume_release_metadata(root, &block_rsv, | 488 | btrfs_subvolume_release_metadata(root, &block_rsv, |
483 | qgroup_reserved); | 489 | qgroup_reserved); |
484 | return ret; | 490 | goto fail_free; |
485 | } | 491 | } |
486 | trans->block_rsv = &block_rsv; | 492 | trans->block_rsv = &block_rsv; |
487 | trans->bytes_reserved = block_rsv.size; | 493 | trans->bytes_reserved = block_rsv.size; |
@@ -509,47 +515,45 @@ static noinline int create_subvol(struct inode *dir, | |||
509 | BTRFS_UUID_SIZE); | 515 | BTRFS_UUID_SIZE); |
510 | btrfs_mark_buffer_dirty(leaf); | 516 | btrfs_mark_buffer_dirty(leaf); |
511 | 517 | ||
512 | memset(&root_item, 0, sizeof(root_item)); | 518 | inode_item = &root_item->inode; |
513 | |||
514 | inode_item = &root_item.inode; | ||
515 | btrfs_set_stack_inode_generation(inode_item, 1); | 519 | btrfs_set_stack_inode_generation(inode_item, 1); |
516 | btrfs_set_stack_inode_size(inode_item, 3); | 520 | btrfs_set_stack_inode_size(inode_item, 3); |
517 | btrfs_set_stack_inode_nlink(inode_item, 1); | 521 | btrfs_set_stack_inode_nlink(inode_item, 1); |
518 | btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); | 522 | btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); |
519 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); | 523 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); |
520 | 524 | ||
521 | btrfs_set_root_flags(&root_item, 0); | 525 | btrfs_set_root_flags(root_item, 0); |
522 | btrfs_set_root_limit(&root_item, 0); | 526 | btrfs_set_root_limit(root_item, 0); |
523 | btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); | 527 | btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); |
524 | 528 | ||
525 | btrfs_set_root_bytenr(&root_item, leaf->start); | 529 | btrfs_set_root_bytenr(root_item, leaf->start); |
526 | btrfs_set_root_generation(&root_item, trans->transid); | 530 | btrfs_set_root_generation(root_item, trans->transid); |
527 | btrfs_set_root_level(&root_item, 0); | 531 | btrfs_set_root_level(root_item, 0); |
528 | btrfs_set_root_refs(&root_item, 1); | 532 | btrfs_set_root_refs(root_item, 1); |
529 | btrfs_set_root_used(&root_item, leaf->len); | 533 | btrfs_set_root_used(root_item, leaf->len); |
530 | btrfs_set_root_last_snapshot(&root_item, 0); | 534 | btrfs_set_root_last_snapshot(root_item, 0); |
531 | 535 | ||
532 | btrfs_set_root_generation_v2(&root_item, | 536 | btrfs_set_root_generation_v2(root_item, |
533 | btrfs_root_generation(&root_item)); | 537 | btrfs_root_generation(root_item)); |
534 | uuid_le_gen(&new_uuid); | 538 | uuid_le_gen(&new_uuid); |
535 | memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); | 539 | memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); |
536 | btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); | 540 | btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec); |
537 | btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); | 541 | btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec); |
538 | root_item.ctime = root_item.otime; | 542 | root_item->ctime = root_item->otime; |
539 | btrfs_set_root_ctransid(&root_item, trans->transid); | 543 | btrfs_set_root_ctransid(root_item, trans->transid); |
540 | btrfs_set_root_otransid(&root_item, trans->transid); | 544 | btrfs_set_root_otransid(root_item, trans->transid); |
541 | 545 | ||
542 | btrfs_tree_unlock(leaf); | 546 | btrfs_tree_unlock(leaf); |
543 | free_extent_buffer(leaf); | 547 | free_extent_buffer(leaf); |
544 | leaf = NULL; | 548 | leaf = NULL; |
545 | 549 | ||
546 | btrfs_set_root_dirid(&root_item, new_dirid); | 550 | btrfs_set_root_dirid(root_item, new_dirid); |
547 | 551 | ||
548 | key.objectid = objectid; | 552 | key.objectid = objectid; |
549 | key.offset = 0; | 553 | key.offset = 0; |
550 | key.type = BTRFS_ROOT_ITEM_KEY; | 554 | key.type = BTRFS_ROOT_ITEM_KEY; |
551 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 555 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, |
552 | &root_item); | 556 | root_item); |
553 | if (ret) | 557 | if (ret) |
554 | goto fail; | 558 | goto fail; |
555 | 559 | ||
@@ -601,12 +605,13 @@ static noinline int create_subvol(struct inode *dir, | |||
601 | BUG_ON(ret); | 605 | BUG_ON(ret); |
602 | 606 | ||
603 | ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, | 607 | ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, |
604 | root_item.uuid, BTRFS_UUID_KEY_SUBVOL, | 608 | root_item->uuid, BTRFS_UUID_KEY_SUBVOL, |
605 | objectid); | 609 | objectid); |
606 | if (ret) | 610 | if (ret) |
607 | btrfs_abort_transaction(trans, root, ret); | 611 | btrfs_abort_transaction(trans, root, ret); |
608 | 612 | ||
609 | fail: | 613 | fail: |
614 | kfree(root_item); | ||
610 | trans->block_rsv = NULL; | 615 | trans->block_rsv = NULL; |
611 | trans->bytes_reserved = 0; | 616 | trans->bytes_reserved = 0; |
612 | btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); | 617 | btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); |
@@ -629,6 +634,10 @@ fail: | |||
629 | d_instantiate(dentry, inode); | 634 | d_instantiate(dentry, inode); |
630 | } | 635 | } |
631 | return ret; | 636 | return ret; |
637 | |||
638 | fail_free: | ||
639 | kfree(root_item); | ||
640 | return ret; | ||
632 | } | 641 | } |
633 | 642 | ||
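The create_subvol() change above moves the sizeable struct btrfs_root_item off the kernel stack into kzalloc'd memory and routes every early error through a single fail_free label. The shape of that allocate-then-unwind pattern in plain C, with a toy struct and steps standing in for the btrfs ones:

    #include <stdlib.h>
    #include <string.h>

    struct big_item { char payload[4096]; };   /* too large for a small stack */

    static int step_one(void)                { return 0; }
    static int step_two(struct big_item *it) { return it ? 0 : -1; }

    static int create_item(void)
    {
        struct big_item *item;
        int ret;

        item = calloc(1, sizeof(*item));    /* was a stack variable before */
        if (!item)
            return -1;

        ret = step_one();
        if (ret)
            goto fail_free;

        ret = step_two(item);
        if (ret)
            goto fail_free;

        memcpy(item->payload, "ok", 3);
        /* ... hand the item off or keep using it ... */
        free(item);
        return 0;

    fail_free:
        free(item);                         /* single exit for early errors */
        return ret;
    }

    int main(void)
    {
        return create_item() ? 1 : 0;
    }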
634 | static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) | 643 | static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) |
@@ -681,7 +690,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
681 | if (ret) | 690 | if (ret) |
682 | goto dec_and_free; | 691 | goto dec_and_free; |
683 | 692 | ||
684 | btrfs_wait_ordered_extents(root, -1); | 693 | btrfs_wait_ordered_extents(root, -1, 0, (u64)-1); |
685 | 694 | ||
686 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 695 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
687 | BTRFS_BLOCK_RSV_TEMP); | 696 | BTRFS_BLOCK_RSV_TEMP); |
@@ -2671,10 +2680,10 @@ out: | |||
2671 | return ret; | 2680 | return ret; |
2672 | } | 2681 | } |
2673 | 2682 | ||
2674 | static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | 2683 | static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) |
2675 | { | 2684 | { |
2676 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; | 2685 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; |
2677 | struct btrfs_ioctl_vol_args *vol_args; | 2686 | struct btrfs_ioctl_vol_args_v2 *vol_args; |
2678 | int ret; | 2687 | int ret; |
2679 | 2688 | ||
2680 | if (!capable(CAP_SYS_ADMIN)) | 2689 | if (!capable(CAP_SYS_ADMIN)) |
@@ -2690,7 +2699,9 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2690 | goto err_drop; | 2699 | goto err_drop; |
2691 | } | 2700 | } |
2692 | 2701 | ||
2693 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2702 | /* Check for compatibility reject unknown flags */ |
2703 | if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) | ||
2704 | return -EOPNOTSUPP; | ||
2694 | 2705 | ||
2695 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | 2706 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
2696 | 1)) { | 2707 | 1)) { |
@@ -2699,13 +2710,23 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2699 | } | 2710 | } |
2700 | 2711 | ||
2701 | mutex_lock(&root->fs_info->volume_mutex); | 2712 | mutex_lock(&root->fs_info->volume_mutex); |
2702 | ret = btrfs_rm_device(root, vol_args->name); | 2713 | if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { |
2714 | ret = btrfs_rm_device(root, NULL, vol_args->devid); | ||
2715 | } else { | ||
2716 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
2717 | ret = btrfs_rm_device(root, vol_args->name, 0); | ||
2718 | } | ||
2703 | mutex_unlock(&root->fs_info->volume_mutex); | 2719 | mutex_unlock(&root->fs_info->volume_mutex); |
2704 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | 2720 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); |
2705 | 2721 | ||
2706 | if (!ret) | 2722 | if (!ret) { |
2707 | btrfs_info(root->fs_info, "disk deleted %s",vol_args->name); | 2723 | if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) |
2708 | 2724 | btrfs_info(root->fs_info, "device deleted: id %llu", | |
2725 | vol_args->devid); | ||
2726 | else | ||
2727 | btrfs_info(root->fs_info, "device deleted: %s", | ||
2728 | vol_args->name); | ||
2729 | } | ||
2709 | out: | 2730 | out: |
2710 | kfree(vol_args); | 2731 | kfree(vol_args); |
2711 | err_drop: | 2732 | err_drop: |
@@ -2713,6 +2734,47 @@ err_drop: | |||
2713 | return ret; | 2734 | return ret; |
2714 | } | 2735 | } |
2715 | 2736 | ||
2737 | static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | ||
2738 | { | ||
2739 | struct btrfs_root *root = BTRFS_I(file_inode(file))->root; | ||
2740 | struct btrfs_ioctl_vol_args *vol_args; | ||
2741 | int ret; | ||
2742 | |||
2743 | if (!capable(CAP_SYS_ADMIN)) | ||
2744 | return -EPERM; | ||
2745 | |||
2746 | ret = mnt_want_write_file(file); | ||
2747 | if (ret) | ||
2748 | return ret; | ||
2749 | |||
2750 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | ||
2751 | 1)) { | ||
2752 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; | ||
2753 | goto out_drop_write; | ||
2754 | } | ||
2755 | |||
2756 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
2757 | if (IS_ERR(vol_args)) { | ||
2758 | ret = PTR_ERR(vol_args); | ||
2759 | goto out; | ||
2760 | } | ||
2761 | |||
2762 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
2763 | mutex_lock(&root->fs_info->volume_mutex); | ||
2764 | ret = btrfs_rm_device(root, vol_args->name, 0); | ||
2765 | mutex_unlock(&root->fs_info->volume_mutex); | ||
2766 | |||
2767 | if (!ret) | ||
2768 | btrfs_info(root->fs_info, "disk deleted %s",vol_args->name); | ||
2769 | kfree(vol_args); | ||
2770 | out: | ||
2771 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
2772 | out_drop_write: | ||
2773 | mnt_drop_write_file(file); | ||
2774 | |||
2775 | return ret; | ||
2776 | } | ||
2777 | |||
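The v2 remove-device ioctl above rejects any flag bit outside BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED with -EOPNOTSUPP, so newer userspace asking for a feature the kernel does not know about fails loudly instead of silently getting old behaviour. The same forward-compatibility guard in miniature (names invented for the sketch):

    #include <errno.h>
    #include <stdio.h>

    #define ARG_SPEC_BY_ID      (1ULL << 0)
    #define ARG_FLAGS_SUPPORTED (ARG_SPEC_BY_ID)

    static int handle_v2(unsigned long long flags)
    {
        if (flags & ~ARG_FLAGS_SUPPORTED)
            return -EOPNOTSUPP;     /* unknown bit: refuse rather than guess */
        return 0;
    }

    int main(void)
    {
        printf("known: %d, unknown: %d\n",
               handle_v2(ARG_SPEC_BY_ID), handle_v2(1ULL << 7));
        return 0;
    }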
2716 | static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) | 2778 | static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) |
2717 | { | 2779 | { |
2718 | struct btrfs_ioctl_fs_info_args *fi_args; | 2780 | struct btrfs_ioctl_fs_info_args *fi_args; |
@@ -3472,13 +3534,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
3472 | u64 last_dest_end = destoff; | 3534 | u64 last_dest_end = destoff; |
3473 | 3535 | ||
3474 | ret = -ENOMEM; | 3536 | ret = -ENOMEM; |
3475 | buf = vmalloc(root->nodesize); | 3537 | buf = kmalloc(root->nodesize, GFP_KERNEL | __GFP_NOWARN); |
3476 | if (!buf) | 3538 | if (!buf) { |
3477 | return ret; | 3539 | buf = vmalloc(root->nodesize); |
3540 | if (!buf) | ||
3541 | return ret; | ||
3542 | } | ||
3478 | 3543 | ||
3479 | path = btrfs_alloc_path(); | 3544 | path = btrfs_alloc_path(); |
3480 | if (!path) { | 3545 | if (!path) { |
3481 | vfree(buf); | 3546 | kvfree(buf); |
3482 | return ret; | 3547 | return ret; |
3483 | } | 3548 | } |
3484 | 3549 | ||
@@ -3779,7 +3844,7 @@ process_slot: | |||
3779 | 3844 | ||
3780 | out: | 3845 | out: |
3781 | btrfs_free_path(path); | 3846 | btrfs_free_path(path); |
3782 | vfree(buf); | 3847 | kvfree(buf); |
3783 | return ret; | 3848 | return ret; |
3784 | } | 3849 | } |
3785 | 3850 | ||
@@ -4380,7 +4445,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) | |||
4380 | 1)) { | 4445 | 1)) { |
4381 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; | 4446 | ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; |
4382 | } else { | 4447 | } else { |
4383 | ret = btrfs_dev_replace_start(root, p); | 4448 | ret = btrfs_dev_replace_by_ioctl(root, p); |
4384 | atomic_set( | 4449 | atomic_set( |
4385 | &root->fs_info->mutually_exclusive_operation_running, | 4450 | &root->fs_info->mutually_exclusive_operation_running, |
4386 | 0); | 4451 | 0); |
@@ -4851,8 +4916,8 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) | |||
4851 | /* update qgroup status and info */ | 4916 | /* update qgroup status and info */ |
4852 | err = btrfs_run_qgroups(trans, root->fs_info); | 4917 | err = btrfs_run_qgroups(trans, root->fs_info); |
4853 | if (err < 0) | 4918 | if (err < 0) |
4854 | btrfs_std_error(root->fs_info, ret, | 4919 | btrfs_handle_fs_error(root->fs_info, err, |
4855 | "failed to update qgroup status and info\n"); | 4920 | "failed to update qgroup status and info"); |
4856 | err = btrfs_end_transaction(trans, root); | 4921 | err = btrfs_end_transaction(trans, root); |
4857 | if (err && !ret) | 4922 | if (err && !ret) |
4858 | ret = err; | 4923 | ret = err; |
@@ -5398,9 +5463,15 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) | |||
5398 | if (ret) | 5463 | if (ret) |
5399 | return ret; | 5464 | return ret; |
5400 | 5465 | ||
5466 | ret = mnt_want_write_file(file); | ||
5467 | if (ret) | ||
5468 | return ret; | ||
5469 | |||
5401 | trans = btrfs_start_transaction(root, 0); | 5470 | trans = btrfs_start_transaction(root, 0); |
5402 | if (IS_ERR(trans)) | 5471 | if (IS_ERR(trans)) { |
5403 | return PTR_ERR(trans); | 5472 | ret = PTR_ERR(trans); |
5473 | goto out_drop_write; | ||
5474 | } | ||
5404 | 5475 | ||
5405 | spin_lock(&root->fs_info->super_lock); | 5476 | spin_lock(&root->fs_info->super_lock); |
5406 | newflags = btrfs_super_compat_flags(super_block); | 5477 | newflags = btrfs_super_compat_flags(super_block); |
@@ -5419,7 +5490,11 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) | |||
5419 | btrfs_set_super_incompat_flags(super_block, newflags); | 5490 | btrfs_set_super_incompat_flags(super_block, newflags); |
5420 | spin_unlock(&root->fs_info->super_lock); | 5491 | spin_unlock(&root->fs_info->super_lock); |
5421 | 5492 | ||
5422 | return btrfs_commit_transaction(trans, root); | 5493 | ret = btrfs_commit_transaction(trans, root); |
5494 | out_drop_write: | ||
5495 | mnt_drop_write_file(file); | ||
5496 | |||
5497 | return ret; | ||
5423 | } | 5498 | } |
5424 | 5499 | ||
5425 | long btrfs_ioctl(struct file *file, unsigned int | 5500 | long btrfs_ioctl(struct file *file, unsigned int |
@@ -5463,6 +5538,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5463 | return btrfs_ioctl_add_dev(root, argp); | 5538 | return btrfs_ioctl_add_dev(root, argp); |
5464 | case BTRFS_IOC_RM_DEV: | 5539 | case BTRFS_IOC_RM_DEV: |
5465 | return btrfs_ioctl_rm_dev(file, argp); | 5540 | return btrfs_ioctl_rm_dev(file, argp); |
5541 | case BTRFS_IOC_RM_DEV_V2: | ||
5542 | return btrfs_ioctl_rm_dev_v2(file, argp); | ||
5466 | case BTRFS_IOC_FS_INFO: | 5543 | case BTRFS_IOC_FS_INFO: |
5467 | return btrfs_ioctl_fs_info(root, argp); | 5544 | return btrfs_ioctl_fs_info(root, argp); |
5468 | case BTRFS_IOC_DEV_INFO: | 5545 | case BTRFS_IOC_DEV_INFO: |
@@ -5556,3 +5633,24 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5556 | 5633 | ||
5557 | return -ENOTTY; | 5634 | return -ENOTTY; |
5558 | } | 5635 | } |
5636 | |||
5637 | #ifdef CONFIG_COMPAT | ||
5638 | long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
5639 | { | ||
5640 | switch (cmd) { | ||
5641 | case FS_IOC32_GETFLAGS: | ||
5642 | cmd = FS_IOC_GETFLAGS; | ||
5643 | break; | ||
5644 | case FS_IOC32_SETFLAGS: | ||
5645 | cmd = FS_IOC_SETFLAGS; | ||
5646 | break; | ||
5647 | case FS_IOC32_GETVERSION: | ||
5648 | cmd = FS_IOC_GETVERSION; | ||
5649 | break; | ||
5650 | default: | ||
5651 | return -ENOIOCTLCMD; | ||
5652 | } | ||
5653 | |||
5654 | return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); | ||
5655 | } | ||
5656 | #endif | ||
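For illustration, the new BTRFS_IOC_RM_DEV_V2 ioctl accepts either a device path or, when BTRFS_DEVICE_SPEC_BY_ID is set in flags, a numeric devid. The sketch below shows a hypothetical userspace caller for the by-id case; the field names (flags, devid) are taken from the hunk above, and it assumes a <linux/btrfs.h> UAPI header that already carries the new ioctl and flag definitions.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Hypothetical helper: remove a device by numeric id from the filesystem
 * that the descriptor opened on 'mnt' belongs to. */
static int rm_device_by_id(const char *mnt, __u64 devid)
{
        struct btrfs_ioctl_vol_args_v2 args;
        int fd, ret;

        fd = open(mnt, O_RDONLY | O_DIRECTORY);
        if (fd < 0)
                return -1;

        memset(&args, 0, sizeof(args));
        args.flags = BTRFS_DEVICE_SPEC_BY_ID;   /* look the device up by id, not by name */
        args.devid = devid;

        ret = ioctl(fd, BTRFS_IOC_RM_DEV_V2, &args);
        close(fd);
        return ret;
}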
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 0de7da5a610d..559170464d7c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) | |||
661 | * wait for all the ordered extents in a root. This is done when balancing | 661 | * wait for all the ordered extents in a root. This is done when balancing |
662 | * space between drives. | 662 | * space between drives. |
663 | */ | 663 | */ |
664 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | 664 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, |
665 | const u64 range_start, const u64 range_len) | ||
665 | { | 666 | { |
666 | struct list_head splice, works; | 667 | LIST_HEAD(splice); |
668 | LIST_HEAD(skipped); | ||
669 | LIST_HEAD(works); | ||
667 | struct btrfs_ordered_extent *ordered, *next; | 670 | struct btrfs_ordered_extent *ordered, *next; |
668 | int count = 0; | 671 | int count = 0; |
669 | 672 | const u64 range_end = range_start + range_len; | |
670 | INIT_LIST_HEAD(&splice); | ||
671 | INIT_LIST_HEAD(&works); | ||
672 | 673 | ||
673 | mutex_lock(&root->ordered_extent_mutex); | 674 | mutex_lock(&root->ordered_extent_mutex); |
674 | spin_lock(&root->ordered_extent_lock); | 675 | spin_lock(&root->ordered_extent_lock); |
@@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
676 | while (!list_empty(&splice) && nr) { | 677 | while (!list_empty(&splice) && nr) { |
677 | ordered = list_first_entry(&splice, struct btrfs_ordered_extent, | 678 | ordered = list_first_entry(&splice, struct btrfs_ordered_extent, |
678 | root_extent_list); | 679 | root_extent_list); |
680 | |||
681 | if (range_end <= ordered->start || | ||
682 | ordered->start + ordered->disk_len <= range_start) { | ||
683 | list_move_tail(&ordered->root_extent_list, &skipped); | ||
684 | cond_resched_lock(&root->ordered_extent_lock); | ||
685 | continue; | ||
686 | } | ||
687 | |||
679 | list_move_tail(&ordered->root_extent_list, | 688 | list_move_tail(&ordered->root_extent_list, |
680 | &root->ordered_extents); | 689 | &root->ordered_extents); |
681 | atomic_inc(&ordered->refs); | 690 | atomic_inc(&ordered->refs); |
@@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
694 | nr--; | 703 | nr--; |
695 | count++; | 704 | count++; |
696 | } | 705 | } |
706 | list_splice_tail(&skipped, &root->ordered_extents); | ||
697 | list_splice_tail(&splice, &root->ordered_extents); | 707 | list_splice_tail(&splice, &root->ordered_extents); |
698 | spin_unlock(&root->ordered_extent_lock); | 708 | spin_unlock(&root->ordered_extent_lock); |
699 | 709 | ||
@@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
708 | return count; | 718 | return count; |
709 | } | 719 | } |
710 | 720 | ||
711 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | 721 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, |
722 | const u64 range_start, const u64 range_len) | ||
712 | { | 723 | { |
713 | struct btrfs_root *root; | 724 | struct btrfs_root *root; |
714 | struct list_head splice; | 725 | struct list_head splice; |
@@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
728 | &fs_info->ordered_roots); | 739 | &fs_info->ordered_roots); |
729 | spin_unlock(&fs_info->ordered_root_lock); | 740 | spin_unlock(&fs_info->ordered_root_lock); |
730 | 741 | ||
731 | done = btrfs_wait_ordered_extents(root, nr); | 742 | done = btrfs_wait_ordered_extents(root, nr, |
743 | range_start, range_len); | ||
732 | btrfs_put_fs_root(root); | 744 | btrfs_put_fs_root(root); |
733 | 745 | ||
734 | spin_lock(&fs_info->ordered_root_lock); | 746 | spin_lock(&fs_info->ordered_root_lock); |
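btrfs_wait_ordered_extents() now takes a byte range and moves ordered extents that fall entirely outside it onto the skipped list instead of waiting on them; callers that still want the old whole-filesystem behaviour pass range_start = 0 and range_len = (u64)-1, as later hunks in super.c and transaction.c show. The skip test above is the usual half-open interval overlap check, sketched below; byte_ranges_overlap is only an illustration, not a helper added by the patch.

#include <linux/types.h>

/* True iff the half-open byte ranges [a, a + alen) and [b, b + blen)
 * intersect. The hunk above skips an ordered extent exactly when this
 * is false for (ordered->start, ordered->disk_len) against the caller's
 * (range_start, range_len). */
static inline bool byte_ranges_overlap(u64 a, u64 alen, u64 b, u64 blen)
{
        return a < b + blen && b < a + alen;
}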
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 23c96059cef2..8ef12623d65c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
197 | struct btrfs_ordered_extent *ordered); | 197 | struct btrfs_ordered_extent *ordered); |
198 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | 198 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, |
199 | u32 *sum, int len); | 199 | u32 *sum, int len); |
200 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 200 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, |
201 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 201 | const u64 range_start, const u64 range_len); |
202 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, | ||
203 | const u64 range_start, const u64 range_len); | ||
202 | void btrfs_get_logged_extents(struct inode *inode, | 204 | void btrfs_get_logged_extents(struct inode *inode, |
203 | struct list_head *logged_list, | 205 | struct list_head *logged_list, |
204 | const loff_t start, | 206 | const loff_t start, |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 08ef890deca6..1cfd35cfac76 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2418,7 +2418,7 @@ again: | |||
2418 | } | 2418 | } |
2419 | out: | 2419 | out: |
2420 | if (ret) { | 2420 | if (ret) { |
2421 | btrfs_std_error(root->fs_info, ret, NULL); | 2421 | btrfs_handle_fs_error(root->fs_info, ret, NULL); |
2422 | if (!list_empty(&reloc_roots)) | 2422 | if (!list_empty(&reloc_roots)) |
2423 | free_reloc_roots(&reloc_roots); | 2423 | free_reloc_roots(&reloc_roots); |
2424 | 2424 | ||
@@ -4254,12 +4254,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
4254 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", | 4254 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", |
4255 | rc->block_group->key.objectid, rc->block_group->flags); | 4255 | rc->block_group->key.objectid, rc->block_group->flags); |
4256 | 4256 | ||
4257 | ret = btrfs_start_delalloc_roots(fs_info, 0, -1); | 4257 | btrfs_wait_block_group_reservations(rc->block_group); |
4258 | if (ret < 0) { | 4258 | btrfs_wait_nocow_writers(rc->block_group); |
4259 | err = ret; | 4259 | btrfs_wait_ordered_roots(fs_info, -1, |
4260 | goto out; | 4260 | rc->block_group->key.objectid, |
4261 | } | 4261 | rc->block_group->key.offset); |
4262 | btrfs_wait_ordered_roots(fs_info, -1); | ||
4263 | 4262 | ||
4264 | while (1) { | 4263 | while (1) { |
4265 | mutex_lock(&fs_info->cleaner_mutex); | 4264 | mutex_lock(&fs_info->cleaner_mutex); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 9fcd6dfc3266..b2b14e7115f1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -284,7 +284,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
284 | trans = btrfs_join_transaction(tree_root); | 284 | trans = btrfs_join_transaction(tree_root); |
285 | if (IS_ERR(trans)) { | 285 | if (IS_ERR(trans)) { |
286 | err = PTR_ERR(trans); | 286 | err = PTR_ERR(trans); |
287 | btrfs_std_error(tree_root->fs_info, err, | 287 | btrfs_handle_fs_error(tree_root->fs_info, err, |
288 | "Failed to start trans to delete " | 288 | "Failed to start trans to delete " |
289 | "orphan item"); | 289 | "orphan item"); |
290 | break; | 290 | break; |
@@ -293,7 +293,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
293 | root_key.objectid); | 293 | root_key.objectid); |
294 | btrfs_end_transaction(trans, tree_root); | 294 | btrfs_end_transaction(trans, tree_root); |
295 | if (err) { | 295 | if (err) { |
296 | btrfs_std_error(tree_root->fs_info, err, | 296 | btrfs_handle_fs_error(tree_root->fs_info, err, |
297 | "Failed to delete root orphan " | 297 | "Failed to delete root orphan " |
298 | "item"); | 298 | "item"); |
299 | break; | 299 | break; |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4678f03e878e..fa35cdc46494 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -1350,7 +1350,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, | |||
1350 | recover->bbio = bbio; | 1350 | recover->bbio = bbio; |
1351 | recover->map_length = mapped_length; | 1351 | recover->map_length = mapped_length; |
1352 | 1352 | ||
1353 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); | 1353 | BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK); |
1354 | 1354 | ||
1355 | nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS); | 1355 | nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS); |
1356 | 1356 | ||
@@ -2127,6 +2127,8 @@ static void scrub_missing_raid56_end_io(struct bio *bio) | |||
2127 | if (bio->bi_error) | 2127 | if (bio->bi_error) |
2128 | sblock->no_io_error_seen = 0; | 2128 | sblock->no_io_error_seen = 0; |
2129 | 2129 | ||
2130 | bio_put(bio); | ||
2131 | |||
2130 | btrfs_queue_work(fs_info->scrub_workers, &sblock->work); | 2132 | btrfs_queue_work(fs_info->scrub_workers, &sblock->work); |
2131 | } | 2133 | } |
2132 | 2134 | ||
@@ -2860,7 +2862,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, | |||
2860 | int extent_mirror_num; | 2862 | int extent_mirror_num; |
2861 | int stop_loop = 0; | 2863 | int stop_loop = 0; |
2862 | 2864 | ||
2863 | nsectors = map->stripe_len / root->sectorsize; | 2865 | nsectors = div_u64(map->stripe_len, root->sectorsize); |
2864 | bitmap_len = scrub_calc_parity_bitmap_len(nsectors); | 2866 | bitmap_len = scrub_calc_parity_bitmap_len(nsectors); |
2865 | sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len, | 2867 | sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len, |
2866 | GFP_NOFS); | 2868 | GFP_NOFS); |
@@ -3070,7 +3072,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3070 | int slot; | 3072 | int slot; |
3071 | u64 nstripes; | 3073 | u64 nstripes; |
3072 | struct extent_buffer *l; | 3074 | struct extent_buffer *l; |
3073 | struct btrfs_key key; | ||
3074 | u64 physical; | 3075 | u64 physical; |
3075 | u64 logical; | 3076 | u64 logical; |
3076 | u64 logic_end; | 3077 | u64 logic_end; |
@@ -3079,7 +3080,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3079 | int mirror_num; | 3080 | int mirror_num; |
3080 | struct reada_control *reada1; | 3081 | struct reada_control *reada1; |
3081 | struct reada_control *reada2; | 3082 | struct reada_control *reada2; |
3082 | struct btrfs_key key_start; | 3083 | struct btrfs_key key; |
3083 | struct btrfs_key key_end; | 3084 | struct btrfs_key key_end; |
3084 | u64 increment = map->stripe_len; | 3085 | u64 increment = map->stripe_len; |
3085 | u64 offset; | 3086 | u64 offset; |
@@ -3158,21 +3159,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
3158 | scrub_blocked_if_needed(fs_info); | 3159 | scrub_blocked_if_needed(fs_info); |
3159 | 3160 | ||
3160 | /* FIXME it might be better to start readahead at commit root */ | 3161 | /* FIXME it might be better to start readahead at commit root */ |
3161 | key_start.objectid = logical; | 3162 | key.objectid = logical; |
3162 | key_start.type = BTRFS_EXTENT_ITEM_KEY; | 3163 | key.type = BTRFS_EXTENT_ITEM_KEY; |
3163 | key_start.offset = (u64)0; | 3164 | key.offset = (u64)0; |
3164 | key_end.objectid = logic_end; | 3165 | key_end.objectid = logic_end; |
3165 | key_end.type = BTRFS_METADATA_ITEM_KEY; | 3166 | key_end.type = BTRFS_METADATA_ITEM_KEY; |
3166 | key_end.offset = (u64)-1; | 3167 | key_end.offset = (u64)-1; |
3167 | reada1 = btrfs_reada_add(root, &key_start, &key_end); | 3168 | reada1 = btrfs_reada_add(root, &key, &key_end); |
3168 | 3169 | ||
3169 | key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 3170 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
3170 | key_start.type = BTRFS_EXTENT_CSUM_KEY; | 3171 | key.type = BTRFS_EXTENT_CSUM_KEY; |
3171 | key_start.offset = logical; | 3172 | key.offset = logical; |
3172 | key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 3173 | key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
3173 | key_end.type = BTRFS_EXTENT_CSUM_KEY; | 3174 | key_end.type = BTRFS_EXTENT_CSUM_KEY; |
3174 | key_end.offset = logic_end; | 3175 | key_end.offset = logic_end; |
3175 | reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); | 3176 | reada2 = btrfs_reada_add(csum_root, &key, &key_end); |
3176 | 3177 | ||
3177 | if (!IS_ERR(reada1)) | 3178 | if (!IS_ERR(reada1)) |
3178 | btrfs_reada_wait(reada1); | 3179 | btrfs_reada_wait(reada1); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8d358c547c59..6a8c86074aa4 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -5939,6 +5939,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5939 | u32 i; | 5939 | u32 i; |
5940 | u64 *clone_sources_tmp = NULL; | 5940 | u64 *clone_sources_tmp = NULL; |
5941 | int clone_sources_to_rollback = 0; | 5941 | int clone_sources_to_rollback = 0; |
5942 | unsigned alloc_size; | ||
5942 | int sort_clone_roots = 0; | 5943 | int sort_clone_roots = 0; |
5943 | int index; | 5944 | int index; |
5944 | 5945 | ||
@@ -5978,6 +5979,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5978 | goto out; | 5979 | goto out; |
5979 | } | 5980 | } |
5980 | 5981 | ||
5982 | if (arg->clone_sources_count > | ||
5983 | ULLONG_MAX / sizeof(*arg->clone_sources)) { | ||
5984 | ret = -EINVAL; | ||
5985 | goto out; | ||
5986 | } | ||
5987 | |||
5981 | if (!access_ok(VERIFY_READ, arg->clone_sources, | 5988 | if (!access_ok(VERIFY_READ, arg->clone_sources, |
5982 | sizeof(*arg->clone_sources) * | 5989 | sizeof(*arg->clone_sources) * |
5983 | arg->clone_sources_count)) { | 5990 | arg->clone_sources_count)) { |
@@ -6022,40 +6029,53 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
6022 | sctx->clone_roots_cnt = arg->clone_sources_count; | 6029 | sctx->clone_roots_cnt = arg->clone_sources_count; |
6023 | 6030 | ||
6024 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; | 6031 | sctx->send_max_size = BTRFS_SEND_BUF_SIZE; |
6025 | sctx->send_buf = vmalloc(sctx->send_max_size); | 6032 | sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN); |
6026 | if (!sctx->send_buf) { | 6033 | if (!sctx->send_buf) { |
6027 | ret = -ENOMEM; | 6034 | sctx->send_buf = vmalloc(sctx->send_max_size); |
6028 | goto out; | 6035 | if (!sctx->send_buf) { |
6036 | ret = -ENOMEM; | ||
6037 | goto out; | ||
6038 | } | ||
6029 | } | 6039 | } |
6030 | 6040 | ||
6031 | sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); | 6041 | sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN); |
6032 | if (!sctx->read_buf) { | 6042 | if (!sctx->read_buf) { |
6033 | ret = -ENOMEM; | 6043 | sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); |
6034 | goto out; | 6044 | if (!sctx->read_buf) { |
6045 | ret = -ENOMEM; | ||
6046 | goto out; | ||
6047 | } | ||
6035 | } | 6048 | } |
6036 | 6049 | ||
6037 | sctx->pending_dir_moves = RB_ROOT; | 6050 | sctx->pending_dir_moves = RB_ROOT; |
6038 | sctx->waiting_dir_moves = RB_ROOT; | 6051 | sctx->waiting_dir_moves = RB_ROOT; |
6039 | sctx->orphan_dirs = RB_ROOT; | 6052 | sctx->orphan_dirs = RB_ROOT; |
6040 | 6053 | ||
6041 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | 6054 | alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); |
6042 | (arg->clone_sources_count + 1)); | 6055 | |
6056 | sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); | ||
6043 | if (!sctx->clone_roots) { | 6057 | if (!sctx->clone_roots) { |
6044 | ret = -ENOMEM; | 6058 | sctx->clone_roots = vzalloc(alloc_size); |
6045 | goto out; | 6059 | if (!sctx->clone_roots) { |
6060 | ret = -ENOMEM; | ||
6061 | goto out; | ||
6062 | } | ||
6046 | } | 6063 | } |
6047 | 6064 | ||
6065 | alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); | ||
6066 | |||
6048 | if (arg->clone_sources_count) { | 6067 | if (arg->clone_sources_count) { |
6049 | clone_sources_tmp = vmalloc(arg->clone_sources_count * | 6068 | clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); |
6050 | sizeof(*arg->clone_sources)); | ||
6051 | if (!clone_sources_tmp) { | 6069 | if (!clone_sources_tmp) { |
6052 | ret = -ENOMEM; | 6070 | clone_sources_tmp = vmalloc(alloc_size); |
6053 | goto out; | 6071 | if (!clone_sources_tmp) { |
6072 | ret = -ENOMEM; | ||
6073 | goto out; | ||
6074 | } | ||
6054 | } | 6075 | } |
6055 | 6076 | ||
6056 | ret = copy_from_user(clone_sources_tmp, arg->clone_sources, | 6077 | ret = copy_from_user(clone_sources_tmp, arg->clone_sources, |
6057 | arg->clone_sources_count * | 6078 | alloc_size); |
6058 | sizeof(*arg->clone_sources)); | ||
6059 | if (ret) { | 6079 | if (ret) { |
6060 | ret = -EFAULT; | 6080 | ret = -EFAULT; |
6061 | goto out; | 6081 | goto out; |
@@ -6089,7 +6109,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
6089 | sctx->clone_roots[i].root = clone_root; | 6109 | sctx->clone_roots[i].root = clone_root; |
6090 | clone_sources_to_rollback = i + 1; | 6110 | clone_sources_to_rollback = i + 1; |
6091 | } | 6111 | } |
6092 | vfree(clone_sources_tmp); | 6112 | kvfree(clone_sources_tmp); |
6093 | clone_sources_tmp = NULL; | 6113 | clone_sources_tmp = NULL; |
6094 | } | 6114 | } |
6095 | 6115 | ||
@@ -6207,15 +6227,15 @@ out: | |||
6207 | btrfs_root_dec_send_in_progress(sctx->parent_root); | 6227 | btrfs_root_dec_send_in_progress(sctx->parent_root); |
6208 | 6228 | ||
6209 | kfree(arg); | 6229 | kfree(arg); |
6210 | vfree(clone_sources_tmp); | 6230 | kvfree(clone_sources_tmp); |
6211 | 6231 | ||
6212 | if (sctx) { | 6232 | if (sctx) { |
6213 | if (sctx->send_filp) | 6233 | if (sctx->send_filp) |
6214 | fput(sctx->send_filp); | 6234 | fput(sctx->send_filp); |
6215 | 6235 | ||
6216 | vfree(sctx->clone_roots); | 6236 | kvfree(sctx->clone_roots); |
6217 | vfree(sctx->send_buf); | 6237 | kvfree(sctx->send_buf); |
6218 | vfree(sctx->read_buf); | 6238 | kvfree(sctx->read_buf); |
6219 | 6239 | ||
6220 | name_cache_free(sctx); | 6240 | name_cache_free(sctx); |
6221 | 6241 | ||
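Several hunks in ioctl.c and send.c switch from an unconditional vmalloc() to trying kmalloc() first (__GFP_NOWARN keeps the expected failure for large sizes quiet) with a vmalloc() fallback, and free the result with kvfree(), which handles either kind of allocation. The send path also rejects a clone_sources_count whose multiplication by the element size would overflow. A sketch of both ideas follows; alloc_large_buf and count_overflows are illustrative names, not helpers from the patch.

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/vmalloc.h>

/* Try the cheap physically contiguous allocation first and quietly fall
 * back to vmalloc() for large or fragmented requests; kvfree() releases
 * either result. */
static void *alloc_large_buf(size_t size)
{
        void *buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);

        if (!buf)
                buf = vmalloc(size);
        return buf;
}

/* Validate a user-supplied element count before computing count * size. */
static bool count_overflows(u64 count, size_t elem_size)
{
        return count > ULLONG_MAX / elem_size;
}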
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 00b8f37cc306..bf71071ab6f6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -97,15 +97,6 @@ const char *btrfs_decode_error(int errno) | |||
97 | return errstr; | 97 | return errstr; |
98 | } | 98 | } |
99 | 99 | ||
100 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
101 | { | ||
102 | /* | ||
103 | * today we only save the error info into ram. Long term we'll | ||
104 | * also send it down to the disk | ||
105 | */ | ||
106 | set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); | ||
107 | } | ||
108 | |||
109 | /* btrfs handle error by forcing the filesystem readonly */ | 100 | /* btrfs handle error by forcing the filesystem readonly */ |
110 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | 101 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) |
111 | { | 102 | { |
@@ -131,11 +122,11 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | |||
131 | } | 122 | } |
132 | 123 | ||
133 | /* | 124 | /* |
134 | * __btrfs_std_error decodes expected errors from the caller and | 125 | * __btrfs_handle_fs_error decodes expected errors from the caller and |
135 | * invokes the appropriate error response. | 126 | * invokes the appropriate error response. |
136 | */ | 127 | */ |
137 | __cold | 128 | __cold |
138 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | 129 | void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function, |
139 | unsigned int line, int errno, const char *fmt, ...) | 130 | unsigned int line, int errno, const char *fmt, ...) |
140 | { | 131 | { |
141 | struct super_block *sb = fs_info->sb; | 132 | struct super_block *sb = fs_info->sb; |
@@ -170,8 +161,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | |||
170 | } | 161 | } |
171 | #endif | 162 | #endif |
172 | 163 | ||
164 | /* | ||
165 | * Today we only save the error info to memory. Long term we'll | ||
166 | * also send it down to the disk | ||
167 | */ | ||
168 | set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); | ||
169 | |||
173 | /* Don't go through full error handling during mount */ | 170 | /* Don't go through full error handling during mount */ |
174 | save_error_info(fs_info); | ||
175 | if (sb->s_flags & MS_BORN) | 171 | if (sb->s_flags & MS_BORN) |
176 | btrfs_handle_error(fs_info); | 172 | btrfs_handle_error(fs_info); |
177 | } | 173 | } |
@@ -252,7 +248,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
252 | /* Wake up anybody who may be waiting on this transaction */ | 248 | /* Wake up anybody who may be waiting on this transaction */ |
253 | wake_up(&root->fs_info->transaction_wait); | 249 | wake_up(&root->fs_info->transaction_wait); |
254 | wake_up(&root->fs_info->transaction_blocked_wait); | 250 | wake_up(&root->fs_info->transaction_blocked_wait); |
255 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); | 251 | __btrfs_handle_fs_error(root->fs_info, function, line, errno, NULL); |
256 | } | 252 | } |
257 | /* | 253 | /* |
258 | * __btrfs_panic decodes unexpected, fatal errors from the caller, | 254 | * __btrfs_panic decodes unexpected, fatal errors from the caller, |
@@ -1160,7 +1156,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
1160 | return 0; | 1156 | return 0; |
1161 | } | 1157 | } |
1162 | 1158 | ||
1163 | btrfs_wait_ordered_roots(fs_info, -1); | 1159 | btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); |
1164 | 1160 | ||
1165 | trans = btrfs_attach_transaction_barrier(root); | 1161 | trans = btrfs_attach_transaction_barrier(root); |
1166 | if (IS_ERR(trans)) { | 1162 | if (IS_ERR(trans)) { |
@@ -1488,10 +1484,10 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, | |||
1488 | memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); | 1484 | memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); |
1489 | } else { | 1485 | } else { |
1490 | /* | 1486 | /* |
1491 | * Since SELinux(the only one supports security_mnt_opts) does | 1487 | * Since SELinux (the only one supporting security_mnt_opts) |
1492 | * NOT support changing context during remount/mount same sb, | 1488 | * does NOT support changing context during remount/mount of |
1493 | * This must be the same or part of the same security options, | 1489 | * the same sb, this must be the same or part of the same |
1494 | * just free it. | 1490 | * security options, just free it. |
1495 | */ | 1491 | */ |
1496 | security_free_mnt_opts(sec_opts); | 1492 | security_free_mnt_opts(sec_opts); |
1497 | } | 1493 | } |
@@ -1669,8 +1665,8 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, | |||
1669 | unsigned long old_opts) | 1665 | unsigned long old_opts) |
1670 | { | 1666 | { |
1671 | /* | 1667 | /* |
1672 | * We need cleanup all defragable inodes if the autodefragment is | 1668 | * We need to cleanup all defragable inodes if the autodefragment is |
1673 | * close or the fs is R/O. | 1669 | * close or the filesystem is read only. |
1674 | */ | 1670 | */ |
1675 | if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && | 1671 | if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && |
1676 | (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || | 1672 | (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || |
@@ -2051,9 +2047,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2051 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 2047 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
2052 | int ret; | 2048 | int ret; |
2053 | u64 thresh = 0; | 2049 | u64 thresh = 0; |
2050 | int mixed = 0; | ||
2054 | 2051 | ||
2055 | /* | 2052 | /* |
2056 | * holding chunk_muext to avoid allocating new chunks, holding | 2053 | * holding chunk_mutex to avoid allocating new chunks, holding |
2057 | * device_list_mutex to avoid the device being removed | 2054 | * device_list_mutex to avoid the device being removed |
2058 | */ | 2055 | */ |
2059 | rcu_read_lock(); | 2056 | rcu_read_lock(); |
@@ -2076,8 +2073,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2076 | } | 2073 | } |
2077 | } | 2074 | } |
2078 | } | 2075 | } |
2079 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | 2076 | |
2080 | total_free_meta += found->disk_total - found->disk_used; | 2077 | /* |
2078 | * Metadata in mixed block group profiles is accounted in data | ||
2079 | */ | ||
2080 | if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) { | ||
2081 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
2082 | mixed = 1; | ||
2083 | else | ||
2084 | total_free_meta += found->disk_total - | ||
2085 | found->disk_used; | ||
2086 | } | ||
2081 | 2087 | ||
2082 | total_used += found->disk_used; | 2088 | total_used += found->disk_used; |
2083 | } | 2089 | } |
@@ -2090,7 +2096,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2090 | 2096 | ||
2091 | /* Account global block reserve as used, it's in logical size already */ | 2097 | /* Account global block reserve as used, it's in logical size already */ |
2092 | spin_lock(&block_rsv->lock); | 2098 | spin_lock(&block_rsv->lock); |
2093 | buf->f_bfree -= block_rsv->size >> bits; | 2099 | /* Mixed block groups accounting is not byte-accurate, avoid overflow */ |
2100 | if (buf->f_bfree >= block_rsv->size >> bits) | ||
2101 | buf->f_bfree -= block_rsv->size >> bits; | ||
2102 | else | ||
2103 | buf->f_bfree = 0; | ||
2094 | spin_unlock(&block_rsv->lock); | 2104 | spin_unlock(&block_rsv->lock); |
2095 | 2105 | ||
2096 | buf->f_bavail = div_u64(total_free_data, factor); | 2106 | buf->f_bavail = div_u64(total_free_data, factor); |
@@ -2115,7 +2125,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2115 | */ | 2125 | */ |
2116 | thresh = 4 * 1024 * 1024; | 2126 | thresh = 4 * 1024 * 1024; |
2117 | 2127 | ||
2118 | if (total_free_meta - thresh < block_rsv->size) | 2128 | if (!mixed && total_free_meta - thresh < block_rsv->size) |
2119 | buf->f_bavail = 0; | 2129 | buf->f_bavail = 0; |
2120 | 2130 | ||
2121 | buf->f_type = BTRFS_SUPER_MAGIC; | 2131 | buf->f_type = BTRFS_SUPER_MAGIC; |
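Two details in the statfs hunk deserve a note: metadata held in mixed (data plus metadata) block groups is now accounted with data instead of also being added to the free-metadata total, and the global block reserve is subtracted from f_bfree only when it actually fits, because f_bfree is unsigned and an oversized reserve would wrap around to a huge bogus value. The clamp is plain saturating subtraction; sub_clamped below is an illustrative helper, not one the patch adds.

#include <linux/types.h>

/* Saturating subtraction: never let an unsigned counter wrap below zero. */
static inline u64 sub_clamped(u64 total, u64 reserved)
{
        return total >= reserved ? total - reserved : 0;
}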
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 539e7b5e3f86..4879656bda3c 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -120,6 +120,9 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj, | |||
120 | if (!fs_info) | 120 | if (!fs_info) |
121 | return -EPERM; | 121 | return -EPERM; |
122 | 122 | ||
123 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
124 | return -EROFS; | ||
125 | |||
123 | ret = kstrtoul(skip_spaces(buf), 0, &val); | 126 | ret = kstrtoul(skip_spaces(buf), 0, &val); |
124 | if (ret) | 127 | if (ret) |
125 | return ret; | 128 | return ret; |
@@ -364,7 +367,13 @@ static ssize_t btrfs_label_show(struct kobject *kobj, | |||
364 | { | 367 | { |
365 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | 368 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); |
366 | char *label = fs_info->super_copy->label; | 369 | char *label = fs_info->super_copy->label; |
367 | return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label); | 370 | ssize_t ret; |
371 | |||
372 | spin_lock(&fs_info->super_lock); | ||
373 | ret = snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label); | ||
374 | spin_unlock(&fs_info->super_lock); | ||
375 | |||
376 | return ret; | ||
368 | } | 377 | } |
369 | 378 | ||
370 | static ssize_t btrfs_label_store(struct kobject *kobj, | 379 | static ssize_t btrfs_label_store(struct kobject *kobj, |
@@ -374,6 +383,9 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
374 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | 383 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); |
375 | size_t p_len; | 384 | size_t p_len; |
376 | 385 | ||
386 | if (!fs_info) | ||
387 | return -EPERM; | ||
388 | |||
377 | if (fs_info->sb->s_flags & MS_RDONLY) | 389 | if (fs_info->sb->s_flags & MS_RDONLY) |
378 | return -EROFS; | 390 | return -EROFS; |
379 | 391 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 43885e51b882..5b0b758a3f79 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -311,10 +311,11 @@ loop: | |||
311 | * when the transaction commits | 311 | * when the transaction commits |
312 | */ | 312 | */ |
313 | static int record_root_in_trans(struct btrfs_trans_handle *trans, | 313 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
314 | struct btrfs_root *root) | 314 | struct btrfs_root *root, |
315 | int force) | ||
315 | { | 316 | { |
316 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && | 317 | if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) && |
317 | root->last_trans < trans->transid) { | 318 | root->last_trans < trans->transid) || force) { |
318 | WARN_ON(root == root->fs_info->extent_root); | 319 | WARN_ON(root == root->fs_info->extent_root); |
319 | WARN_ON(root->commit_root != root->node); | 320 | WARN_ON(root->commit_root != root->node); |
320 | 321 | ||
@@ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
331 | smp_wmb(); | 332 | smp_wmb(); |
332 | 333 | ||
333 | spin_lock(&root->fs_info->fs_roots_radix_lock); | 334 | spin_lock(&root->fs_info->fs_roots_radix_lock); |
334 | if (root->last_trans == trans->transid) { | 335 | if (root->last_trans == trans->transid && !force) { |
335 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 336 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
336 | return 0; | 337 | return 0; |
337 | } | 338 | } |
@@ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
402 | return 0; | 403 | return 0; |
403 | 404 | ||
404 | mutex_lock(&root->fs_info->reloc_mutex); | 405 | mutex_lock(&root->fs_info->reloc_mutex); |
405 | record_root_in_trans(trans, root); | 406 | record_root_in_trans(trans, root, 0); |
406 | mutex_unlock(&root->fs_info->reloc_mutex); | 407 | mutex_unlock(&root->fs_info->reloc_mutex); |
407 | 408 | ||
408 | return 0; | 409 | return 0; |
@@ -1310,6 +1311,97 @@ int btrfs_defrag_root(struct btrfs_root *root) | |||
1310 | return ret; | 1311 | return ret; |
1311 | } | 1312 | } |
1312 | 1313 | ||
1314 | /* Bisectability fixup, remove in 4.8 */ | ||
1315 | #ifndef btrfs_std_error | ||
1316 | #define btrfs_std_error btrfs_handle_fs_error | ||
1317 | #endif | ||
1318 | |||
1319 | /* | ||
1320 | * Do all special snapshot related qgroup dirty hack. | ||
1321 | * | ||
1322 | * Will do all needed qgroup inherit and dirty hack like switch commit | ||
1323 | * roots inside one transaction and write all btree into disk, to make | ||
1324 | * qgroup works. | ||
1325 | */ | ||
1326 | static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, | ||
1327 | struct btrfs_root *src, | ||
1328 | struct btrfs_root *parent, | ||
1329 | struct btrfs_qgroup_inherit *inherit, | ||
1330 | u64 dst_objectid) | ||
1331 | { | ||
1332 | struct btrfs_fs_info *fs_info = src->fs_info; | ||
1333 | int ret; | ||
1334 | |||
1335 | /* | ||
1336 | * Save some performance in the case that qgroups are not | ||
1337 | * enabled. If this check races with the ioctl, rescan will | ||
1338 | * kick in anyway. | ||
1339 | */ | ||
1340 | mutex_lock(&fs_info->qgroup_ioctl_lock); | ||
1341 | if (!fs_info->quota_enabled) { | ||
1342 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | ||
1343 | return 0; | ||
1344 | } | ||
1345 | mutex_unlock(&fs_info->qgroup_ioctl_lock); | ||
1346 | |||
1347 | /* | ||
1348 | * We are going to commit transaction, see btrfs_commit_transaction() | ||
1349 | * comment for reason locking tree_log_mutex | ||
1350 | */ | ||
1351 | mutex_lock(&fs_info->tree_log_mutex); | ||
1352 | |||
1353 | ret = commit_fs_roots(trans, src); | ||
1354 | if (ret) | ||
1355 | goto out; | ||
1356 | ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); | ||
1357 | if (ret < 0) | ||
1358 | goto out; | ||
1359 | ret = btrfs_qgroup_account_extents(trans, fs_info); | ||
1360 | if (ret < 0) | ||
1361 | goto out; | ||
1362 | |||
1363 | /* Now qgroup are all updated, we can inherit it to new qgroups */ | ||
1364 | ret = btrfs_qgroup_inherit(trans, fs_info, | ||
1365 | src->root_key.objectid, dst_objectid, | ||
1366 | inherit); | ||
1367 | if (ret < 0) | ||
1368 | goto out; | ||
1369 | |||
1370 | /* | ||
1371 | * Now we do a simplified commit transaction, which will: | ||
1372 | * 1) commit all subvolume and extent tree | ||
1373 | * To ensure all subvolume and extent tree have a valid | ||
1374 | * commit_root to accounting later insert_dir_item() | ||
1375 | * 2) write all btree blocks onto disk | ||
1376 | * This is to make sure later btree modification will be cowed | ||
1377 | * Or commit_root can be populated and cause wrong qgroup numbers | ||
1378 | * In this simplified commit, we don't really care about other trees | ||
1379 | * like chunk and root tree, as they won't affect qgroup. | ||
1380 | * And we don't write super to avoid half committed status. | ||
1381 | */ | ||
1382 | ret = commit_cowonly_roots(trans, src); | ||
1383 | if (ret) | ||
1384 | goto out; | ||
1385 | switch_commit_roots(trans->transaction, fs_info); | ||
1386 | ret = btrfs_write_and_wait_transaction(trans, src); | ||
1387 | if (ret) | ||
1388 | btrfs_std_error(fs_info, ret, | ||
1389 | "Error while writing out transaction for qgroup"); | ||
1390 | |||
1391 | out: | ||
1392 | mutex_unlock(&fs_info->tree_log_mutex); | ||
1393 | |||
1394 | /* | ||
1395 | * Force parent root to be updated, as we recorded it before so its | ||
1396 | * last_trans == cur_transid. | ||
1397 | * Or it won't be committed again onto disk after later | ||
1398 | * insert_dir_item() | ||
1399 | */ | ||
1400 | if (!ret) | ||
1401 | record_root_in_trans(trans, parent, 1); | ||
1402 | return ret; | ||
1403 | } | ||
1404 | |||
1313 | /* | 1405 | /* |
1314 | * new snapshots need to be created at a very specific time in the | 1406 | * new snapshots need to be created at a very specific time in the |
1315 | * transaction commit. This does the actual creation. | 1407 | * transaction commit. This does the actual creation. |
@@ -1383,7 +1475,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1383 | dentry = pending->dentry; | 1475 | dentry = pending->dentry; |
1384 | parent_inode = pending->dir; | 1476 | parent_inode = pending->dir; |
1385 | parent_root = BTRFS_I(parent_inode)->root; | 1477 | parent_root = BTRFS_I(parent_inode)->root; |
1386 | record_root_in_trans(trans, parent_root); | 1478 | record_root_in_trans(trans, parent_root, 0); |
1387 | 1479 | ||
1388 | cur_time = current_fs_time(parent_inode->i_sb); | 1480 | cur_time = current_fs_time(parent_inode->i_sb); |
1389 | 1481 | ||
@@ -1420,7 +1512,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1420 | goto fail; | 1512 | goto fail; |
1421 | } | 1513 | } |
1422 | 1514 | ||
1423 | record_root_in_trans(trans, root); | 1515 | record_root_in_trans(trans, root, 0); |
1424 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 1516 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
1425 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 1517 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
1426 | btrfs_check_and_init_root_item(new_root_item); | 1518 | btrfs_check_and_init_root_item(new_root_item); |
@@ -1516,6 +1608,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1516 | goto fail; | 1608 | goto fail; |
1517 | } | 1609 | } |
1518 | 1610 | ||
1611 | /* | ||
1612 | * Do special qgroup accounting for snapshot, as we do some qgroup | ||
1613 | * snapshot hack to do fast snapshot. | ||
1614 | * To co-operate with that hack, we do hack again. | ||
1615 | * Or snapshot will be greatly slowed down by a subtree qgroup rescan | ||
1616 | */ | ||
1617 | ret = qgroup_account_snapshot(trans, root, parent_root, | ||
1618 | pending->inherit, objectid); | ||
1619 | if (ret < 0) | ||
1620 | goto fail; | ||
1621 | |||
1519 | ret = btrfs_insert_dir_item(trans, parent_root, | 1622 | ret = btrfs_insert_dir_item(trans, parent_root, |
1520 | dentry->d_name.name, dentry->d_name.len, | 1623 | dentry->d_name.name, dentry->d_name.len, |
1521 | parent_inode, &key, | 1624 | parent_inode, &key, |
@@ -1559,23 +1662,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
1559 | goto fail; | 1662 | goto fail; |
1560 | } | 1663 | } |
1561 | 1664 | ||
1562 | /* | ||
1563 | * account qgroup counters before qgroup_inherit() | ||
1564 | */ | ||
1565 | ret = btrfs_qgroup_prepare_account_extents(trans, fs_info); | ||
1566 | if (ret) | ||
1567 | goto fail; | ||
1568 | ret = btrfs_qgroup_account_extents(trans, fs_info); | ||
1569 | if (ret) | ||
1570 | goto fail; | ||
1571 | ret = btrfs_qgroup_inherit(trans, fs_info, | ||
1572 | root->root_key.objectid, | ||
1573 | objectid, pending->inherit); | ||
1574 | if (ret) { | ||
1575 | btrfs_abort_transaction(trans, root, ret); | ||
1576 | goto fail; | ||
1577 | } | ||
1578 | |||
1579 | fail: | 1665 | fail: |
1580 | pending->error = ret; | 1666 | pending->error = ret; |
1581 | dir_item_existed: | 1667 | dir_item_existed: |
@@ -1821,7 +1907,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | |||
1821 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) | 1907 | static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) |
1822 | { | 1908 | { |
1823 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | 1909 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
1824 | btrfs_wait_ordered_roots(fs_info, -1); | 1910 | btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); |
1825 | } | 1911 | } |
1826 | 1912 | ||
1827 | static inline void | 1913 | static inline void |
@@ -2145,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
2145 | 2231 | ||
2146 | ret = btrfs_write_and_wait_transaction(trans, root); | 2232 | ret = btrfs_write_and_wait_transaction(trans, root); |
2147 | if (ret) { | 2233 | if (ret) { |
2148 | btrfs_std_error(root->fs_info, ret, | 2234 | btrfs_handle_fs_error(root->fs_info, ret, |
2149 | "Error while writing out transaction"); | 2235 | "Error while writing out transaction"); |
2150 | mutex_unlock(&root->fs_info->tree_log_mutex); | 2236 | mutex_unlock(&root->fs_info->tree_log_mutex); |
2151 | goto scrub_continue; | 2237 | goto scrub_continue; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e692eea87af6..8aaca5c6af94 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -4141,6 +4141,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
4141 | 4141 | ||
4142 | INIT_LIST_HEAD(&extents); | 4142 | INIT_LIST_HEAD(&extents); |
4143 | 4143 | ||
4144 | down_write(&BTRFS_I(inode)->dio_sem); | ||
4144 | write_lock(&tree->lock); | 4145 | write_lock(&tree->lock); |
4145 | test_gen = root->fs_info->last_trans_committed; | 4146 | test_gen = root->fs_info->last_trans_committed; |
4146 | 4147 | ||
@@ -4169,13 +4170,20 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
4169 | } | 4170 | } |
4170 | 4171 | ||
4171 | list_sort(NULL, &extents, extent_cmp); | 4172 | list_sort(NULL, &extents, extent_cmp); |
4173 | btrfs_get_logged_extents(inode, logged_list, start, end); | ||
4172 | /* | 4174 | /* |
4173 | * Collect any new ordered extents within the range. This is to | 4175 | * Some ordered extents started by fsync might have completed |
4174 | * prevent logging file extent items without waiting for the disk | 4176 | * before we could collect them into the list logged_list, which |
4175 | * location they point to being written. We do this only to deal | 4177 | * means they're gone, not in our logged_list nor in the inode's |
4176 | * with races against concurrent lockless direct IO writes. | 4178 | * ordered tree. We want the application/user space to know an |
4179 | * error happened while attempting to persist file data so that | ||
4180 | * it can take proper action. If such error happened, we leave | ||
4181 | * without writing to the log tree and the fsync must report the | ||
4182 | * file data write error and not commit the current transaction. | ||
4177 | */ | 4183 | */ |
4178 | btrfs_get_logged_extents(inode, logged_list, start, end); | 4184 | ret = btrfs_inode_check_errors(inode); |
4185 | if (ret) | ||
4186 | ctx->io_err = ret; | ||
4179 | process: | 4187 | process: |
4180 | while (!list_empty(&extents)) { | 4188 | while (!list_empty(&extents)) { |
4181 | em = list_entry(extents.next, struct extent_map, list); | 4189 | em = list_entry(extents.next, struct extent_map, list); |
@@ -4202,6 +4210,7 @@ process: | |||
4202 | } | 4210 | } |
4203 | WARN_ON(!list_empty(&extents)); | 4211 | WARN_ON(!list_empty(&extents)); |
4204 | write_unlock(&tree->lock); | 4212 | write_unlock(&tree->lock); |
4213 | up_write(&BTRFS_I(inode)->dio_sem); | ||
4205 | 4214 | ||
4206 | btrfs_release_path(path); | 4215 | btrfs_release_path(path); |
4207 | return ret; | 4216 | return ret; |
@@ -4623,23 +4632,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4623 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 4632 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
4624 | 4633 | ||
4625 | /* | 4634 | /* |
4626 | * Collect ordered extents only if we are logging data. This is to | ||
4627 | * ensure a subsequent request to log this inode in LOG_INODE_ALL mode | ||
4628 | * will process the ordered extents if they still exists at the time, | ||
4629 | * because when we collect them we test and set for the flag | ||
4630 | * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the | ||
4631 | * same ordered extents. The consequence for the LOG_INODE_ALL log mode | ||
4632 | * not processing the ordered extents is that we end up logging the | ||
4633 | * corresponding file extent items, based on the extent maps in the | ||
4634 | * inode's extent_map_tree's modified_list, without logging the | ||
4635 | * respective checksums (since the may still be only attached to the | ||
4636 | * ordered extents and have not been inserted in the csum tree by | ||
4637 | * btrfs_finish_ordered_io() yet). | ||
4638 | */ | ||
4639 | if (inode_only == LOG_INODE_ALL) | ||
4640 | btrfs_get_logged_extents(inode, &logged_list, start, end); | ||
4641 | |||
4642 | /* | ||
4643 | * a brute force approach to making sure we get the most uptodate | 4635 | * a brute force approach to making sure we get the most uptodate |
4644 | * copies of everything. | 4636 | * copies of everything. |
4645 | */ | 4637 | */ |
@@ -4846,21 +4838,6 @@ log_extents: | |||
4846 | goto out_unlock; | 4838 | goto out_unlock; |
4847 | } | 4839 | } |
4848 | if (fast_search) { | 4840 | if (fast_search) { |
4849 | /* | ||
4850 | * Some ordered extents started by fsync might have completed | ||
4851 | * before we collected the ordered extents in logged_list, which | ||
4852 | * means they're gone, not in our logged_list nor in the inode's | ||
4853 | * ordered tree. We want the application/user space to know an | ||
4854 | * error happened while attempting to persist file data so that | ||
4855 | * it can take proper action. If such error happened, we leave | ||
4856 | * without writing to the log tree and the fsync must report the | ||
4857 | * file data write error and not commit the current transaction. | ||
4858 | */ | ||
4859 | err = btrfs_inode_check_errors(inode); | ||
4860 | if (err) { | ||
4861 | ctx->io_err = err; | ||
4862 | goto out_unlock; | ||
4863 | } | ||
4864 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, | 4841 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4865 | &logged_list, ctx, start, end); | 4842 | &logged_list, ctx, start, end); |
4866 | if (ret) { | 4843 | if (ret) { |
@@ -5158,7 +5135,7 @@ process_leaf: | |||
5158 | } | 5135 | } |
5159 | 5136 | ||
5160 | ctx->log_new_dentries = false; | 5137 | ctx->log_new_dentries = false; |
5161 | if (type == BTRFS_FT_DIR) | 5138 | if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) |
5162 | log_mode = LOG_INODE_ALL; | 5139 | log_mode = LOG_INODE_ALL; |
5163 | btrfs_release_path(path); | 5140 | btrfs_release_path(path); |
5164 | ret = btrfs_log_inode(trans, root, di_inode, | 5141 | ret = btrfs_log_inode(trans, root, di_inode, |
@@ -5278,11 +5255,16 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, | |||
5278 | if (IS_ERR(dir_inode)) | 5255 | if (IS_ERR(dir_inode)) |
5279 | continue; | 5256 | continue; |
5280 | 5257 | ||
5258 | if (ctx) | ||
5259 | ctx->log_new_dentries = false; | ||
5281 | ret = btrfs_log_inode(trans, root, dir_inode, | 5260 | ret = btrfs_log_inode(trans, root, dir_inode, |
5282 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); | 5261 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); |
5283 | if (!ret && | 5262 | if (!ret && |
5284 | btrfs_must_commit_transaction(trans, dir_inode)) | 5263 | btrfs_must_commit_transaction(trans, dir_inode)) |
5285 | ret = 1; | 5264 | ret = 1; |
5265 | if (!ret && ctx && ctx->log_new_dentries) | ||
5266 | ret = log_new_dir_dentries(trans, root, | ||
5267 | dir_inode, ctx); | ||
5286 | iput(dir_inode); | 5268 | iput(dir_inode); |
5287 | if (ret) | 5269 | if (ret) |
5288 | goto out; | 5270 | goto out; |
@@ -5519,7 +5501,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
5519 | 5501 | ||
5520 | ret = walk_log_tree(trans, log_root_tree, &wc); | 5502 | ret = walk_log_tree(trans, log_root_tree, &wc); |
5521 | if (ret) { | 5503 | if (ret) { |
5522 | btrfs_std_error(fs_info, ret, "Failed to pin buffers while " | 5504 | btrfs_handle_fs_error(fs_info, ret, "Failed to pin buffers while " |
5523 | "recovering log root tree."); | 5505 | "recovering log root tree."); |
5524 | goto error; | 5506 | goto error; |
5525 | } | 5507 | } |
@@ -5533,7 +5515,7 @@ again: | |||
5533 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); | 5515 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); |
5534 | 5516 | ||
5535 | if (ret < 0) { | 5517 | if (ret < 0) { |
5536 | btrfs_std_error(fs_info, ret, | 5518 | btrfs_handle_fs_error(fs_info, ret, |
5537 | "Couldn't find tree log root."); | 5519 | "Couldn't find tree log root."); |
5538 | goto error; | 5520 | goto error; |
5539 | } | 5521 | } |
@@ -5551,7 +5533,7 @@ again: | |||
5551 | log = btrfs_read_fs_root(log_root_tree, &found_key); | 5533 | log = btrfs_read_fs_root(log_root_tree, &found_key); |
5552 | if (IS_ERR(log)) { | 5534 | if (IS_ERR(log)) { |
5553 | ret = PTR_ERR(log); | 5535 | ret = PTR_ERR(log); |
5554 | btrfs_std_error(fs_info, ret, | 5536 | btrfs_handle_fs_error(fs_info, ret, |
5555 | "Couldn't read tree log root."); | 5537 | "Couldn't read tree log root."); |
5556 | goto error; | 5538 | goto error; |
5557 | } | 5539 | } |
@@ -5566,7 +5548,7 @@ again: | |||
5566 | free_extent_buffer(log->node); | 5548 | free_extent_buffer(log->node); |
5567 | free_extent_buffer(log->commit_root); | 5549 | free_extent_buffer(log->commit_root); |
5568 | kfree(log); | 5550 | kfree(log); |
5569 | btrfs_std_error(fs_info, ret, "Couldn't read target root " | 5551 | btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root " |
5570 | "for tree log recovery."); | 5552 | "for tree log recovery."); |
5571 | goto error; | 5553 | goto error; |
5572 | } | 5554 | } |
@@ -5652,11 +5634,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | |||
5652 | * into the file. When the file is logged we check it and | 5634 | * into the file. When the file is logged we check it and |
5653 | * don't log the parents if the file is fully on disk. | 5635 | * don't log the parents if the file is fully on disk. |
5654 | */ | 5636 | */ |
5655 | if (S_ISREG(inode->i_mode)) { | 5637 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
5656 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 5638 | BTRFS_I(inode)->last_unlink_trans = trans->transid; |
5657 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | 5639 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
5658 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
5659 | } | ||
5660 | 5640 | ||
5661 | /* | 5641 | /* |
5662 | * if this directory was already logged any new | 5642 | * if this directory was already logged any new |
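The tree-log changes hang together: btrfs_log_inode() no longer collects ordered extents up front; btrfs_log_changed_extents() now takes the inode's new dio_sem exclusively, gathers the ordered extents itself, and checks for data write-back errors before logging. The sketch below shows only the generic rw-semaphore idiom this relies on; it assumes the direct IO write side (the inode.c part of the series, not in this excerpt) takes the semaphore shared.

#include <linux/rwsem.h>

static DECLARE_RWSEM(example_dio_sem);  /* stand-in for BTRFS_I(inode)->dio_sem */

/* Writers take the semaphore shared, so they exclude only the logging
 * side, not each other (assumed behaviour of the inode.c side). */
static void dio_write_side(void)
{
        down_read(&example_dio_sem);
        /* ... submit the direct IO write ... */
        up_read(&example_dio_sem);
}

/* The fsync/logging side takes it exclusively, so it sees a stable set
 * of extents while btrfs_log_changed_extents() runs. */
static void fsync_log_side(void)
{
        down_write(&example_dio_sem);
        /* ... collect and log the changed extents ... */
        up_write(&example_dio_sem);
}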
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bfb80da3e6eb..2b88127bba5b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -118,6 +118,21 @@ const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = { | |||
118 | [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6, | 118 | [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6, |
119 | }; | 119 | }; |
120 | 120 | ||
121 | /* | ||
122 | * Table to convert BTRFS_RAID_* to the error code if minimum number of devices | ||
123 | * condition is not met. Zero means there's no corresponding | ||
124 | * BTRFS_ERROR_DEV_*_NOT_MET value. | ||
125 | */ | ||
126 | const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = { | ||
127 | [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, | ||
128 | [BTRFS_RAID_RAID1] = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET, | ||
129 | [BTRFS_RAID_DUP] = 0, | ||
130 | [BTRFS_RAID_RAID0] = 0, | ||
131 | [BTRFS_RAID_SINGLE] = 0, | ||
132 | [BTRFS_RAID_RAID5] = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, | ||
133 | [BTRFS_RAID_RAID6] = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, | ||
134 | }; | ||
135 | |||
121 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 136 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
122 | struct btrfs_root *root, | 137 | struct btrfs_root *root, |
123 | struct btrfs_device *device); | 138 | struct btrfs_device *device); |
@@ -699,7 +714,8 @@ static noinline int device_list_add(const char *path, | |||
699 | * if there is new btrfs on an already registered device, | 714 | * if there is new btrfs on an already registered device, |
700 | * then remove the stale device entry. | 715 | * then remove the stale device entry. |
701 | */ | 716 | */ |
702 | btrfs_free_stale_device(device); | 717 | if (ret > 0) |
718 | btrfs_free_stale_device(device); | ||
703 | 719 | ||
704 | *fs_devices_ret = fs_devices; | 720 | *fs_devices_ret = fs_devices; |
705 | 721 | ||
@@ -988,6 +1004,56 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
988 | return ret; | 1004 | return ret; |
989 | } | 1005 | } |
990 | 1006 | ||
1007 | void btrfs_release_disk_super(struct page *page) | ||
1008 | { | ||
1009 | kunmap(page); | ||
1010 | put_page(page); | ||
1011 | } | ||
1012 | |||
1013 | int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, | ||
1014 | struct page **page, struct btrfs_super_block **disk_super) | ||
1015 | { | ||
1016 | void *p; | ||
1017 | pgoff_t index; | ||
1018 | |||
1019 | /* make sure our super fits in the device */ | ||
1020 | if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) | ||
1021 | return 1; | ||
1022 | |||
1023 | /* make sure our super fits in the page */ | ||
1024 | if (sizeof(**disk_super) > PAGE_SIZE) | ||
1025 | return 1; | ||
1026 | |||
1027 | /* make sure our super doesn't straddle pages on disk */ | ||
1028 | index = bytenr >> PAGE_SHIFT; | ||
1029 | if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index) | ||
1030 | return 1; | ||
1031 | |||
1032 | /* pull in the page with our super */ | ||
1033 | *page = read_cache_page_gfp(bdev->bd_inode->i_mapping, | ||
1034 | index, GFP_KERNEL); | ||
1035 | |||
1036 | if (IS_ERR_OR_NULL(*page)) | ||
1037 | return 1; | ||
1038 | |||
1039 | p = kmap(*page); | ||
1040 | |||
1041 | /* align our pointer to the offset of the super block */ | ||
1042 | *disk_super = p + (bytenr & ~PAGE_MASK); | ||
1043 | |||
1044 | if (btrfs_super_bytenr(*disk_super) != bytenr || | ||
1045 | btrfs_super_magic(*disk_super) != BTRFS_MAGIC) { | ||
1046 | btrfs_release_disk_super(*page); | ||
1047 | return 1; | ||
1048 | } | ||
1049 | |||
1050 | if ((*disk_super)->label[0] && | ||
1051 | (*disk_super)->label[BTRFS_LABEL_SIZE - 1]) | ||
1052 | (*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
1053 | |||
1054 | return 0; | ||
1055 | } | ||
1056 | |||
991 | /* | 1057 | /* |
992 | * Look for a btrfs signature on a device. This may be called out of the mount path | 1058 | * Look for a btrfs signature on a device. This may be called out of the mount path |
993 | * and we are not allowed to call set_blocksize during the scan. The superblock | 1059 | * and we are not allowed to call set_blocksize during the scan. The superblock |
@@ -999,13 +1065,11 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
999 | struct btrfs_super_block *disk_super; | 1065 | struct btrfs_super_block *disk_super; |
1000 | struct block_device *bdev; | 1066 | struct block_device *bdev; |
1001 | struct page *page; | 1067 | struct page *page; |
1002 | void *p; | ||
1003 | int ret = -EINVAL; | 1068 | int ret = -EINVAL; |
1004 | u64 devid; | 1069 | u64 devid; |
1005 | u64 transid; | 1070 | u64 transid; |
1006 | u64 total_devices; | 1071 | u64 total_devices; |
1007 | u64 bytenr; | 1072 | u64 bytenr; |
1008 | pgoff_t index; | ||
1009 | 1073 | ||
1010 | /* | 1074 | /* |
1011 | * we would like to check all the supers, but that would make | 1075 | * we would like to check all the supers, but that would make |
@@ -1018,41 +1082,14 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
1018 | mutex_lock(&uuid_mutex); | 1082 | mutex_lock(&uuid_mutex); |
1019 | 1083 | ||
1020 | bdev = blkdev_get_by_path(path, flags, holder); | 1084 | bdev = blkdev_get_by_path(path, flags, holder); |
1021 | |||
1022 | if (IS_ERR(bdev)) { | 1085 | if (IS_ERR(bdev)) { |
1023 | ret = PTR_ERR(bdev); | 1086 | ret = PTR_ERR(bdev); |
1024 | goto error; | 1087 | goto error; |
1025 | } | 1088 | } |
1026 | 1089 | ||
1027 | /* make sure our super fits in the device */ | 1090 | if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) |
1028 | if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) | ||
1029 | goto error_bdev_put; | ||
1030 | |||
1031 | /* make sure our super fits in the page */ | ||
1032 | if (sizeof(*disk_super) > PAGE_SIZE) | ||
1033 | goto error_bdev_put; | ||
1034 | |||
1035 | /* make sure our super doesn't straddle pages on disk */ | ||
1036 | index = bytenr >> PAGE_SHIFT; | ||
1037 | if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index) | ||
1038 | goto error_bdev_put; | ||
1039 | |||
1040 | /* pull in the page with our super */ | ||
1041 | page = read_cache_page_gfp(bdev->bd_inode->i_mapping, | ||
1042 | index, GFP_NOFS); | ||
1043 | |||
1044 | if (IS_ERR_OR_NULL(page)) | ||
1045 | goto error_bdev_put; | 1091 | goto error_bdev_put; |
1046 | 1092 | ||
1047 | p = kmap(page); | ||
1048 | |||
1049 | /* align our pointer to the offset of the super block */ | ||
1050 | disk_super = p + (bytenr & ~PAGE_MASK); | ||
1051 | |||
1052 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
1053 | btrfs_super_magic(disk_super) != BTRFS_MAGIC) | ||
1054 | goto error_unmap; | ||
1055 | |||
1056 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 1093 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
1057 | transid = btrfs_super_generation(disk_super); | 1094 | transid = btrfs_super_generation(disk_super); |
1058 | total_devices = btrfs_super_num_devices(disk_super); | 1095 | total_devices = btrfs_super_num_devices(disk_super); |
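The superblock read is now factored into btrfs_read_disk_super() and btrfs_release_disk_super(), and the reworked btrfs_scan_one_device() in the surrounding hunks follows the intended pairing. As a recap, here is a minimal kernel-style sketch of a caller; the function name is made up, error handling is trimmed, and it is illustrative only, not a standalone program.

/* print the fsid of whatever btrfs super block sits on @bdev, if any */
static void example_probe_super(struct block_device *bdev)
{
	struct btrfs_super_block *disk_super;
	struct page *page;
	u64 bytenr = btrfs_sb_offset(0);	/* primary copy, 64K into the device */

	/* non-zero means no usable super here: bad magic, wrong bytenr, or it
	 * doesn't fit in the device/page */
	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super))
		return;

	/* disk_super points into the kmap()ed page returned via @page */
	pr_info("BTRFS: example: found fsid %pU\n", disk_super->fsid);

	/* kunmap() + put_page() in one call */
	btrfs_release_disk_super(page);
}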
@@ -1060,8 +1097,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
1060 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 1097 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
1061 | if (ret > 0) { | 1098 | if (ret > 0) { |
1062 | if (disk_super->label[0]) { | 1099 | if (disk_super->label[0]) { |
1063 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | ||
1064 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
1065 | printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); | 1100 | printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); |
1066 | } else { | 1101 | } else { |
1067 | printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); | 1102 | printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); |
@@ -1073,9 +1108,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
1073 | if (!ret && fs_devices_ret) | 1108 | if (!ret && fs_devices_ret) |
1074 | (*fs_devices_ret)->total_devices = total_devices; | 1109 | (*fs_devices_ret)->total_devices = total_devices; |
1075 | 1110 | ||
1076 | error_unmap: | 1111 | btrfs_release_disk_super(page); |
1077 | kunmap(page); | ||
1078 | put_page(page); | ||
1079 | 1112 | ||
1080 | error_bdev_put: | 1113 | error_bdev_put: |
1081 | blkdev_put(bdev, flags); | 1114 | blkdev_put(bdev, flags); |
@@ -1454,7 +1487,7 @@ again: | |||
1454 | extent = btrfs_item_ptr(leaf, path->slots[0], | 1487 | extent = btrfs_item_ptr(leaf, path->slots[0], |
1455 | struct btrfs_dev_extent); | 1488 | struct btrfs_dev_extent); |
1456 | } else { | 1489 | } else { |
1457 | btrfs_std_error(root->fs_info, ret, "Slot search failed"); | 1490 | btrfs_handle_fs_error(root->fs_info, ret, "Slot search failed"); |
1458 | goto out; | 1491 | goto out; |
1459 | } | 1492 | } |
1460 | 1493 | ||
@@ -1462,7 +1495,7 @@ again: | |||
1462 | 1495 | ||
1463 | ret = btrfs_del_item(trans, root, path); | 1496 | ret = btrfs_del_item(trans, root, path); |
1464 | if (ret) { | 1497 | if (ret) { |
1465 | btrfs_std_error(root->fs_info, ret, | 1498 | btrfs_handle_fs_error(root->fs_info, ret, |
1466 | "Failed to remove dev extent item"); | 1499 | "Failed to remove dev extent item"); |
1467 | } else { | 1500 | } else { |
1468 | set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); | 1501 | set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); |
@@ -1688,32 +1721,92 @@ out: | |||
1688 | return ret; | 1721 | return ret; |
1689 | } | 1722 | } |
1690 | 1723 | ||
1691 | int btrfs_rm_device(struct btrfs_root *root, char *device_path) | 1724 | /* |
1725 | * Verify that @num_devices satisfies the RAID profile constraints in the whole | ||
1726 | * filesystem. It's up to the caller to adjust that number to account for, | ||
1727 | * e.g., an ongoing device replace. | ||
1728 | */ | ||
1729 | static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info, | ||
1730 | u64 num_devices) | ||
1731 | { | ||
1732 | u64 all_avail; | ||
1733 | unsigned seq; | ||
1734 | int i; | ||
1735 | |||
1736 | do { | ||
1737 | seq = read_seqbegin(&fs_info->profiles_lock); | ||
1738 | |||
1739 | all_avail = fs_info->avail_data_alloc_bits | | ||
1740 | fs_info->avail_system_alloc_bits | | ||
1741 | fs_info->avail_metadata_alloc_bits; | ||
1742 | } while (read_seqretry(&fs_info->profiles_lock, seq)); | ||
1743 | |||
1744 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | ||
1745 | if (!(all_avail & btrfs_raid_group[i])) | ||
1746 | continue; | ||
1747 | |||
1748 | if (num_devices < btrfs_raid_array[i].devs_min) { | ||
1749 | int ret = btrfs_raid_mindev_error[i]; | ||
1750 | |||
1751 | if (ret) | ||
1752 | return ret; | ||
1753 | } | ||
1754 | } | ||
1755 | |||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs, | ||
1760 | struct btrfs_device *device) | ||
1692 | { | 1761 | { |
1693 | struct btrfs_device *device; | ||
1694 | struct btrfs_device *next_device; | 1762 | struct btrfs_device *next_device; |
1695 | struct block_device *bdev; | 1763 | |
1696 | struct buffer_head *bh = NULL; | 1764 | list_for_each_entry(next_device, &fs_devs->devices, dev_list) { |
1697 | struct btrfs_super_block *disk_super; | 1765 | if (next_device != device && |
1766 | !next_device->missing && next_device->bdev) | ||
1767 | return next_device; | ||
1768 | } | ||
1769 | |||
1770 | return NULL; | ||
1771 | } | ||
1772 | |||
1773 | /* | ||
1774 | * Helper function to check if the given device is part of s_bdev / latest_bdev | ||
1775 | * and replace it with the provided or the next active device. In the context | ||
1776 | * where this function is called, there should always be another active | ||
1777 | * device (or this_dev). | ||
1778 | */ | ||
1779 | void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, | ||
1780 | struct btrfs_device *device, struct btrfs_device *this_dev) | ||
1781 | { | ||
1782 | struct btrfs_device *next_device; | ||
1783 | |||
1784 | if (this_dev) | ||
1785 | next_device = this_dev; | ||
1786 | else | ||
1787 | next_device = btrfs_find_next_active_device(fs_info->fs_devices, | ||
1788 | device); | ||
1789 | ASSERT(next_device); | ||
1790 | |||
1791 | if (fs_info->sb->s_bdev && | ||
1792 | (fs_info->sb->s_bdev == device->bdev)) | ||
1793 | fs_info->sb->s_bdev = next_device->bdev; | ||
1794 | |||
1795 | if (fs_info->fs_devices->latest_bdev == device->bdev) | ||
1796 | fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
1797 | } | ||
1798 | |||
1799 | int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) | ||
1800 | { | ||
1801 | struct btrfs_device *device; | ||
1698 | struct btrfs_fs_devices *cur_devices; | 1802 | struct btrfs_fs_devices *cur_devices; |
1699 | u64 all_avail; | ||
1700 | u64 devid; | ||
1701 | u64 num_devices; | 1803 | u64 num_devices; |
1702 | u8 *dev_uuid; | ||
1703 | unsigned seq; | ||
1704 | int ret = 0; | 1804 | int ret = 0; |
1705 | bool clear_super = false; | 1805 | bool clear_super = false; |
1806 | char *dev_name = NULL; | ||
1706 | 1807 | ||
1707 | mutex_lock(&uuid_mutex); | 1808 | mutex_lock(&uuid_mutex); |
1708 | 1809 | ||
1709 | do { | ||
1710 | seq = read_seqbegin(&root->fs_info->profiles_lock); | ||
1711 | |||
1712 | all_avail = root->fs_info->avail_data_alloc_bits | | ||
1713 | root->fs_info->avail_system_alloc_bits | | ||
1714 | root->fs_info->avail_metadata_alloc_bits; | ||
1715 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); | ||
1716 | |||
1717 | num_devices = root->fs_info->fs_devices->num_devices; | 1810 | num_devices = root->fs_info->fs_devices->num_devices; |
1718 | btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0); | 1811 | btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0); |
1719 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { | 1812 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { |
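btrfs_check_raid_min_devices() replaces the open-coded profile checks that the following hunk removes from btrfs_rm_device(): the caller passes the device count it would end up with (num_devices - 1, further adjusted when a replace is running) and gets back the matching BTRFS_ERROR_DEV_*_NOT_MET code, or 0. Below is a rough user-space model of that check; the devs_min values are quoted from btrfs_raid_array[] as I recall them for this kernel version and should be treated as assumptions.

#include <stdio.h>

struct profile { const char *name; unsigned devs_min; int in_use; };

int main(void)
{
	/* a 2-device filesystem using RAID1 for data/metadata */
	struct profile p[] = {
		{ "raid10", 4, 0 }, { "raid1",  2, 1 }, { "dup",   1, 0 },
		{ "raid0",  2, 0 }, { "single", 1, 1 }, { "raid5", 2, 0 },
		{ "raid6",  3, 0 },
	};
	unsigned num_devices = 2;

	/* btrfs_rm_device() asks: are num_devices - 1 devices still enough? */
	for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++)
		if (p[i].in_use && num_devices - 1 < p[i].devs_min)
			printf("cannot remove a device: %s needs at least %u\n",
			       p[i].name, p[i].devs_min);
	return 0;
}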
@@ -1722,78 +1815,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1722 | } | 1815 | } |
1723 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0); | 1816 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0); |
1724 | 1817 | ||
1725 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | 1818 | ret = btrfs_check_raid_min_devices(root->fs_info, num_devices - 1); |
1726 | ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; | 1819 | if (ret) |
1727 | goto out; | ||
1728 | } | ||
1729 | |||
1730 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { | ||
1731 | ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET; | ||
1732 | goto out; | 1820 | goto out; |
1733 | } | ||
1734 | 1821 | ||
1735 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) && | 1822 | ret = btrfs_find_device_by_devspec(root, devid, device_path, |
1736 | root->fs_info->fs_devices->rw_devices <= 2) { | 1823 | &device); |
1737 | ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET; | 1824 | if (ret) |
1738 | goto out; | ||
1739 | } | ||
1740 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) && | ||
1741 | root->fs_info->fs_devices->rw_devices <= 3) { | ||
1742 | ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET; | ||
1743 | goto out; | 1825 | goto out; |
1744 | } | ||
1745 | |||
1746 | if (strcmp(device_path, "missing") == 0) { | ||
1747 | struct list_head *devices; | ||
1748 | struct btrfs_device *tmp; | ||
1749 | |||
1750 | device = NULL; | ||
1751 | devices = &root->fs_info->fs_devices->devices; | ||
1752 | /* | ||
1753 | * It is safe to read the devices since the volume_mutex | ||
1754 | * is held. | ||
1755 | */ | ||
1756 | list_for_each_entry(tmp, devices, dev_list) { | ||
1757 | if (tmp->in_fs_metadata && | ||
1758 | !tmp->is_tgtdev_for_dev_replace && | ||
1759 | !tmp->bdev) { | ||
1760 | device = tmp; | ||
1761 | break; | ||
1762 | } | ||
1763 | } | ||
1764 | bdev = NULL; | ||
1765 | bh = NULL; | ||
1766 | disk_super = NULL; | ||
1767 | if (!device) { | ||
1768 | ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; | ||
1769 | goto out; | ||
1770 | } | ||
1771 | } else { | ||
1772 | ret = btrfs_get_bdev_and_sb(device_path, | ||
1773 | FMODE_WRITE | FMODE_EXCL, | ||
1774 | root->fs_info->bdev_holder, 0, | ||
1775 | &bdev, &bh); | ||
1776 | if (ret) | ||
1777 | goto out; | ||
1778 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
1779 | devid = btrfs_stack_device_id(&disk_super->dev_item); | ||
1780 | dev_uuid = disk_super->dev_item.uuid; | ||
1781 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, | ||
1782 | disk_super->fsid); | ||
1783 | if (!device) { | ||
1784 | ret = -ENOENT; | ||
1785 | goto error_brelse; | ||
1786 | } | ||
1787 | } | ||
1788 | 1826 | ||
1789 | if (device->is_tgtdev_for_dev_replace) { | 1827 | if (device->is_tgtdev_for_dev_replace) { |
1790 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; | 1828 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; |
1791 | goto error_brelse; | 1829 | goto out; |
1792 | } | 1830 | } |
1793 | 1831 | ||
1794 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { | 1832 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { |
1795 | ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; | 1833 | ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; |
1796 | goto error_brelse; | 1834 | goto out; |
1797 | } | 1835 | } |
1798 | 1836 | ||
1799 | if (device->writeable) { | 1837 | if (device->writeable) { |
@@ -1801,6 +1839,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1801 | list_del_init(&device->dev_alloc_list); | 1839 | list_del_init(&device->dev_alloc_list); |
1802 | device->fs_devices->rw_devices--; | 1840 | device->fs_devices->rw_devices--; |
1803 | unlock_chunks(root); | 1841 | unlock_chunks(root); |
1842 | dev_name = kstrdup(device->name->str, GFP_KERNEL); | ||
1843 | if (!dev_name) { | ||
1844 | ret = -ENOMEM; | ||
1845 | goto error_undo; | ||
1846 | } | ||
1804 | clear_super = true; | 1847 | clear_super = true; |
1805 | } | 1848 | } |
1806 | 1849 | ||
@@ -1842,12 +1885,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1842 | if (device->missing) | 1885 | if (device->missing) |
1843 | device->fs_devices->missing_devices--; | 1886 | device->fs_devices->missing_devices--; |
1844 | 1887 | ||
1845 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1888 | btrfs_assign_next_active_device(root->fs_info, device, NULL); |
1846 | struct btrfs_device, dev_list); | ||
1847 | if (device->bdev == root->fs_info->sb->s_bdev) | ||
1848 | root->fs_info->sb->s_bdev = next_device->bdev; | ||
1849 | if (device->bdev == root->fs_info->fs_devices->latest_bdev) | ||
1850 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
1851 | 1889 | ||
1852 | if (device->bdev) { | 1890 | if (device->bdev) { |
1853 | device->fs_devices->open_devices--; | 1891 | device->fs_devices->open_devices--; |
@@ -1883,63 +1921,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1883 | * at this point, the device is zero sized. We want to | 1921 | * at this point, the device is zero sized. We want to |
1884 | * remove it from the devices list and zero out the old super | 1922 | * remove it from the devices list and zero out the old super |
1885 | */ | 1923 | */ |
1886 | if (clear_super && disk_super) { | 1924 | if (clear_super) { |
1887 | u64 bytenr; | 1925 | struct block_device *bdev; |
1888 | int i; | 1926 | |
1889 | 1927 | bdev = blkdev_get_by_path(dev_name, FMODE_READ | FMODE_EXCL, | |
1890 | /* make sure this device isn't detected as part of | 1928 | root->fs_info->bdev_holder); |
1891 | * the FS anymore | 1929 | if (!IS_ERR(bdev)) { |
1892 | */ | 1930 | btrfs_scratch_superblocks(bdev, dev_name); |
1893 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | 1931 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
1894 | set_buffer_dirty(bh); | ||
1895 | sync_dirty_buffer(bh); | ||
1896 | |||
1897 | /* clear the mirror copies of super block on the disk | ||
1898 | * being removed, 0th copy is been taken care above and | ||
1899 | * the below would take of the rest | ||
1900 | */ | ||
1901 | for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
1902 | bytenr = btrfs_sb_offset(i); | ||
1903 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
1904 | i_size_read(bdev->bd_inode)) | ||
1905 | break; | ||
1906 | |||
1907 | brelse(bh); | ||
1908 | bh = __bread(bdev, bytenr / 4096, | ||
1909 | BTRFS_SUPER_INFO_SIZE); | ||
1910 | if (!bh) | ||
1911 | continue; | ||
1912 | |||
1913 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
1914 | |||
1915 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
1916 | btrfs_super_magic(disk_super) != BTRFS_MAGIC) { | ||
1917 | continue; | ||
1918 | } | ||
1919 | memset(&disk_super->magic, 0, | ||
1920 | sizeof(disk_super->magic)); | ||
1921 | set_buffer_dirty(bh); | ||
1922 | sync_dirty_buffer(bh); | ||
1923 | } | 1932 | } |
1924 | } | 1933 | } |
1925 | 1934 | ||
1926 | ret = 0; | ||
1927 | |||
1928 | if (bdev) { | ||
1929 | /* Notify udev that device has changed */ | ||
1930 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); | ||
1931 | |||
1932 | /* Update ctime/mtime for device path for libblkid */ | ||
1933 | update_dev_time(device_path); | ||
1934 | } | ||
1935 | |||
1936 | error_brelse: | ||
1937 | brelse(bh); | ||
1938 | if (bdev) | ||
1939 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); | ||
1940 | out: | 1935 | out: |
1936 | kfree(dev_name); | ||
1937 | |||
1941 | mutex_unlock(&uuid_mutex); | 1938 | mutex_unlock(&uuid_mutex); |
1942 | return ret; | 1939 | return ret; |
1940 | |||
1943 | error_undo: | 1941 | error_undo: |
1944 | if (device->writeable) { | 1942 | if (device->writeable) { |
1945 | lock_chunks(root); | 1943 | lock_chunks(root); |
@@ -1948,7 +1946,7 @@ error_undo: | |||
1948 | device->fs_devices->rw_devices++; | 1946 | device->fs_devices->rw_devices++; |
1949 | unlock_chunks(root); | 1947 | unlock_chunks(root); |
1950 | } | 1948 | } |
1951 | goto error_brelse; | 1949 | goto out; |
1952 | } | 1950 | } |
1953 | 1951 | ||
1954 | void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, | 1952 | void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, |
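With the superblock no longer read up front, btrfs_rm_device() now remembers the device name, reopens the block device once the removal has been committed, and lets btrfs_scratch_superblocks() wipe the on-disk signatures. Conceptually that wipe is what the deleted open-coded loop above did; the sketch below is modelled on those removed lines and is only an approximation of what btrfs_scratch_superblocks() really does.

static void example_scratch_supers(struct block_device *bdev)
{
	struct btrfs_super_block *disk_super;
	struct buffer_head *bh;
	u64 bytenr;
	int i;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
			break;		/* this mirror doesn't fit on the device */

		bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE);
		if (!bh)
			continue;

		disk_super = (struct btrfs_super_block *)bh->b_data;
		if (btrfs_super_bytenr(disk_super) == bytenr &&
		    btrfs_super_magic(disk_super) == BTRFS_MAGIC) {
			/* wipe the magic so scans stop recognizing the fs */
			memset(&disk_super->magic, 0, sizeof(disk_super->magic));
			set_buffer_dirty(bh);
			sync_dirty_buffer(bh);
		}
		brelse(bh);
	}
}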
@@ -1972,11 +1970,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, | |||
1972 | if (srcdev->missing) | 1970 | if (srcdev->missing) |
1973 | fs_devices->missing_devices--; | 1971 | fs_devices->missing_devices--; |
1974 | 1972 | ||
1975 | if (srcdev->writeable) { | 1973 | if (srcdev->writeable) |
1976 | fs_devices->rw_devices--; | 1974 | fs_devices->rw_devices--; |
1977 | /* zero out the old super if it is writable */ | ||
1978 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); | ||
1979 | } | ||
1980 | 1975 | ||
1981 | if (srcdev->bdev) | 1976 | if (srcdev->bdev) |
1982 | fs_devices->open_devices--; | 1977 | fs_devices->open_devices--; |
@@ -1987,6 +1982,10 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
1987 | { | 1982 | { |
1988 | struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; | 1983 | struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; |
1989 | 1984 | ||
1985 | if (srcdev->writeable) { | ||
1986 | /* zero out the old super if it is writable */ | ||
1987 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); | ||
1988 | } | ||
1990 | call_rcu(&srcdev->rcu, free_device); | 1989 | call_rcu(&srcdev->rcu, free_device); |
1991 | 1990 | ||
1992 | /* | 1991 | /* |
@@ -2016,32 +2015,33 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
2016 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | 2015 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, |
2017 | struct btrfs_device *tgtdev) | 2016 | struct btrfs_device *tgtdev) |
2018 | { | 2017 | { |
2019 | struct btrfs_device *next_device; | ||
2020 | |||
2021 | mutex_lock(&uuid_mutex); | 2018 | mutex_lock(&uuid_mutex); |
2022 | WARN_ON(!tgtdev); | 2019 | WARN_ON(!tgtdev); |
2023 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | 2020 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
2024 | 2021 | ||
2025 | btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); | 2022 | btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); |
2026 | 2023 | ||
2027 | if (tgtdev->bdev) { | 2024 | if (tgtdev->bdev) |
2028 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); | ||
2029 | fs_info->fs_devices->open_devices--; | 2025 | fs_info->fs_devices->open_devices--; |
2030 | } | 2026 | |
2031 | fs_info->fs_devices->num_devices--; | 2027 | fs_info->fs_devices->num_devices--; |
2032 | 2028 | ||
2033 | next_device = list_entry(fs_info->fs_devices->devices.next, | 2029 | btrfs_assign_next_active_device(fs_info, tgtdev, NULL); |
2034 | struct btrfs_device, dev_list); | ||
2035 | if (tgtdev->bdev == fs_info->sb->s_bdev) | ||
2036 | fs_info->sb->s_bdev = next_device->bdev; | ||
2037 | if (tgtdev->bdev == fs_info->fs_devices->latest_bdev) | ||
2038 | fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
2039 | list_del_rcu(&tgtdev->dev_list); | ||
2040 | 2030 | ||
2041 | call_rcu(&tgtdev->rcu, free_device); | 2031 | list_del_rcu(&tgtdev->dev_list); |
2042 | 2032 | ||
2043 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 2033 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
2044 | mutex_unlock(&uuid_mutex); | 2034 | mutex_unlock(&uuid_mutex); |
2035 | |||
2036 | /* | ||
2037 | * The update_dev_time() within btrfs_scratch_superblocks() | ||
2038 | * may lead to a call to btrfs_show_devname() which will try | ||
2039 | * to hold device_list_mutex. At this point the device is | ||
2040 | * already out of the device list, so we don't have to hold | ||
2041 | * the device_list_mutex lock. | ||
2042 | */ | ||
2043 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); | ||
2044 | call_rcu(&tgtdev->rcu, free_device); | ||
2045 | } | 2045 | } |
2046 | 2046 | ||
2047 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | 2047 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, |
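The target-device teardown above is reordered for the lock-ordering reason spelled out in the new comment: btrfs_scratch_superblocks() can end up in btrfs_show_devname(), which takes device_list_mutex, so the scratch may only run once the device has been unlinked and the mutex dropped. A comment-only sketch of the resulting sequence, with names taken from the hunk above:

/*
 *	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 *	list_del_rcu(&tgtdev->dev_list);	// device no longer visible
 *	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 *	mutex_unlock(&uuid_mutex);
 *
 *	btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
 *		// may reach btrfs_show_devname(), which itself takes
 *		// device_list_mutex -- hence it runs after the unlock
 *	call_rcu(&tgtdev->rcu, free_device);
 */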
@@ -2102,6 +2102,31 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | |||
2102 | } | 2102 | } |
2103 | 2103 | ||
2104 | /* | 2104 | /* |
2105 | * Lookup a device given by device id, or the path if the id is 0. | ||
2106 | */ | ||
2107 | int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid, | ||
2108 | char *devpath, | ||
2109 | struct btrfs_device **device) | ||
2110 | { | ||
2111 | int ret; | ||
2112 | |||
2113 | if (devid) { | ||
2114 | ret = 0; | ||
2115 | *device = btrfs_find_device(root->fs_info, devid, NULL, | ||
2116 | NULL); | ||
2117 | if (!*device) | ||
2118 | ret = -ENOENT; | ||
2119 | } else { | ||
2120 | if (!devpath || !devpath[0]) | ||
2121 | return -EINVAL; | ||
2122 | |||
2123 | ret = btrfs_find_device_missing_or_by_path(root, devpath, | ||
2124 | device); | ||
2125 | } | ||
2126 | return ret; | ||
2127 | } | ||
2128 | |||
2129 | /* | ||
2105 | * does all the dirty work required for changing file system's UUID. | 2130 | * does all the dirty work required for changing file system's UUID. |
2106 | */ | 2131 | */ |
2107 | static int btrfs_prepare_sprout(struct btrfs_root *root) | 2132 | static int btrfs_prepare_sprout(struct btrfs_root *root) |
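btrfs_find_device_by_devspec() gives btrfs_rm_device() (and, presumably, the ioctl paths that call it) a single entry point that accepts either a numeric device id or a path, where the path may also be the literal "missing" handled by btrfs_find_device_missing_or_by_path(). A small kernel-style sketch of the two call shapes follows; the concrete devid and path are made-up values.

	struct btrfs_device *device;
	int ret;

	/* by id: devid != 0, the path argument is ignored */
	ret = btrfs_find_device_by_devspec(root, 3, NULL, &device);

	/* by path (or the literal "missing"): devid == 0 */
	if (ret)
		ret = btrfs_find_device_by_devspec(root, 0, "/dev/sdb", &device);
	if (ret)
		return ret;	/* -ENOENT, -EINVAL, ... */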
@@ -2418,7 +2443,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2418 | 2443 | ||
2419 | ret = btrfs_relocate_sys_chunks(root); | 2444 | ret = btrfs_relocate_sys_chunks(root); |
2420 | if (ret < 0) | 2445 | if (ret < 0) |
2421 | btrfs_std_error(root->fs_info, ret, | 2446 | btrfs_handle_fs_error(root->fs_info, ret, |
2422 | "Failed to relocate sys chunks after " | 2447 | "Failed to relocate sys chunks after " |
2423 | "device initialization. This can be fixed " | 2448 | "device initialization. This can be fixed " |
2424 | "using the \"btrfs balance\" command."); | 2449 | "using the \"btrfs balance\" command."); |
@@ -2663,7 +2688,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
2663 | if (ret < 0) | 2688 | if (ret < 0) |
2664 | goto out; | 2689 | goto out; |
2665 | else if (ret > 0) { /* Logic error or corruption */ | 2690 | else if (ret > 0) { /* Logic error or corruption */ |
2666 | btrfs_std_error(root->fs_info, -ENOENT, | 2691 | btrfs_handle_fs_error(root->fs_info, -ENOENT, |
2667 | "Failed lookup while freeing chunk."); | 2692 | "Failed lookup while freeing chunk."); |
2668 | ret = -ENOENT; | 2693 | ret = -ENOENT; |
2669 | goto out; | 2694 | goto out; |
@@ -2671,7 +2696,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
2671 | 2696 | ||
2672 | ret = btrfs_del_item(trans, root, path); | 2697 | ret = btrfs_del_item(trans, root, path); |
2673 | if (ret < 0) | 2698 | if (ret < 0) |
2674 | btrfs_std_error(root->fs_info, ret, | 2699 | btrfs_handle_fs_error(root->fs_info, ret, |
2675 | "Failed to delete chunk item."); | 2700 | "Failed to delete chunk item."); |
2676 | out: | 2701 | out: |
2677 | btrfs_free_path(path); | 2702 | btrfs_free_path(path); |
@@ -2857,7 +2882,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) | |||
2857 | chunk_offset); | 2882 | chunk_offset); |
2858 | if (IS_ERR(trans)) { | 2883 | if (IS_ERR(trans)) { |
2859 | ret = PTR_ERR(trans); | 2884 | ret = PTR_ERR(trans); |
2860 | btrfs_std_error(root->fs_info, ret, NULL); | 2885 | btrfs_handle_fs_error(root->fs_info, ret, NULL); |
2861 | return ret; | 2886 | return ret; |
2862 | } | 2887 | } |
2863 | 2888 | ||
@@ -3402,6 +3427,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
3402 | u32 count_meta = 0; | 3427 | u32 count_meta = 0; |
3403 | u32 count_sys = 0; | 3428 | u32 count_sys = 0; |
3404 | int chunk_reserved = 0; | 3429 | int chunk_reserved = 0; |
3430 | u64 bytes_used = 0; | ||
3405 | 3431 | ||
3406 | /* step one make some room on all the devices */ | 3432 | /* step one make some room on all the devices */ |
3407 | devices = &fs_info->fs_devices->devices; | 3433 | devices = &fs_info->fs_devices->devices; |
@@ -3540,7 +3566,13 @@ again: | |||
3540 | goto loop; | 3566 | goto loop; |
3541 | } | 3567 | } |
3542 | 3568 | ||
3543 | if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) { | 3569 | ASSERT(fs_info->data_sinfo); |
3570 | spin_lock(&fs_info->data_sinfo->lock); | ||
3571 | bytes_used = fs_info->data_sinfo->bytes_used; | ||
3572 | spin_unlock(&fs_info->data_sinfo->lock); | ||
3573 | |||
3574 | if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && | ||
3575 | !chunk_reserved && !bytes_used) { | ||
3544 | trans = btrfs_start_transaction(chunk_root, 0); | 3576 | trans = btrfs_start_transaction(chunk_root, 0); |
3545 | if (IS_ERR(trans)) { | 3577 | if (IS_ERR(trans)) { |
3546 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | 3578 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); |
@@ -3632,7 +3664,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) | |||
3632 | unset_balance_control(fs_info); | 3664 | unset_balance_control(fs_info); |
3633 | ret = del_balance_item(fs_info->tree_root); | 3665 | ret = del_balance_item(fs_info->tree_root); |
3634 | if (ret) | 3666 | if (ret) |
3635 | btrfs_std_error(fs_info, ret, NULL); | 3667 | btrfs_handle_fs_error(fs_info, ret, NULL); |
3636 | 3668 | ||
3637 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | 3669 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); |
3638 | } | 3670 | } |
@@ -3693,10 +3725,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3693 | num_devices--; | 3725 | num_devices--; |
3694 | } | 3726 | } |
3695 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); | 3727 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
3696 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | 3728 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP; |
3697 | if (num_devices == 1) | 3729 | if (num_devices > 1) |
3698 | allowed |= BTRFS_BLOCK_GROUP_DUP; | ||
3699 | else if (num_devices > 1) | ||
3700 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); | 3730 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); |
3701 | if (num_devices > 2) | 3731 | if (num_devices > 2) |
3702 | allowed |= BTRFS_BLOCK_GROUP_RAID5; | 3732 | allowed |= BTRFS_BLOCK_GROUP_RAID5; |
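The hunk above simplifies the "allowed convert targets" mask in btrfs_balance(): DUP is now always permitted instead of only when the filesystem has exactly one device. A user-space model of the new logic is below; the bit values are stand-ins, and the RAID6/RAID10 cases for more than three devices are quoted from memory of the unchanged context rather than shown in this hunk.

#include <stdio.h>

#define SINGLE (1u << 0)
#define DUP    (1u << 1)
#define RAID0  (1u << 2)
#define RAID1  (1u << 3)
#define RAID5  (1u << 4)

static unsigned allowed_targets(unsigned num_devices)
{
	unsigned allowed = SINGLE | DUP;	/* DUP no longer needs num_devices == 1 */

	if (num_devices > 1)
		allowed |= RAID0 | RAID1;
	if (num_devices > 2)
		allowed |= RAID5;
	/* num_devices > 3 additionally allows RAID6/RAID10 in the real code */
	return allowed;
}

int main(void)
{
	for (unsigned n = 1; n <= 4; n++)
		printf("%u device(s): allowed mask 0x%x\n", n, allowed_targets(n));
	return 0;
}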
@@ -5278,7 +5308,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5278 | stripe_nr = div64_u64(stripe_nr, stripe_len); | 5308 | stripe_nr = div64_u64(stripe_nr, stripe_len); |
5279 | 5309 | ||
5280 | stripe_offset = stripe_nr * stripe_len; | 5310 | stripe_offset = stripe_nr * stripe_len; |
5281 | BUG_ON(offset < stripe_offset); | 5311 | if (offset < stripe_offset) { |
5312 | btrfs_crit(fs_info, "stripe math has gone wrong, " | ||
5313 | "stripe_offset=%llu, offset=%llu, start=%llu, " | ||
5314 | "logical=%llu, stripe_len=%llu", | ||
5315 | stripe_offset, offset, em->start, logical, | ||
5316 | stripe_len); | ||
5317 | free_extent_map(em); | ||
5318 | return -EINVAL; | ||
5319 | } | ||
5282 | 5320 | ||
5283 | /* stripe_offset is the offset of this block in its stripe*/ | 5321 | /* stripe_offset is the offset of this block in its stripe*/ |
5284 | stripe_offset = offset - stripe_offset; | 5322 | stripe_offset = offset - stripe_offset; |
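The new check turns a BUG_ON into a reported error, so it helps to see the arithmetic it guards. A user-space model of the stripe math with concrete numbers is below; it assumes BTRFS_STRIPE_LEN is 64KiB, which the read_one_chunk hunk further down now enforces for on-disk chunks.

#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t stripe_len = 64 * 1024;
	uint64_t em_start = 0;			/* chunk's logical start (example) */
	uint64_t logical = 200704;		/* block we want to map (example) */

	uint64_t offset = logical - em_start;			/* 200704 */
	uint64_t stripe_nr = offset / stripe_len;		/* 3 */
	uint64_t stripe_offset = stripe_nr * stripe_len;	/* 196608 */

	/* offset < stripe_offset is impossible here; in the kernel it would
	 * indicate corrupted chunk geometry, now reported instead of BUG_ON */
	printf("stripe_nr=%" PRIu64 " offset within stripe=%" PRIu64 "\n",
	       stripe_nr, offset - stripe_offset);	/* prints 3 and 4096 */
	return 0;
}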
@@ -5519,7 +5557,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5519 | &stripe_index); | 5557 | &stripe_index); |
5520 | mirror_num = stripe_index + 1; | 5558 | mirror_num = stripe_index + 1; |
5521 | } | 5559 | } |
5522 | BUG_ON(stripe_index >= map->num_stripes); | 5560 | if (stripe_index >= map->num_stripes) { |
5561 | btrfs_crit(fs_info, "stripe index math went horribly wrong, " | ||
5562 | "got stripe_index=%u, num_stripes=%u", | ||
5563 | stripe_index, map->num_stripes); | ||
5564 | ret = -EINVAL; | ||
5565 | goto out; | ||
5566 | } | ||
5523 | 5567 | ||
5524 | num_alloc_stripes = num_stripes; | 5568 | num_alloc_stripes = num_stripes; |
5525 | if (dev_replace_is_ongoing) { | 5569 | if (dev_replace_is_ongoing) { |
@@ -6242,7 +6286,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
6242 | "invalid chunk length %llu", length); | 6286 | "invalid chunk length %llu", length); |
6243 | return -EIO; | 6287 | return -EIO; |
6244 | } | 6288 | } |
6245 | if (!is_power_of_2(stripe_len)) { | 6289 | if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { |
6246 | btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", | 6290 | btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", |
6247 | stripe_len); | 6291 | stripe_len); |
6248 | return -EIO; | 6292 | return -EIO; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1939ebde63df..0ac90f8d85bd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -340,14 +340,14 @@ struct btrfs_raid_attr { | |||
340 | }; | 340 | }; |
341 | 341 | ||
342 | extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES]; | 342 | extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES]; |
343 | 343 | extern const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES]; | |
344 | extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES]; | 344 | extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES]; |
345 | 345 | ||
346 | struct map_lookup { | 346 | struct map_lookup { |
347 | u64 type; | 347 | u64 type; |
348 | int io_align; | 348 | int io_align; |
349 | int io_width; | 349 | int io_width; |
350 | int stripe_len; | 350 | u64 stripe_len; |
351 | int sector_size; | 351 | int sector_size; |
352 | int num_stripes; | 352 | int num_stripes; |
353 | int sub_stripes; | 353 | int sub_stripes; |
@@ -357,52 +357,6 @@ struct map_lookup { | |||
357 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ | 357 | #define map_lookup_size(n) (sizeof(struct map_lookup) + \ |
358 | (sizeof(struct btrfs_bio_stripe) * (n))) | 358 | (sizeof(struct btrfs_bio_stripe) * (n))) |
359 | 359 | ||
360 | /* | ||
361 | * Restriper's general type filter | ||
362 | */ | ||
363 | #define BTRFS_BALANCE_DATA (1ULL << 0) | ||
364 | #define BTRFS_BALANCE_SYSTEM (1ULL << 1) | ||
365 | #define BTRFS_BALANCE_METADATA (1ULL << 2) | ||
366 | |||
367 | #define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \ | ||
368 | BTRFS_BALANCE_SYSTEM | \ | ||
369 | BTRFS_BALANCE_METADATA) | ||
370 | |||
371 | #define BTRFS_BALANCE_FORCE (1ULL << 3) | ||
372 | #define BTRFS_BALANCE_RESUME (1ULL << 4) | ||
373 | |||
374 | /* | ||
375 | * Balance filters | ||
376 | */ | ||
377 | #define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0) | ||
378 | #define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1) | ||
379 | #define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2) | ||
380 | #define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3) | ||
381 | #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) | ||
382 | #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) | ||
383 | #define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) | ||
384 | #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) | ||
385 | #define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) | ||
386 | |||
387 | #define BTRFS_BALANCE_ARGS_MASK \ | ||
388 | (BTRFS_BALANCE_ARGS_PROFILES | \ | ||
389 | BTRFS_BALANCE_ARGS_USAGE | \ | ||
390 | BTRFS_BALANCE_ARGS_DEVID | \ | ||
391 | BTRFS_BALANCE_ARGS_DRANGE | \ | ||
392 | BTRFS_BALANCE_ARGS_VRANGE | \ | ||
393 | BTRFS_BALANCE_ARGS_LIMIT | \ | ||
394 | BTRFS_BALANCE_ARGS_LIMIT_RANGE | \ | ||
395 | BTRFS_BALANCE_ARGS_STRIPES_RANGE | \ | ||
396 | BTRFS_BALANCE_ARGS_USAGE_RANGE) | ||
397 | |||
398 | /* | ||
399 | * Profile changing flags. When SOFT is set we won't relocate chunk if | ||
400 | * it already has the target profile (even though it may be | ||
401 | * half-filled). | ||
402 | */ | ||
403 | #define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8) | ||
404 | #define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9) | ||
405 | |||
406 | struct btrfs_balance_args; | 360 | struct btrfs_balance_args; |
407 | struct btrfs_balance_progress; | 361 | struct btrfs_balance_progress; |
408 | struct btrfs_balance_control { | 362 | struct btrfs_balance_control { |
@@ -445,13 +399,18 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
445 | struct btrfs_fs_devices **fs_devices_ret); | 399 | struct btrfs_fs_devices **fs_devices_ret); |
446 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); | 400 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); |
447 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step); | 401 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step); |
402 | void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, | ||
403 | struct btrfs_device *device, struct btrfs_device *this_dev); | ||
448 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | 404 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, |
449 | char *device_path, | 405 | char *device_path, |
450 | struct btrfs_device **device); | 406 | struct btrfs_device **device); |
407 | int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid, | ||
408 | char *devpath, | ||
409 | struct btrfs_device **device); | ||
451 | struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | 410 | struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, |
452 | const u64 *devid, | 411 | const u64 *devid, |
453 | const u8 *uuid); | 412 | const u8 *uuid); |
454 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); | 413 | int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid); |
455 | void btrfs_cleanup_fs_uuids(void); | 414 | void btrfs_cleanup_fs_uuids(void); |
456 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); | 415 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); |
457 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | 416 | int btrfs_grow_device(struct btrfs_trans_handle *trans, |