diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-07-11 13:26:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-07-11 13:26:34 -0400 |
commit | 31b7a57c9eb3d90c87b6c2b855720ec709d2f6be (patch) | |
tree | 192362e9cb3d08de8876a23abbe1b40d1d4a3e63 /fs | |
parent | 84e3e9d04d5b5368a1c26f744a98c492052d0523 (diff) | |
parent | 9689457b5b0a2b69874c421a489d3fb50ca76b7b (diff) |
Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
"This is an assortment of fixes. Most of the commits are from Filipe
(fsync, the inode allocation cache and a few others). Mark kicked in
a series fixing corners in the extent sharing ioctls, and everyone
else fixed up on assorted other problems"
* 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs: fix wrong check for btrfs_force_chunk_alloc()
Btrfs: fix warning of bytes_may_use
Btrfs: fix hang when failing to submit bio of directIO
Btrfs: fix a comment in inode.c:evict_inode_truncate_pages()
Btrfs: fix memory corruption on failure to submit bio for direct IO
btrfs: don't update mtime/ctime on deduped inodes
btrfs: allow dedupe of same inode
btrfs: fix deadlock with extent-same and readpage
btrfs: pass unaligned length to btrfs_cmp_data()
Btrfs: fix fsync after truncate when no_holes feature is enabled
Btrfs: fix fsync xattr loss in the fast fsync path
Btrfs: fix fsync data loss after append write
Btrfs: fix crash on close_ctree() if cleaner starts new transaction
Btrfs: fix race between caching kthread and returning inode to inode cache
Btrfs: use kmem_cache_free when freeing entry in inode cache
Btrfs: fix race between balance and unused block group deletion
btrfs: add error handling for scrub_workers_get()
btrfs: cleanup noused initialization of dev in btrfs_end_bio()
btrfs: qgroup: allow user to clear the limitation on qgroup
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 1 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 41 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 3 | ||||
-rw-r--r-- | fs/btrfs/inode-map.c | 17 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 89 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 241 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 5 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 49 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 2 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 39 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 226 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 50 |
13 files changed, 641 insertions, 124 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae2..81220b2203c6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -44,6 +44,8 @@ | |||
44 | #define BTRFS_INODE_IN_DELALLOC_LIST 9 | 44 | #define BTRFS_INODE_IN_DELALLOC_LIST 9 |
45 | #define BTRFS_INODE_READDIO_NEED_LOCK 10 | 45 | #define BTRFS_INODE_READDIO_NEED_LOCK 10 |
46 | #define BTRFS_INODE_HAS_PROPS 11 | 46 | #define BTRFS_INODE_HAS_PROPS 11 |
47 | /* DIO is ready to submit */ | ||
48 | #define BTRFS_INODE_DIO_READY 12 | ||
47 | /* | 49 | /* |
48 | * The following 3 bits are meant only for the btree inode. | 50 | * The following 3 bits are meant only for the btree inode. |
49 | * When any of them is set, it means an error happened while writing an | 51 | * When any of them is set, it means an error happened while writing an |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80a9aefb0c46..aac314e14188 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info { | |||
1778 | spinlock_t unused_bgs_lock; | 1778 | spinlock_t unused_bgs_lock; |
1779 | struct list_head unused_bgs; | 1779 | struct list_head unused_bgs; |
1780 | struct mutex unused_bg_unpin_mutex; | 1780 | struct mutex unused_bg_unpin_mutex; |
1781 | struct mutex delete_unused_bgs_mutex; | ||
1781 | 1782 | ||
1782 | /* For btrfs to record security options */ | 1783 | /* For btrfs to record security options */ |
1783 | struct security_mnt_opts security_opts; | 1784 | struct security_mnt_opts security_opts; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f43bfea3684..a9aadb2ad525 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg) | |||
1751 | { | 1751 | { |
1752 | struct btrfs_root *root = arg; | 1752 | struct btrfs_root *root = arg; |
1753 | int again; | 1753 | int again; |
1754 | struct btrfs_trans_handle *trans; | ||
1754 | 1755 | ||
1755 | do { | 1756 | do { |
1756 | again = 0; | 1757 | again = 0; |
@@ -1772,7 +1773,6 @@ static int cleaner_kthread(void *arg) | |||
1772 | } | 1773 | } |
1773 | 1774 | ||
1774 | btrfs_run_delayed_iputs(root); | 1775 | btrfs_run_delayed_iputs(root); |
1775 | btrfs_delete_unused_bgs(root->fs_info); | ||
1776 | again = btrfs_clean_one_deleted_snapshot(root); | 1776 | again = btrfs_clean_one_deleted_snapshot(root); |
1777 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1777 | mutex_unlock(&root->fs_info->cleaner_mutex); |
1778 | 1778 | ||
@@ -1781,6 +1781,16 @@ static int cleaner_kthread(void *arg) | |||
1781 | * needn't do anything special here. | 1781 | * needn't do anything special here. |
1782 | */ | 1782 | */ |
1783 | btrfs_run_defrag_inodes(root->fs_info); | 1783 | btrfs_run_defrag_inodes(root->fs_info); |
1784 | |||
1785 | /* | ||
1786 | * Acquires fs_info->delete_unused_bgs_mutex to avoid racing | ||
1787 | * with relocation (btrfs_relocate_chunk) and relocation | ||
1788 | * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) | ||
1789 | * after acquiring fs_info->delete_unused_bgs_mutex. So we | ||
1790 | * can't hold, nor need to, fs_info->cleaner_mutex when deleting | ||
1791 | * unused block groups. | ||
1792 | */ | ||
1793 | btrfs_delete_unused_bgs(root->fs_info); | ||
1784 | sleep: | 1794 | sleep: |
1785 | if (!try_to_freeze() && !again) { | 1795 | if (!try_to_freeze() && !again) { |
1786 | set_current_state(TASK_INTERRUPTIBLE); | 1796 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -1789,6 +1799,34 @@ sleep: | |||
1789 | __set_current_state(TASK_RUNNING); | 1799 | __set_current_state(TASK_RUNNING); |
1790 | } | 1800 | } |
1791 | } while (!kthread_should_stop()); | 1801 | } while (!kthread_should_stop()); |
1802 | |||
1803 | /* | ||
1804 | * Transaction kthread is stopped before us and wakes us up. | ||
1805 | * However we might have started a new transaction and COWed some | ||
1806 | * tree blocks when deleting unused block groups for example. So | ||
1807 | * make sure we commit the transaction we started to have a clean | ||
1808 | * shutdown when evicting the btree inode - if it has dirty pages | ||
1809 | * when we do the final iput() on it, eviction will trigger a | ||
1810 | * writeback for it which will fail with null pointer dereferences | ||
1811 | * since work queues and other resources were already released and | ||
1812 | * destroyed by the time the iput/eviction/writeback is made. | ||
1813 | */ | ||
1814 | trans = btrfs_attach_transaction(root); | ||
1815 | if (IS_ERR(trans)) { | ||
1816 | if (PTR_ERR(trans) != -ENOENT) | ||
1817 | btrfs_err(root->fs_info, | ||
1818 | "cleaner transaction attach returned %ld", | ||
1819 | PTR_ERR(trans)); | ||
1820 | } else { | ||
1821 | int ret; | ||
1822 | |||
1823 | ret = btrfs_commit_transaction(trans, root); | ||
1824 | if (ret) | ||
1825 | btrfs_err(root->fs_info, | ||
1826 | "cleaner open transaction commit returned %d", | ||
1827 | ret); | ||
1828 | } | ||
1829 | |||
1792 | return 0; | 1830 | return 0; |
1793 | } | 1831 | } |
1794 | 1832 | ||
@@ -2492,6 +2530,7 @@ int open_ctree(struct super_block *sb, | |||
2492 | spin_lock_init(&fs_info->unused_bgs_lock); | 2530 | spin_lock_init(&fs_info->unused_bgs_lock); |
2493 | rwlock_init(&fs_info->tree_mod_log_lock); | 2531 | rwlock_init(&fs_info->tree_mod_log_lock); |
2494 | mutex_init(&fs_info->unused_bg_unpin_mutex); | 2532 | mutex_init(&fs_info->unused_bg_unpin_mutex); |
2533 | mutex_init(&fs_info->delete_unused_bgs_mutex); | ||
2495 | mutex_init(&fs_info->reloc_mutex); | 2534 | mutex_init(&fs_info->reloc_mutex); |
2496 | mutex_init(&fs_info->delalloc_root_mutex); | 2535 | mutex_init(&fs_info->delalloc_root_mutex); |
2497 | seqlock_init(&fs_info->profiles_lock); | 2536 | seqlock_init(&fs_info->profiles_lock); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 38b76cc02f48..1c2bd1723e40 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9889 | } | 9889 | } |
9890 | spin_unlock(&fs_info->unused_bgs_lock); | 9890 | spin_unlock(&fs_info->unused_bgs_lock); |
9891 | 9891 | ||
9892 | mutex_lock(&root->fs_info->delete_unused_bgs_mutex); | ||
9893 | |||
9892 | /* Don't want to race with allocators so take the groups_sem */ | 9894 | /* Don't want to race with allocators so take the groups_sem */ |
9893 | down_write(&space_info->groups_sem); | 9895 | down_write(&space_info->groups_sem); |
9894 | spin_lock(&block_group->lock); | 9896 | spin_lock(&block_group->lock); |
@@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | |||
9983 | end_trans: | 9985 | end_trans: |
9984 | btrfs_end_transaction(trans, root); | 9986 | btrfs_end_transaction(trans, root); |
9985 | next: | 9987 | next: |
9988 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
9986 | btrfs_put_block_group(block_group); | 9989 | btrfs_put_block_group(block_group); |
9987 | spin_lock(&fs_info->unused_bgs_lock); | 9990 | spin_lock(&fs_info->unused_bgs_lock); |
9988 | } | 9991 | } |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a637..d4a582ac3f73 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) | |||
246 | { | 246 | { |
247 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | 247 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
248 | struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; | 248 | struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; |
249 | spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; | ||
249 | struct btrfs_free_space *info; | 250 | struct btrfs_free_space *info; |
250 | struct rb_node *n; | 251 | struct rb_node *n; |
251 | u64 count; | 252 | u64 count; |
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) | |||
254 | return; | 255 | return; |
255 | 256 | ||
256 | while (1) { | 257 | while (1) { |
258 | bool add_to_ctl = true; | ||
259 | |||
260 | spin_lock(rbroot_lock); | ||
257 | n = rb_first(rbroot); | 261 | n = rb_first(rbroot); |
258 | if (!n) | 262 | if (!n) { |
263 | spin_unlock(rbroot_lock); | ||
259 | break; | 264 | break; |
265 | } | ||
260 | 266 | ||
261 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 267 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
262 | BUG_ON(info->bitmap); /* Logic error */ | 268 | BUG_ON(info->bitmap); /* Logic error */ |
263 | 269 | ||
264 | if (info->offset > root->ino_cache_progress) | 270 | if (info->offset > root->ino_cache_progress) |
265 | goto free; | 271 | add_to_ctl = false; |
266 | else if (info->offset + info->bytes > root->ino_cache_progress) | 272 | else if (info->offset + info->bytes > root->ino_cache_progress) |
267 | count = root->ino_cache_progress - info->offset + 1; | 273 | count = root->ino_cache_progress - info->offset + 1; |
268 | else | 274 | else |
269 | count = info->bytes; | 275 | count = info->bytes; |
270 | 276 | ||
271 | __btrfs_add_free_space(ctl, info->offset, count); | ||
272 | free: | ||
273 | rb_erase(&info->offset_index, rbroot); | 277 | rb_erase(&info->offset_index, rbroot); |
274 | kfree(info); | 278 | spin_unlock(rbroot_lock); |
279 | if (add_to_ctl) | ||
280 | __btrfs_add_free_space(ctl, info->offset, count); | ||
281 | kmem_cache_free(btrfs_free_space_cachep, info); | ||
275 | } | 282 | } |
276 | } | 283 | } |
277 | 284 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 855935f6671a..b33c0cf02668 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
4989 | /* | 4989 | /* |
4990 | * Keep looping until we have no more ranges in the io tree. | 4990 | * Keep looping until we have no more ranges in the io tree. |
4991 | * We can have ongoing bios started by readpages (called from readahead) | 4991 | * We can have ongoing bios started by readpages (called from readahead) |
4992 | * that didn't get their end io callbacks called yet or they are still | 4992 | * that have their endio callback (extent_io.c:end_bio_extent_readpage) |
4993 | * in progress ((extent_io.c:end_bio_extent_readpage()). This means some | 4993 | * still in progress (unlocked the pages in the bio but did not yet |
4994 | * unlock the ranges in the io tree). Therefore this means some | ||
4994 | * ranges can still be locked and eviction started because before | 4995 | * ranges can still be locked and eviction started because before |
4995 | * submitting those bios, which are executed by a separate task (work | 4996 | * submitting those bios, which are executed by a separate task (work |
4996 | * queue kthread), inode references (inode->i_count) were not taken | 4997 | * queue kthread), inode references (inode->i_count) were not taken |
@@ -7546,6 +7547,7 @@ unlock: | |||
7546 | 7547 | ||
7547 | current->journal_info = outstanding_extents; | 7548 | current->journal_info = outstanding_extents; |
7548 | btrfs_free_reserved_data_space(inode, len); | 7549 | btrfs_free_reserved_data_space(inode, len); |
7550 | set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); | ||
7549 | } | 7551 | } |
7550 | 7552 | ||
7551 | /* | 7553 | /* |
@@ -7871,8 +7873,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
7871 | struct bio *dio_bio; | 7873 | struct bio *dio_bio; |
7872 | int ret; | 7874 | int ret; |
7873 | 7875 | ||
7874 | if (err) | ||
7875 | goto out_done; | ||
7876 | again: | 7876 | again: |
7877 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, | 7877 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
7878 | &ordered_offset, | 7878 | &ordered_offset, |
@@ -7895,7 +7895,6 @@ out_test: | |||
7895 | ordered = NULL; | 7895 | ordered = NULL; |
7896 | goto again; | 7896 | goto again; |
7897 | } | 7897 | } |
7898 | out_done: | ||
7899 | dio_bio = dip->dio_bio; | 7898 | dio_bio = dip->dio_bio; |
7900 | 7899 | ||
7901 | kfree(dip); | 7900 | kfree(dip); |
@@ -8163,9 +8162,8 @@ out_err: | |||
8163 | static void btrfs_submit_direct(int rw, struct bio *dio_bio, | 8162 | static void btrfs_submit_direct(int rw, struct bio *dio_bio, |
8164 | struct inode *inode, loff_t file_offset) | 8163 | struct inode *inode, loff_t file_offset) |
8165 | { | 8164 | { |
8166 | struct btrfs_root *root = BTRFS_I(inode)->root; | 8165 | struct btrfs_dio_private *dip = NULL; |
8167 | struct btrfs_dio_private *dip; | 8166 | struct bio *io_bio = NULL; |
8168 | struct bio *io_bio; | ||
8169 | struct btrfs_io_bio *btrfs_bio; | 8167 | struct btrfs_io_bio *btrfs_bio; |
8170 | int skip_sum; | 8168 | int skip_sum; |
8171 | int write = rw & REQ_WRITE; | 8169 | int write = rw & REQ_WRITE; |
@@ -8182,7 +8180,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
8182 | dip = kzalloc(sizeof(*dip), GFP_NOFS); | 8180 | dip = kzalloc(sizeof(*dip), GFP_NOFS); |
8183 | if (!dip) { | 8181 | if (!dip) { |
8184 | ret = -ENOMEM; | 8182 | ret = -ENOMEM; |
8185 | goto free_io_bio; | 8183 | goto free_ordered; |
8186 | } | 8184 | } |
8187 | 8185 | ||
8188 | dip->private = dio_bio->bi_private; | 8186 | dip->private = dio_bio->bi_private; |
@@ -8210,25 +8208,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
8210 | 8208 | ||
8211 | if (btrfs_bio->end_io) | 8209 | if (btrfs_bio->end_io) |
8212 | btrfs_bio->end_io(btrfs_bio, ret); | 8210 | btrfs_bio->end_io(btrfs_bio, ret); |
8213 | free_io_bio: | ||
8214 | bio_put(io_bio); | ||
8215 | 8211 | ||
8216 | free_ordered: | 8212 | free_ordered: |
8217 | /* | 8213 | /* |
8218 | * If this is a write, we need to clean up the reserved space and kill | 8214 | * If we arrived here it means either we failed to submit the dip |
8219 | * the ordered extent. | 8215 | * or we failed to clone the dio_bio or failed to allocate the |
8216 | * dip. If we cloned the dio_bio and allocated the dip, we can just | ||
8217 | * call bio_endio against our io_bio so that we get proper resource | ||
8218 | * cleanup if we fail to submit the dip, otherwise, we must do the | ||
8219 | * same as btrfs_endio_direct_[write|read] because we can't call these | ||
8220 | * callbacks - they require an allocated dip and a clone of dio_bio. | ||
8220 | */ | 8221 | */ |
8221 | if (write) { | 8222 | if (io_bio && dip) { |
8222 | struct btrfs_ordered_extent *ordered; | 8223 | bio_endio(io_bio, ret); |
8223 | ordered = btrfs_lookup_ordered_extent(inode, file_offset); | 8224 | /* |
8224 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | 8225 | * The end io callbacks free our dip, do the final put on io_bio |
8225 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | 8226 | * and all the cleanup and final put for dio_bio (through |
8226 | btrfs_free_reserved_extent(root, ordered->start, | 8227 | * dio_end_io()). |
8227 | ordered->disk_len, 1); | 8228 | */ |
8228 | btrfs_put_ordered_extent(ordered); | 8229 | dip = NULL; |
8229 | btrfs_put_ordered_extent(ordered); | 8230 | io_bio = NULL; |
8231 | } else { | ||
8232 | if (write) { | ||
8233 | struct btrfs_ordered_extent *ordered; | ||
8234 | |||
8235 | ordered = btrfs_lookup_ordered_extent(inode, | ||
8236 | file_offset); | ||
8237 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); | ||
8238 | /* | ||
8239 | * Decrements our ref on the ordered extent and removes | ||
8240 | * the ordered extent from the inode's ordered tree, | ||
8241 | * doing all the proper resource cleanup such as for the | ||
8242 | * reserved space and waking up any waiters for this | ||
8243 | * ordered extent (through btrfs_remove_ordered_extent). | ||
8244 | */ | ||
8245 | btrfs_finish_ordered_io(ordered); | ||
8246 | } else { | ||
8247 | unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, | ||
8248 | file_offset + dio_bio->bi_iter.bi_size - 1); | ||
8249 | } | ||
8250 | clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); | ||
8251 | /* | ||
8252 | * Releases and cleans up our dio_bio, no need to bio_put() | ||
8253 | * nor bio_endio()/bio_io_error() against dio_bio. | ||
8254 | */ | ||
8255 | dio_end_io(dio_bio, ret); | ||
8230 | } | 8256 | } |
8231 | bio_endio(dio_bio, ret); | 8257 | if (io_bio) |
8258 | bio_put(io_bio); | ||
8259 | kfree(dip); | ||
8232 | } | 8260 | } |
8233 | 8261 | ||
8234 | static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, | 8262 | static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, |
@@ -8330,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
8330 | btrfs_submit_direct, flags); | 8358 | btrfs_submit_direct, flags); |
8331 | if (iov_iter_rw(iter) == WRITE) { | 8359 | if (iov_iter_rw(iter) == WRITE) { |
8332 | current->journal_info = NULL; | 8360 | current->journal_info = NULL; |
8333 | if (ret < 0 && ret != -EIOCBQUEUED) | 8361 | if (ret < 0 && ret != -EIOCBQUEUED) { |
8334 | btrfs_delalloc_release_space(inode, count); | 8362 | /* |
8335 | else if (ret >= 0 && (size_t)ret < count) | 8363 | * If the error comes from submitting stage, |
8364 | * btrfs_get_blocsk_direct() has free'd data space, | ||
8365 | * and metadata space will be handled by | ||
8366 | * finish_ordered_fn, don't do that again to make | ||
8367 | * sure bytes_may_use is correct. | ||
8368 | */ | ||
8369 | if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, | ||
8370 | &BTRFS_I(inode)->runtime_flags)) | ||
8371 | btrfs_delalloc_release_space(inode, count); | ||
8372 | } else if (ret >= 0 && (size_t)ret < count) | ||
8336 | btrfs_delalloc_release_space(inode, | 8373 | btrfs_delalloc_release_space(inode, |
8337 | count - (size_t)ret); | 8374 | count - (size_t)ret); |
8338 | } | 8375 | } |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c86b835da7a8..5d91776e12a2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 { | |||
87 | 87 | ||
88 | 88 | ||
89 | static int btrfs_clone(struct inode *src, struct inode *inode, | 89 | static int btrfs_clone(struct inode *src, struct inode *inode, |
90 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); | 90 | u64 off, u64 olen, u64 olen_aligned, u64 destoff, |
91 | int no_time_update); | ||
91 | 92 | ||
92 | /* Mask out flags that are inappropriate for the given type of inode. */ | 93 | /* Mask out flags that are inappropriate for the given type of inode. */ |
93 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 94 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
@@ -2765,14 +2766,11 @@ out: | |||
2765 | return ret; | 2766 | return ret; |
2766 | } | 2767 | } |
2767 | 2768 | ||
2768 | static struct page *extent_same_get_page(struct inode *inode, u64 off) | 2769 | static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) |
2769 | { | 2770 | { |
2770 | struct page *page; | 2771 | struct page *page; |
2771 | pgoff_t index; | ||
2772 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 2772 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
2773 | 2773 | ||
2774 | index = off >> PAGE_CACHE_SHIFT; | ||
2775 | |||
2776 | page = grab_cache_page(inode->i_mapping, index); | 2774 | page = grab_cache_page(inode->i_mapping, index); |
2777 | if (!page) | 2775 | if (!page) |
2778 | return NULL; | 2776 | return NULL; |
@@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) | |||
2793 | return page; | 2791 | return page; |
2794 | } | 2792 | } |
2795 | 2793 | ||
2794 | static int gather_extent_pages(struct inode *inode, struct page **pages, | ||
2795 | int num_pages, u64 off) | ||
2796 | { | ||
2797 | int i; | ||
2798 | pgoff_t index = off >> PAGE_CACHE_SHIFT; | ||
2799 | |||
2800 | for (i = 0; i < num_pages; i++) { | ||
2801 | pages[i] = extent_same_get_page(inode, index + i); | ||
2802 | if (!pages[i]) | ||
2803 | return -ENOMEM; | ||
2804 | } | ||
2805 | return 0; | ||
2806 | } | ||
2807 | |||
2796 | static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | 2808 | static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) |
2797 | { | 2809 | { |
2798 | /* do any pending delalloc/csum calc on src, one way or | 2810 | /* do any pending delalloc/csum calc on src, one way or |
@@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
2818 | } | 2830 | } |
2819 | } | 2831 | } |
2820 | 2832 | ||
2821 | static void btrfs_double_unlock(struct inode *inode1, u64 loff1, | 2833 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) |
2822 | struct inode *inode2, u64 loff2, u64 len) | ||
2823 | { | 2834 | { |
2824 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); | ||
2825 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | ||
2826 | |||
2827 | mutex_unlock(&inode1->i_mutex); | 2835 | mutex_unlock(&inode1->i_mutex); |
2828 | mutex_unlock(&inode2->i_mutex); | 2836 | mutex_unlock(&inode2->i_mutex); |
2829 | } | 2837 | } |
2830 | 2838 | ||
2831 | static void btrfs_double_lock(struct inode *inode1, u64 loff1, | 2839 | static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) |
2832 | struct inode *inode2, u64 loff2, u64 len) | 2840 | { |
2841 | if (inode1 < inode2) | ||
2842 | swap(inode1, inode2); | ||
2843 | |||
2844 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
2845 | if (inode1 != inode2) | ||
2846 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
2847 | } | ||
2848 | |||
2849 | static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | ||
2850 | struct inode *inode2, u64 loff2, u64 len) | ||
2851 | { | ||
2852 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); | ||
2853 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | ||
2854 | } | ||
2855 | |||
2856 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | ||
2857 | struct inode *inode2, u64 loff2, u64 len) | ||
2833 | { | 2858 | { |
2834 | if (inode1 < inode2) { | 2859 | if (inode1 < inode2) { |
2835 | swap(inode1, inode2); | 2860 | swap(inode1, inode2); |
2836 | swap(loff1, loff2); | 2861 | swap(loff1, loff2); |
2837 | } | 2862 | } |
2838 | |||
2839 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
2840 | lock_extent_range(inode1, loff1, len); | 2863 | lock_extent_range(inode1, loff1, len); |
2841 | if (inode1 != inode2) { | 2864 | if (inode1 != inode2) |
2842 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
2843 | lock_extent_range(inode2, loff2, len); | 2865 | lock_extent_range(inode2, loff2, len); |
2866 | } | ||
2867 | |||
2868 | struct cmp_pages { | ||
2869 | int num_pages; | ||
2870 | struct page **src_pages; | ||
2871 | struct page **dst_pages; | ||
2872 | }; | ||
2873 | |||
2874 | static void btrfs_cmp_data_free(struct cmp_pages *cmp) | ||
2875 | { | ||
2876 | int i; | ||
2877 | struct page *pg; | ||
2878 | |||
2879 | for (i = 0; i < cmp->num_pages; i++) { | ||
2880 | pg = cmp->src_pages[i]; | ||
2881 | if (pg) | ||
2882 | page_cache_release(pg); | ||
2883 | pg = cmp->dst_pages[i]; | ||
2884 | if (pg) | ||
2885 | page_cache_release(pg); | ||
2886 | } | ||
2887 | kfree(cmp->src_pages); | ||
2888 | kfree(cmp->dst_pages); | ||
2889 | } | ||
2890 | |||
2891 | static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, | ||
2892 | struct inode *dst, u64 dst_loff, | ||
2893 | u64 len, struct cmp_pages *cmp) | ||
2894 | { | ||
2895 | int ret; | ||
2896 | int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; | ||
2897 | struct page **src_pgarr, **dst_pgarr; | ||
2898 | |||
2899 | /* | ||
2900 | * We must gather up all the pages before we initiate our | ||
2901 | * extent locking. We use an array for the page pointers. Size | ||
2902 | * of the array is bounded by len, which is in turn bounded by | ||
2903 | * BTRFS_MAX_DEDUPE_LEN. | ||
2904 | */ | ||
2905 | src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); | ||
2906 | dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); | ||
2907 | if (!src_pgarr || !dst_pgarr) { | ||
2908 | kfree(src_pgarr); | ||
2909 | kfree(dst_pgarr); | ||
2910 | return -ENOMEM; | ||
2844 | } | 2911 | } |
2912 | cmp->num_pages = num_pages; | ||
2913 | cmp->src_pages = src_pgarr; | ||
2914 | cmp->dst_pages = dst_pgarr; | ||
2915 | |||
2916 | ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); | ||
2917 | if (ret) | ||
2918 | goto out; | ||
2919 | |||
2920 | ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); | ||
2921 | |||
2922 | out: | ||
2923 | if (ret) | ||
2924 | btrfs_cmp_data_free(cmp); | ||
2925 | return 0; | ||
2845 | } | 2926 | } |
2846 | 2927 | ||
2847 | static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, | 2928 | static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, |
2848 | u64 dst_loff, u64 len) | 2929 | u64 dst_loff, u64 len, struct cmp_pages *cmp) |
2849 | { | 2930 | { |
2850 | int ret = 0; | 2931 | int ret = 0; |
2932 | int i; | ||
2851 | struct page *src_page, *dst_page; | 2933 | struct page *src_page, *dst_page; |
2852 | unsigned int cmp_len = PAGE_CACHE_SIZE; | 2934 | unsigned int cmp_len = PAGE_CACHE_SIZE; |
2853 | void *addr, *dst_addr; | 2935 | void *addr, *dst_addr; |
2854 | 2936 | ||
2937 | i = 0; | ||
2855 | while (len) { | 2938 | while (len) { |
2856 | if (len < PAGE_CACHE_SIZE) | 2939 | if (len < PAGE_CACHE_SIZE) |
2857 | cmp_len = len; | 2940 | cmp_len = len; |
2858 | 2941 | ||
2859 | src_page = extent_same_get_page(src, loff); | 2942 | BUG_ON(i >= cmp->num_pages); |
2860 | if (!src_page) | 2943 | |
2861 | return -EINVAL; | 2944 | src_page = cmp->src_pages[i]; |
2862 | dst_page = extent_same_get_page(dst, dst_loff); | 2945 | dst_page = cmp->dst_pages[i]; |
2863 | if (!dst_page) { | 2946 | |
2864 | page_cache_release(src_page); | ||
2865 | return -EINVAL; | ||
2866 | } | ||
2867 | addr = kmap_atomic(src_page); | 2947 | addr = kmap_atomic(src_page); |
2868 | dst_addr = kmap_atomic(dst_page); | 2948 | dst_addr = kmap_atomic(dst_page); |
2869 | 2949 | ||
@@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, | |||
2875 | 2955 | ||
2876 | kunmap_atomic(addr); | 2956 | kunmap_atomic(addr); |
2877 | kunmap_atomic(dst_addr); | 2957 | kunmap_atomic(dst_addr); |
2878 | page_cache_release(src_page); | ||
2879 | page_cache_release(dst_page); | ||
2880 | 2958 | ||
2881 | if (ret) | 2959 | if (ret) |
2882 | break; | 2960 | break; |
2883 | 2961 | ||
2884 | loff += cmp_len; | ||
2885 | dst_loff += cmp_len; | ||
2886 | len -= cmp_len; | 2962 | len -= cmp_len; |
2963 | i++; | ||
2887 | } | 2964 | } |
2888 | 2965 | ||
2889 | return ret; | 2966 | return ret; |
@@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
2914 | { | 2991 | { |
2915 | int ret; | 2992 | int ret; |
2916 | u64 len = olen; | 2993 | u64 len = olen; |
2994 | struct cmp_pages cmp; | ||
2995 | int same_inode = 0; | ||
2996 | u64 same_lock_start = 0; | ||
2997 | u64 same_lock_len = 0; | ||
2917 | 2998 | ||
2918 | /* | ||
2919 | * btrfs_clone() can't handle extents in the same file | ||
2920 | * yet. Once that works, we can drop this check and replace it | ||
2921 | * with a check for the same inode, but overlapping extents. | ||
2922 | */ | ||
2923 | if (src == dst) | 2999 | if (src == dst) |
2924 | return -EINVAL; | 3000 | same_inode = 1; |
2925 | 3001 | ||
2926 | if (len == 0) | 3002 | if (len == 0) |
2927 | return 0; | 3003 | return 0; |
2928 | 3004 | ||
2929 | btrfs_double_lock(src, loff, dst, dst_loff, len); | 3005 | if (same_inode) { |
3006 | mutex_lock(&src->i_mutex); | ||
2930 | 3007 | ||
2931 | ret = extent_same_check_offsets(src, loff, &len, olen); | 3008 | ret = extent_same_check_offsets(src, loff, &len, olen); |
2932 | if (ret) | 3009 | if (ret) |
2933 | goto out_unlock; | 3010 | goto out_unlock; |
2934 | 3011 | ||
2935 | ret = extent_same_check_offsets(dst, dst_loff, &len, olen); | 3012 | /* |
2936 | if (ret) | 3013 | * Single inode case wants the same checks, except we |
2937 | goto out_unlock; | 3014 | * don't want our length pushed out past i_size as |
3015 | * comparing that data range makes no sense. | ||
3016 | * | ||
3017 | * extent_same_check_offsets() will do this for an | ||
3018 | * unaligned length at i_size, so catch it here and | ||
3019 | * reject the request. | ||
3020 | * | ||
3021 | * This effectively means we require aligned extents | ||
3022 | * for the single-inode case, whereas the other cases | ||
3023 | * allow an unaligned length so long as it ends at | ||
3024 | * i_size. | ||
3025 | */ | ||
3026 | if (len != olen) { | ||
3027 | ret = -EINVAL; | ||
3028 | goto out_unlock; | ||
3029 | } | ||
3030 | |||
3031 | /* Check for overlapping ranges */ | ||
3032 | if (dst_loff + len > loff && dst_loff < loff + len) { | ||
3033 | ret = -EINVAL; | ||
3034 | goto out_unlock; | ||
3035 | } | ||
3036 | |||
3037 | same_lock_start = min_t(u64, loff, dst_loff); | ||
3038 | same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; | ||
3039 | } else { | ||
3040 | btrfs_double_inode_lock(src, dst); | ||
3041 | |||
3042 | ret = extent_same_check_offsets(src, loff, &len, olen); | ||
3043 | if (ret) | ||
3044 | goto out_unlock; | ||
3045 | |||
3046 | ret = extent_same_check_offsets(dst, dst_loff, &len, olen); | ||
3047 | if (ret) | ||
3048 | goto out_unlock; | ||
3049 | } | ||
2938 | 3050 | ||
2939 | /* don't make the dst file partly checksummed */ | 3051 | /* don't make the dst file partly checksummed */ |
2940 | if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != | 3052 | if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != |
@@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
2943 | goto out_unlock; | 3055 | goto out_unlock; |
2944 | } | 3056 | } |
2945 | 3057 | ||
2946 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); | 3058 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); |
3059 | if (ret) | ||
3060 | goto out_unlock; | ||
3061 | |||
3062 | if (same_inode) | ||
3063 | lock_extent_range(src, same_lock_start, same_lock_len); | ||
3064 | else | ||
3065 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); | ||
3066 | |||
3067 | /* pass original length for comparison so we stay within i_size */ | ||
3068 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); | ||
2947 | if (ret == 0) | 3069 | if (ret == 0) |
2948 | ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); | 3070 | ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); |
3071 | |||
3072 | if (same_inode) | ||
3073 | unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, | ||
3074 | same_lock_start + same_lock_len - 1); | ||
3075 | else | ||
3076 | btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); | ||
2949 | 3077 | ||
3078 | btrfs_cmp_data_free(&cmp); | ||
2950 | out_unlock: | 3079 | out_unlock: |
2951 | btrfs_double_unlock(src, loff, dst, dst_loff, len); | 3080 | if (same_inode) |
3081 | mutex_unlock(&src->i_mutex); | ||
3082 | else | ||
3083 | btrfs_double_inode_unlock(src, dst); | ||
2952 | 3084 | ||
2953 | return ret; | 3085 | return ret; |
2954 | } | 3086 | } |
@@ -3100,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, | |||
3100 | struct inode *inode, | 3232 | struct inode *inode, |
3101 | u64 endoff, | 3233 | u64 endoff, |
3102 | const u64 destoff, | 3234 | const u64 destoff, |
3103 | const u64 olen) | 3235 | const u64 olen, |
3236 | int no_time_update) | ||
3104 | { | 3237 | { |
3105 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3238 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3106 | int ret; | 3239 | int ret; |
3107 | 3240 | ||
3108 | inode_inc_iversion(inode); | 3241 | inode_inc_iversion(inode); |
3109 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 3242 | if (!no_time_update) |
3243 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
3110 | /* | 3244 | /* |
3111 | * We round up to the block size at eof when determining which | 3245 | * We round up to the block size at eof when determining which |
3112 | * extents to clone above, but shouldn't round up the file size. | 3246 | * extents to clone above, but shouldn't round up the file size. |
@@ -3191,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode, | |||
3191 | * @inode: Inode to clone to | 3325 | * @inode: Inode to clone to |
3192 | * @off: Offset within source to start clone from | 3326 | * @off: Offset within source to start clone from |
3193 | * @olen: Original length, passed by user, of range to clone | 3327 | * @olen: Original length, passed by user, of range to clone |
3194 | * @olen_aligned: Block-aligned value of olen, extent_same uses | 3328 | * @olen_aligned: Block-aligned value of olen |
3195 | * identical values here | ||
3196 | * @destoff: Offset within @inode to start clone | 3329 | * @destoff: Offset within @inode to start clone |
3330 | * @no_time_update: Whether to update mtime/ctime on the target inode | ||
3197 | */ | 3331 | */ |
3198 | static int btrfs_clone(struct inode *src, struct inode *inode, | 3332 | static int btrfs_clone(struct inode *src, struct inode *inode, |
3199 | const u64 off, const u64 olen, const u64 olen_aligned, | 3333 | const u64 off, const u64 olen, const u64 olen_aligned, |
3200 | const u64 destoff) | 3334 | const u64 destoff, int no_time_update) |
3201 | { | 3335 | { |
3202 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3336 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3203 | struct btrfs_path *path = NULL; | 3337 | struct btrfs_path *path = NULL; |
@@ -3521,7 +3655,8 @@ process_slot: | |||
3521 | root->sectorsize); | 3655 | root->sectorsize); |
3522 | ret = clone_finish_inode_update(trans, inode, | 3656 | ret = clone_finish_inode_update(trans, inode, |
3523 | last_dest_end, | 3657 | last_dest_end, |
3524 | destoff, olen); | 3658 | destoff, olen, |
3659 | no_time_update); | ||
3525 | if (ret) | 3660 | if (ret) |
3526 | goto out; | 3661 | goto out; |
3527 | if (new_key.offset + datal >= destoff + len) | 3662 | if (new_key.offset + datal >= destoff + len) |
@@ -3559,7 +3694,7 @@ process_slot: | |||
3559 | clone_update_extent_map(inode, trans, NULL, last_dest_end, | 3694 | clone_update_extent_map(inode, trans, NULL, last_dest_end, |
3560 | destoff + len - last_dest_end); | 3695 | destoff + len - last_dest_end); |
3561 | ret = clone_finish_inode_update(trans, inode, destoff + len, | 3696 | ret = clone_finish_inode_update(trans, inode, destoff + len, |
3562 | destoff, olen); | 3697 | destoff, olen, no_time_update); |
3563 | } | 3698 | } |
3564 | 3699 | ||
3565 | out: | 3700 | out: |
@@ -3696,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
3696 | lock_extent_range(inode, destoff, len); | 3831 | lock_extent_range(inode, destoff, len); |
3697 | } | 3832 | } |
3698 | 3833 | ||
3699 | ret = btrfs_clone(src, inode, off, olen, len, destoff); | 3834 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |
3700 | 3835 | ||
3701 | if (same_inode) { | 3836 | if (same_inode) { |
3702 | u64 lock_start = min_t(u64, off, destoff); | 3837 | u64 lock_start = min_t(u64, off, destoff); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 89656d799ff6..52170cf1757e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
552 | trace_btrfs_ordered_extent_put(entry->inode, entry); | 552 | trace_btrfs_ordered_extent_put(entry->inode, entry); |
553 | 553 | ||
554 | if (atomic_dec_and_test(&entry->refs)) { | 554 | if (atomic_dec_and_test(&entry->refs)) { |
555 | ASSERT(list_empty(&entry->log_list)); | ||
556 | ASSERT(list_empty(&entry->trans_list)); | ||
557 | ASSERT(list_empty(&entry->root_extent_list)); | ||
558 | ASSERT(RB_EMPTY_NODE(&entry->rb_node)); | ||
555 | if (entry->inode) | 559 | if (entry->inode) |
556 | btrfs_add_delayed_iput(entry->inode); | 560 | btrfs_add_delayed_iput(entry->inode); |
557 | while (!list_empty(&entry->list)) { | 561 | while (!list_empty(&entry->list)) { |
@@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
579 | spin_lock_irq(&tree->lock); | 583 | spin_lock_irq(&tree->lock); |
580 | node = &entry->rb_node; | 584 | node = &entry->rb_node; |
581 | rb_erase(node, &tree->tree); | 585 | rb_erase(node, &tree->tree); |
586 | RB_CLEAR_NODE(node); | ||
582 | if (tree->last == node) | 587 | if (tree->last == node) |
583 | tree->last = NULL; | 588 | tree->last = NULL; |
584 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 589 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d5f1f033b7a0..e9ace099162c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | |||
1349 | struct btrfs_root *quota_root; | 1349 | struct btrfs_root *quota_root; |
1350 | struct btrfs_qgroup *qgroup; | 1350 | struct btrfs_qgroup *qgroup; |
1351 | int ret = 0; | 1351 | int ret = 0; |
1352 | /* Sometimes we would want to clear the limit on this qgroup. | ||
1353 | * To meet this requirement, we treat the -1 as a special value | ||
1354 | * which tell kernel to clear the limit on this qgroup. | ||
1355 | */ | ||
1356 | const u64 CLEAR_VALUE = -1; | ||
1352 | 1357 | ||
1353 | mutex_lock(&fs_info->qgroup_ioctl_lock); | 1358 | mutex_lock(&fs_info->qgroup_ioctl_lock); |
1354 | quota_root = fs_info->quota_root; | 1359 | quota_root = fs_info->quota_root; |
@@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, | |||
1364 | } | 1369 | } |
1365 | 1370 | ||
1366 | spin_lock(&fs_info->qgroup_lock); | 1371 | spin_lock(&fs_info->qgroup_lock); |
1367 | if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) | 1372 | if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { |
1368 | qgroup->max_rfer = limit->max_rfer; | 1373 | if (limit->max_rfer == CLEAR_VALUE) { |
1369 | if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) | 1374 | qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; |
1370 | qgroup->max_excl = limit->max_excl; | 1375 | limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; |
1371 | if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) | 1376 | qgroup->max_rfer = 0; |
1372 | qgroup->rsv_rfer = limit->rsv_rfer; | 1377 | } else { |
1373 | if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) | 1378 | qgroup->max_rfer = limit->max_rfer; |
1374 | qgroup->rsv_excl = limit->rsv_excl; | 1379 | } |
1380 | } | ||
1381 | if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { | ||
1382 | if (limit->max_excl == CLEAR_VALUE) { | ||
1383 | qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; | ||
1384 | limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; | ||
1385 | qgroup->max_excl = 0; | ||
1386 | } else { | ||
1387 | qgroup->max_excl = limit->max_excl; | ||
1388 | } | ||
1389 | } | ||
1390 | if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { | ||
1391 | if (limit->rsv_rfer == CLEAR_VALUE) { | ||
1392 | qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; | ||
1393 | limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; | ||
1394 | qgroup->rsv_rfer = 0; | ||
1395 | } else { | ||
1396 | qgroup->rsv_rfer = limit->rsv_rfer; | ||
1397 | } | ||
1398 | } | ||
1399 | if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { | ||
1400 | if (limit->rsv_excl == CLEAR_VALUE) { | ||
1401 | qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; | ||
1402 | limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; | ||
1403 | qgroup->rsv_excl = 0; | ||
1404 | } else { | ||
1405 | qgroup->rsv_excl = limit->rsv_excl; | ||
1406 | } | ||
1407 | } | ||
1375 | qgroup->lim_flags |= limit->flags; | 1408 | qgroup->lim_flags |= limit->flags; |
1376 | 1409 | ||
1377 | spin_unlock(&fs_info->qgroup_lock); | 1410 | spin_unlock(&fs_info->qgroup_lock); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 827951fbf7fc..88cbb5995667 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -4049,7 +4049,7 @@ restart: | |||
4049 | if (trans && progress && err == -ENOSPC) { | 4049 | if (trans && progress && err == -ENOSPC) { |
4050 | ret = btrfs_force_chunk_alloc(trans, rc->extent_root, | 4050 | ret = btrfs_force_chunk_alloc(trans, rc->extent_root, |
4051 | rc->block_group->flags); | 4051 | rc->block_group->flags); |
4052 | if (ret == 0) { | 4052 | if (ret == 1) { |
4053 | err = 0; | 4053 | err = 0; |
4054 | progress = 0; | 4054 | progress = 0; |
4055 | goto restart; | 4055 | goto restart; |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9f2feabe99f2..94db0fa5225a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, | |||
3571 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | 3571 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, |
3572 | int is_dev_replace) | 3572 | int is_dev_replace) |
3573 | { | 3573 | { |
3574 | int ret = 0; | ||
3575 | unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; | 3574 | unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; |
3576 | int max_active = fs_info->thread_pool_size; | 3575 | int max_active = fs_info->thread_pool_size; |
3577 | 3576 | ||
@@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
3584 | fs_info->scrub_workers = | 3583 | fs_info->scrub_workers = |
3585 | btrfs_alloc_workqueue("btrfs-scrub", flags, | 3584 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
3586 | max_active, 4); | 3585 | max_active, 4); |
3587 | if (!fs_info->scrub_workers) { | 3586 | if (!fs_info->scrub_workers) |
3588 | ret = -ENOMEM; | 3587 | goto fail_scrub_workers; |
3589 | goto out; | 3588 | |
3590 | } | ||
3591 | fs_info->scrub_wr_completion_workers = | 3589 | fs_info->scrub_wr_completion_workers = |
3592 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, | 3590 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, |
3593 | max_active, 2); | 3591 | max_active, 2); |
3594 | if (!fs_info->scrub_wr_completion_workers) { | 3592 | if (!fs_info->scrub_wr_completion_workers) |
3595 | ret = -ENOMEM; | 3593 | goto fail_scrub_wr_completion_workers; |
3596 | goto out; | 3594 | |
3597 | } | ||
3598 | fs_info->scrub_nocow_workers = | 3595 | fs_info->scrub_nocow_workers = |
3599 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); | 3596 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); |
3600 | if (!fs_info->scrub_nocow_workers) { | 3597 | if (!fs_info->scrub_nocow_workers) |
3601 | ret = -ENOMEM; | 3598 | goto fail_scrub_nocow_workers; |
3602 | goto out; | ||
3603 | } | ||
3604 | fs_info->scrub_parity_workers = | 3599 | fs_info->scrub_parity_workers = |
3605 | btrfs_alloc_workqueue("btrfs-scrubparity", flags, | 3600 | btrfs_alloc_workqueue("btrfs-scrubparity", flags, |
3606 | max_active, 2); | 3601 | max_active, 2); |
3607 | if (!fs_info->scrub_parity_workers) { | 3602 | if (!fs_info->scrub_parity_workers) |
3608 | ret = -ENOMEM; | 3603 | goto fail_scrub_parity_workers; |
3609 | goto out; | ||
3610 | } | ||
3611 | } | 3604 | } |
3612 | ++fs_info->scrub_workers_refcnt; | 3605 | ++fs_info->scrub_workers_refcnt; |
3613 | out: | 3606 | return 0; |
3614 | return ret; | 3607 | |
3608 | fail_scrub_parity_workers: | ||
3609 | btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); | ||
3610 | fail_scrub_nocow_workers: | ||
3611 | btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); | ||
3612 | fail_scrub_wr_completion_workers: | ||
3613 | btrfs_destroy_workqueue(fs_info->scrub_workers); | ||
3614 | fail_scrub_workers: | ||
3615 | return -ENOMEM; | ||
3615 | } | 3616 | } |
3616 | 3617 | ||
3617 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 3618 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, | |||
4117 | return 0; | 4117 | return 0; |
4118 | } | 4118 | } |
4119 | 4119 | ||
4120 | /* | ||
4121 | * At the moment we always log all xattrs. This is to figure out at log replay | ||
4122 | * time which xattrs must have their deletion replayed. If a xattr is missing | ||
4123 | * in the log tree and exists in the fs/subvol tree, we delete it. This is | ||
4124 | * because if a xattr is deleted, the inode is fsynced and a power failure | ||
4125 | * happens, causing the log to be replayed the next time the fs is mounted, | ||
4126 | * we want the xattr to not exist anymore (same behaviour as other filesystems | ||
4127 | * with a journal, ext3/4, xfs, f2fs, etc). | ||
4128 | */ | ||
4129 | static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, | ||
4130 | struct btrfs_root *root, | ||
4131 | struct inode *inode, | ||
4132 | struct btrfs_path *path, | ||
4133 | struct btrfs_path *dst_path) | ||
4134 | { | ||
4135 | int ret; | ||
4136 | struct btrfs_key key; | ||
4137 | const u64 ino = btrfs_ino(inode); | ||
4138 | int ins_nr = 0; | ||
4139 | int start_slot = 0; | ||
4140 | |||
4141 | key.objectid = ino; | ||
4142 | key.type = BTRFS_XATTR_ITEM_KEY; | ||
4143 | key.offset = 0; | ||
4144 | |||
4145 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
4146 | if (ret < 0) | ||
4147 | return ret; | ||
4148 | |||
4149 | while (true) { | ||
4150 | int slot = path->slots[0]; | ||
4151 | struct extent_buffer *leaf = path->nodes[0]; | ||
4152 | int nritems = btrfs_header_nritems(leaf); | ||
4153 | |||
4154 | if (slot >= nritems) { | ||
4155 | if (ins_nr > 0) { | ||
4156 | u64 last_extent = 0; | ||
4157 | |||
4158 | ret = copy_items(trans, inode, dst_path, path, | ||
4159 | &last_extent, start_slot, | ||
4160 | ins_nr, 1, 0); | ||
4161 | /* can't be 1, extent items aren't processed */ | ||
4162 | ASSERT(ret <= 0); | ||
4163 | if (ret < 0) | ||
4164 | return ret; | ||
4165 | ins_nr = 0; | ||
4166 | } | ||
4167 | ret = btrfs_next_leaf(root, path); | ||
4168 | if (ret < 0) | ||
4169 | return ret; | ||
4170 | else if (ret > 0) | ||
4171 | break; | ||
4172 | continue; | ||
4173 | } | ||
4174 | |||
4175 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
4176 | if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) | ||
4177 | break; | ||
4178 | |||
4179 | if (ins_nr == 0) | ||
4180 | start_slot = slot; | ||
4181 | ins_nr++; | ||
4182 | path->slots[0]++; | ||
4183 | cond_resched(); | ||
4184 | } | ||
4185 | if (ins_nr > 0) { | ||
4186 | u64 last_extent = 0; | ||
4187 | |||
4188 | ret = copy_items(trans, inode, dst_path, path, | ||
4189 | &last_extent, start_slot, | ||
4190 | ins_nr, 1, 0); | ||
4191 | /* can't be 1, extent items aren't processed */ | ||
4192 | ASSERT(ret <= 0); | ||
4193 | if (ret < 0) | ||
4194 | return ret; | ||
4195 | } | ||
4196 | |||
4197 | return 0; | ||
4198 | } | ||
4199 | |||
4200 | /* | ||
4201 | * If the no holes feature is enabled we need to make sure any hole between the | ||
4202 | * last extent and the i_size of our inode is explicitly marked in the log. This | ||
4203 | * is to make sure that doing something like: | ||
4204 | * | ||
4205 | * 1) create file with 128Kb of data | ||
4206 | * 2) truncate file to 64Kb | ||
4207 | * 3) truncate file to 256Kb | ||
4208 | * 4) fsync file | ||
4209 | * 5) <crash/power failure> | ||
4210 | * 6) mount fs and trigger log replay | ||
4211 | * | ||
4212 | * Will give us a file with a size of 256Kb, the first 64Kb of data match what | ||
4213 | * the file had in its first 64Kb of data at step 1 and the last 192Kb of the | ||
4214 | * file correspond to a hole. The presence of explicit holes in a log tree is | ||
4215 | * what guarantees that log replay will remove/adjust file extent items in the | ||
4216 | * fs/subvol tree. | ||
4217 | * | ||
4218 | * Here we do not need to care about holes between extents, that is already done | ||
4219 | * by copy_items(). We also only need to do this in the full sync path, where we | ||
4220 | * lookup for extents from the fs/subvol tree only. In the fast path case, we | ||
4221 | * lookup the list of modified extent maps and if any represents a hole, we | ||
4222 | * insert a corresponding extent representing a hole in the log tree. | ||
4223 | */ | ||
4224 | static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, | ||
4225 | struct btrfs_root *root, | ||
4226 | struct inode *inode, | ||
4227 | struct btrfs_path *path) | ||
4228 | { | ||
4229 | int ret; | ||
4230 | struct btrfs_key key; | ||
4231 | u64 hole_start; | ||
4232 | u64 hole_size; | ||
4233 | struct extent_buffer *leaf; | ||
4234 | struct btrfs_root *log = root->log_root; | ||
4235 | const u64 ino = btrfs_ino(inode); | ||
4236 | const u64 i_size = i_size_read(inode); | ||
4237 | |||
4238 | if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) | ||
4239 | return 0; | ||
4240 | |||
4241 | key.objectid = ino; | ||
4242 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
4243 | key.offset = (u64)-1; | ||
4244 | |||
4245 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
4246 | ASSERT(ret != 0); | ||
4247 | if (ret < 0) | ||
4248 | return ret; | ||
4249 | |||
4250 | ASSERT(path->slots[0] > 0); | ||
4251 | path->slots[0]--; | ||
4252 | leaf = path->nodes[0]; | ||
4253 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
4254 | |||
4255 | if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { | ||
4256 | /* inode does not have any extents */ | ||
4257 | hole_start = 0; | ||
4258 | hole_size = i_size; | ||
4259 | } else { | ||
4260 | struct btrfs_file_extent_item *extent; | ||
4261 | u64 len; | ||
4262 | |||
4263 | /* | ||
4264 | * If there's an extent beyond i_size, an explicit hole was | ||
4265 | * already inserted by copy_items(). | ||
4266 | */ | ||
4267 | if (key.offset >= i_size) | ||
4268 | return 0; | ||
4269 | |||
4270 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
4271 | struct btrfs_file_extent_item); | ||
4272 | |||
4273 | if (btrfs_file_extent_type(leaf, extent) == | ||
4274 | BTRFS_FILE_EXTENT_INLINE) { | ||
4275 | len = btrfs_file_extent_inline_len(leaf, | ||
4276 | path->slots[0], | ||
4277 | extent); | ||
4278 | ASSERT(len == i_size); | ||
4279 | return 0; | ||
4280 | } | ||
4281 | |||
4282 | len = btrfs_file_extent_num_bytes(leaf, extent); | ||
4283 | /* Last extent goes beyond i_size, no need to log a hole. */ | ||
4284 | if (key.offset + len > i_size) | ||
4285 | return 0; | ||
4286 | hole_start = key.offset + len; | ||
4287 | hole_size = i_size - hole_start; | ||
4288 | } | ||
4289 | btrfs_release_path(path); | ||
4290 | |||
4291 | /* Last extent ends at i_size. */ | ||
4292 | if (hole_size == 0) | ||
4293 | return 0; | ||
4294 | |||
4295 | hole_size = ALIGN(hole_size, root->sectorsize); | ||
4296 | ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, | ||
4297 | hole_size, 0, hole_size, 0, 0, 0); | ||
4298 | return ret; | ||
4299 | } | ||
4300 | |||
4120 | /* log a single inode in the tree log. | 4301 | /* log a single inode in the tree log. |
4121 | * At least one parent directory for this inode must exist in the tree | 4302 | * At least one parent directory for this inode must exist in the tree |
4122 | * or be logged already. | 4303 | * or be logged already. |
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4155 | u64 ino = btrfs_ino(inode); | 4336 | u64 ino = btrfs_ino(inode); |
4156 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 4337 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4157 | u64 logged_isize = 0; | 4338 | u64 logged_isize = 0; |
4339 | bool need_log_inode_item = true; | ||
4158 | 4340 | ||
4159 | path = btrfs_alloc_path(); | 4341 | path = btrfs_alloc_path(); |
4160 | if (!path) | 4342 | if (!path) |
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4263 | } else { | 4445 | } else { |
4264 | if (inode_only == LOG_INODE_ALL) | 4446 | if (inode_only == LOG_INODE_ALL) |
4265 | fast_search = true; | 4447 | fast_search = true; |
4266 | ret = log_inode_item(trans, log, dst_path, inode); | ||
4267 | if (ret) { | ||
4268 | err = ret; | ||
4269 | goto out_unlock; | ||
4270 | } | ||
4271 | goto log_extents; | 4448 | goto log_extents; |
4272 | } | 4449 | } |
4273 | 4450 | ||
@@ -4290,6 +4467,28 @@ again: | |||
4290 | if (min_key.type > max_key.type) | 4467 | if (min_key.type > max_key.type) |
4291 | break; | 4468 | break; |
4292 | 4469 | ||
4470 | if (min_key.type == BTRFS_INODE_ITEM_KEY) | ||
4471 | need_log_inode_item = false; | ||
4472 | |||
4473 | /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ | ||
4474 | if (min_key.type == BTRFS_XATTR_ITEM_KEY) { | ||
4475 | if (ins_nr == 0) | ||
4476 | goto next_slot; | ||
4477 | ret = copy_items(trans, inode, dst_path, path, | ||
4478 | &last_extent, ins_start_slot, | ||
4479 | ins_nr, inode_only, logged_isize); | ||
4480 | if (ret < 0) { | ||
4481 | err = ret; | ||
4482 | goto out_unlock; | ||
4483 | } | ||
4484 | ins_nr = 0; | ||
4485 | if (ret) { | ||
4486 | btrfs_release_path(path); | ||
4487 | continue; | ||
4488 | } | ||
4489 | goto next_slot; | ||
4490 | } | ||
4491 | |||
4293 | src = path->nodes[0]; | 4492 | src = path->nodes[0]; |
4294 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { | 4493 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { |
4295 | ins_nr++; | 4494 | ins_nr++; |
@@ -4357,9 +4556,26 @@ next_slot: | |||
4357 | ins_nr = 0; | 4556 | ins_nr = 0; |
4358 | } | 4557 | } |
4359 | 4558 | ||
4559 | btrfs_release_path(path); | ||
4560 | btrfs_release_path(dst_path); | ||
4561 | err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); | ||
4562 | if (err) | ||
4563 | goto out_unlock; | ||
4564 | if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { | ||
4565 | btrfs_release_path(path); | ||
4566 | btrfs_release_path(dst_path); | ||
4567 | err = btrfs_log_trailing_hole(trans, root, inode, path); | ||
4568 | if (err) | ||
4569 | goto out_unlock; | ||
4570 | } | ||
4360 | log_extents: | 4571 | log_extents: |
4361 | btrfs_release_path(path); | 4572 | btrfs_release_path(path); |
4362 | btrfs_release_path(dst_path); | 4573 | btrfs_release_path(dst_path); |
4574 | if (need_log_inode_item) { | ||
4575 | err = log_inode_item(trans, log, dst_path, inode); | ||
4576 | if (err) | ||
4577 | goto out_unlock; | ||
4578 | } | ||
4363 | if (fast_search) { | 4579 | if (fast_search) { |
4364 | /* | 4580 | /* |
4365 | * Some ordered extents started by fsync might have completed | 4581 | * Some ordered extents started by fsync might have completed |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4b438b4c8c91..fbe7c104531c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
2766 | root = root->fs_info->chunk_root; | 2766 | root = root->fs_info->chunk_root; |
2767 | extent_root = root->fs_info->extent_root; | 2767 | extent_root = root->fs_info->extent_root; |
2768 | 2768 | ||
2769 | /* | ||
2770 | * Prevent races with automatic removal of unused block groups. | ||
2771 | * After we relocate and before we remove the chunk with offset | ||
2772 | * chunk_offset, automatic removal of the block group can kick in, | ||
2773 | * resulting in a failure when calling btrfs_remove_chunk() below. | ||
2774 | * | ||
2775 | * Make sure to acquire this mutex before doing a tree search (dev | ||
2776 | * or chunk trees) to find chunks. Otherwise the cleaner kthread might | ||
2777 | * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after | ||
2778 | * we release the path used to search the chunk/dev tree and before | ||
2779 | * the current task acquires this mutex and calls us. | ||
2780 | */ | ||
2781 | ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); | ||
2782 | |||
2769 | ret = btrfs_can_relocate(extent_root, chunk_offset); | 2783 | ret = btrfs_can_relocate(extent_root, chunk_offset); |
2770 | if (ret) | 2784 | if (ret) |
2771 | return -ENOSPC; | 2785 | return -ENOSPC; |
@@ -2814,13 +2828,18 @@ again: | |||
2814 | key.type = BTRFS_CHUNK_ITEM_KEY; | 2828 | key.type = BTRFS_CHUNK_ITEM_KEY; |
2815 | 2829 | ||
2816 | while (1) { | 2830 | while (1) { |
2831 | mutex_lock(&root->fs_info->delete_unused_bgs_mutex); | ||
2817 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); | 2832 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); |
2818 | if (ret < 0) | 2833 | if (ret < 0) { |
2834 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
2819 | goto error; | 2835 | goto error; |
2836 | } | ||
2820 | BUG_ON(ret == 0); /* Corruption */ | 2837 | BUG_ON(ret == 0); /* Corruption */ |
2821 | 2838 | ||
2822 | ret = btrfs_previous_item(chunk_root, path, key.objectid, | 2839 | ret = btrfs_previous_item(chunk_root, path, key.objectid, |
2823 | key.type); | 2840 | key.type); |
2841 | if (ret) | ||
2842 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
2824 | if (ret < 0) | 2843 | if (ret < 0) |
2825 | goto error; | 2844 | goto error; |
2826 | if (ret > 0) | 2845 | if (ret > 0) |
@@ -2843,6 +2862,7 @@ again: | |||
2843 | else | 2862 | else |
2844 | BUG_ON(ret); | 2863 | BUG_ON(ret); |
2845 | } | 2864 | } |
2865 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
2846 | 2866 | ||
2847 | if (found_key.offset == 0) | 2867 | if (found_key.offset == 0) |
2848 | break; | 2868 | break; |
@@ -3299,9 +3319,12 @@ again: | |||
3299 | goto error; | 3319 | goto error; |
3300 | } | 3320 | } |
3301 | 3321 | ||
3322 | mutex_lock(&fs_info->delete_unused_bgs_mutex); | ||
3302 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); | 3323 | ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); |
3303 | if (ret < 0) | 3324 | if (ret < 0) { |
3325 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | ||
3304 | goto error; | 3326 | goto error; |
3327 | } | ||
3305 | 3328 | ||
3306 | /* | 3329 | /* |
3307 | * this shouldn't happen, it means the last relocate | 3330 | * this shouldn't happen, it means the last relocate |
@@ -3313,6 +3336,7 @@ again: | |||
3313 | ret = btrfs_previous_item(chunk_root, path, 0, | 3336 | ret = btrfs_previous_item(chunk_root, path, 0, |
3314 | BTRFS_CHUNK_ITEM_KEY); | 3337 | BTRFS_CHUNK_ITEM_KEY); |
3315 | if (ret) { | 3338 | if (ret) { |
3339 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | ||
3316 | ret = 0; | 3340 | ret = 0; |
3317 | break; | 3341 | break; |
3318 | } | 3342 | } |
@@ -3321,8 +3345,10 @@ again: | |||
3321 | slot = path->slots[0]; | 3345 | slot = path->slots[0]; |
3322 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 3346 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
3323 | 3347 | ||
3324 | if (found_key.objectid != key.objectid) | 3348 | if (found_key.objectid != key.objectid) { |
3349 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | ||
3325 | break; | 3350 | break; |
3351 | } | ||
3326 | 3352 | ||
3327 | chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); | 3353 | chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); |
3328 | 3354 | ||
@@ -3335,10 +3361,13 @@ again: | |||
3335 | ret = should_balance_chunk(chunk_root, leaf, chunk, | 3361 | ret = should_balance_chunk(chunk_root, leaf, chunk, |
3336 | found_key.offset); | 3362 | found_key.offset); |
3337 | btrfs_release_path(path); | 3363 | btrfs_release_path(path); |
3338 | if (!ret) | 3364 | if (!ret) { |
3365 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | ||
3339 | goto loop; | 3366 | goto loop; |
3367 | } | ||
3340 | 3368 | ||
3341 | if (counting) { | 3369 | if (counting) { |
3370 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | ||
3342 | spin_lock(&fs_info->balance_lock); | 3371 | spin_lock(&fs_info->balance_lock); |
3343 | bctl->stat.expected++; | 3372 | bctl->stat.expected++; |
3344 | spin_unlock(&fs_info->balance_lock); | 3373 | spin_unlock(&fs_info->balance_lock); |
@@ -3348,6 +3377,7 @@ again: | |||
3348 | ret = btrfs_relocate_chunk(chunk_root, | 3377 | ret = btrfs_relocate_chunk(chunk_root, |
3349 | found_key.objectid, | 3378 | found_key.objectid, |
3350 | found_key.offset); | 3379 | found_key.offset); |
3380 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | ||
3351 | if (ret && ret != -ENOSPC) | 3381 | if (ret && ret != -ENOSPC) |
3352 | goto error; | 3382 | goto error; |
3353 | if (ret == -ENOSPC) { | 3383 | if (ret == -ENOSPC) { |
@@ -4087,11 +4117,16 @@ again: | |||
4087 | key.type = BTRFS_DEV_EXTENT_KEY; | 4117 | key.type = BTRFS_DEV_EXTENT_KEY; |
4088 | 4118 | ||
4089 | do { | 4119 | do { |
4120 | mutex_lock(&root->fs_info->delete_unused_bgs_mutex); | ||
4090 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 4121 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
4091 | if (ret < 0) | 4122 | if (ret < 0) { |
4123 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
4092 | goto done; | 4124 | goto done; |
4125 | } | ||
4093 | 4126 | ||
4094 | ret = btrfs_previous_item(root, path, 0, key.type); | 4127 | ret = btrfs_previous_item(root, path, 0, key.type); |
4128 | if (ret) | ||
4129 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
4095 | if (ret < 0) | 4130 | if (ret < 0) |
4096 | goto done; | 4131 | goto done; |
4097 | if (ret) { | 4132 | if (ret) { |
@@ -4105,6 +4140,7 @@ again: | |||
4105 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 4140 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
4106 | 4141 | ||
4107 | if (key.objectid != device->devid) { | 4142 | if (key.objectid != device->devid) { |
4143 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
4108 | btrfs_release_path(path); | 4144 | btrfs_release_path(path); |
4109 | break; | 4145 | break; |
4110 | } | 4146 | } |
@@ -4113,6 +4149,7 @@ again: | |||
4113 | length = btrfs_dev_extent_length(l, dev_extent); | 4149 | length = btrfs_dev_extent_length(l, dev_extent); |
4114 | 4150 | ||
4115 | if (key.offset + length <= new_size) { | 4151 | if (key.offset + length <= new_size) { |
4152 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
4116 | btrfs_release_path(path); | 4153 | btrfs_release_path(path); |
4117 | break; | 4154 | break; |
4118 | } | 4155 | } |
@@ -4122,6 +4159,7 @@ again: | |||
4122 | btrfs_release_path(path); | 4159 | btrfs_release_path(path); |
4123 | 4160 | ||
4124 | ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); | 4161 | ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); |
4162 | mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); | ||
4125 | if (ret && ret != -ENOSPC) | 4163 | if (ret && ret != -ENOSPC) |
4126 | goto done; | 4164 | goto done; |
4127 | if (ret == -ENOSPC) | 4165 | if (ret == -ENOSPC) |
@@ -5715,7 +5753,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e | |||
5715 | static void btrfs_end_bio(struct bio *bio, int err) | 5753 | static void btrfs_end_bio(struct bio *bio, int err) |
5716 | { | 5754 | { |
5717 | struct btrfs_bio *bbio = bio->bi_private; | 5755 | struct btrfs_bio *bbio = bio->bi_private; |
5718 | struct btrfs_device *dev = bbio->stripes[0].dev; | ||
5719 | int is_orig_bio = 0; | 5756 | int is_orig_bio = 0; |
5720 | 5757 | ||
5721 | if (err) { | 5758 | if (err) { |
@@ -5723,6 +5760,7 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5723 | if (err == -EIO || err == -EREMOTEIO) { | 5760 | if (err == -EIO || err == -EREMOTEIO) { |
5724 | unsigned int stripe_index = | 5761 | unsigned int stripe_index = |
5725 | btrfs_io_bio(bio)->stripe_index; | 5762 | btrfs_io_bio(bio)->stripe_index; |
5763 | struct btrfs_device *dev; | ||
5726 | 5764 | ||
5727 | BUG_ON(stripe_index >= bbio->num_stripes); | 5765 | BUG_ON(stripe_index >= bbio->num_stripes); |
5728 | dev = bbio->stripes[stripe_index].dev; | 5766 | dev = bbio->stripes[stripe_index].dev; |