aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-27 12:14:17 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-27 12:14:17 -0400
commit1fb00cbca05ba13f386e75aa1f6d801895cfed29 (patch)
tree92250b6e29c6c29611d7aadc9d641277f548830d /fs/btrfs
parentc0fe5dcb91f75204e34f63269ea72b913b0923d1 (diff)
parent9e0af23764344f7f1b68e4eefbe7dc865018b63d (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "The biggest of these comes from Liu Bo, who tracked down a hang we've been hitting since moving to kernel workqueues (it's a btrfs bug, not in the generic code). His patch needs backporting to 3.16 and 3.15 stable, which I'll send once this is in. Otherwise these are assorted fixes. Most were integrated last week during KS, but I wanted to give everyone the chance to test the result, so I waited for rc2 to come out before sending" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (24 commits) Btrfs: fix task hang under heavy compressed write Btrfs: fix filemap_flush call in btrfs_file_release Btrfs: fix crash on endio of reading corrupted block btrfs: fix leak in qgroup_subtree_accounting() error path btrfs: Use right extent length when inserting overlap extent map. Btrfs: clone, don't create invalid hole extent map Btrfs: don't monopolize a core when evicting inode Btrfs: fix hole detection during file fsync Btrfs: ensure tmpfile inode is always persisted with link count of 0 Btrfs: race free update of commit root for ro snapshots Btrfs: fix regression of btrfs device replace Btrfs: don't consider the missing device when allocating new chunks Btrfs: Fix wrong device size when we are resizing the device Btrfs: don't write any data into a readonly device when scrub Btrfs: Fix the problem that the replace destroys the seed filesystem btrfs: Return right extent when fiemap gives unaligned offset and len. Btrfs: fix wrong extent mapping for DirectIO Btrfs: fix wrong write range for filemap_fdatawrite_range() Btrfs: fix wrong missing device counter decrease Btrfs: fix unzeroed members in fs_devices when creating a fs from seed fs ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/async-thread.c44
-rw-r--r--fs/btrfs/async-thread.h28
-rw-r--r--fs/btrfs/delayed-inode.c4
-rw-r--r--fs/btrfs/disk-io.c56
-rw-r--r--fs/btrfs/extent-tree.c23
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/file.c17
-rw-r--r--fs/btrfs/inode.c109
-rw-r--r--fs/btrfs/ioctl.c36
-rw-r--r--fs/btrfs/ordered-data.c1
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/raid56.c9
-rw-r--r--fs/btrfs/reada.c3
-rw-r--r--fs/btrfs/scrub.c25
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tree-log.c17
-rw-r--r--fs/btrfs/volumes.c65
17 files changed, 312 insertions, 135 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5a201d81049c..fbd76ded9a34 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -22,7 +22,6 @@
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/workqueue.h>
26#include "async-thread.h" 25#include "async-thread.h"
27#include "ctree.h" 26#include "ctree.h"
28 27
@@ -55,8 +54,39 @@ struct btrfs_workqueue {
55 struct __btrfs_workqueue *high; 54 struct __btrfs_workqueue *high;
56}; 55};
57 56
58static inline struct __btrfs_workqueue 57static void normal_work_helper(struct btrfs_work *work);
59*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, 58
59#define BTRFS_WORK_HELPER(name) \
60void btrfs_##name(struct work_struct *arg) \
61{ \
62 struct btrfs_work *work = container_of(arg, struct btrfs_work, \
63 normal_work); \
64 normal_work_helper(work); \
65}
66
67BTRFS_WORK_HELPER(worker_helper);
68BTRFS_WORK_HELPER(delalloc_helper);
69BTRFS_WORK_HELPER(flush_delalloc_helper);
70BTRFS_WORK_HELPER(cache_helper);
71BTRFS_WORK_HELPER(submit_helper);
72BTRFS_WORK_HELPER(fixup_helper);
73BTRFS_WORK_HELPER(endio_helper);
74BTRFS_WORK_HELPER(endio_meta_helper);
75BTRFS_WORK_HELPER(endio_meta_write_helper);
76BTRFS_WORK_HELPER(endio_raid56_helper);
77BTRFS_WORK_HELPER(rmw_helper);
78BTRFS_WORK_HELPER(endio_write_helper);
79BTRFS_WORK_HELPER(freespace_write_helper);
80BTRFS_WORK_HELPER(delayed_meta_helper);
81BTRFS_WORK_HELPER(readahead_helper);
82BTRFS_WORK_HELPER(qgroup_rescan_helper);
83BTRFS_WORK_HELPER(extent_refs_helper);
84BTRFS_WORK_HELPER(scrub_helper);
85BTRFS_WORK_HELPER(scrubwrc_helper);
86BTRFS_WORK_HELPER(scrubnc_helper);
87
88static struct __btrfs_workqueue *
89__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
60 int thresh) 90 int thresh)
61{ 91{
62 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 92 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
232 spin_unlock_irqrestore(lock, flags); 262 spin_unlock_irqrestore(lock, flags);
233} 263}
234 264
235static void normal_work_helper(struct work_struct *arg) 265static void normal_work_helper(struct btrfs_work *work)
236{ 266{
237 struct btrfs_work *work;
238 struct __btrfs_workqueue *wq; 267 struct __btrfs_workqueue *wq;
239 int need_order = 0; 268 int need_order = 0;
240 269
241 work = container_of(arg, struct btrfs_work, normal_work);
242 /* 270 /*
243 * We should not touch things inside work in the following cases: 271 * We should not touch things inside work in the following cases:
244 * 1) after work->func() if it has no ordered_free 272 * 1) after work->func() if it has no ordered_free
@@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg)
262 trace_btrfs_all_work_done(work); 290 trace_btrfs_all_work_done(work);
263} 291}
264 292
265void btrfs_init_work(struct btrfs_work *work, 293void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
266 btrfs_func_t func, 294 btrfs_func_t func,
267 btrfs_func_t ordered_func, 295 btrfs_func_t ordered_func,
268 btrfs_func_t ordered_free) 296 btrfs_func_t ordered_free)
@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work,
270 work->func = func; 298 work->func = func;
271 work->ordered_func = ordered_func; 299 work->ordered_func = ordered_func;
272 work->ordered_free = ordered_free; 300 work->ordered_free = ordered_free;
273 INIT_WORK(&work->normal_work, normal_work_helper); 301 INIT_WORK(&work->normal_work, uniq_func);
274 INIT_LIST_HEAD(&work->ordered_list); 302 INIT_LIST_HEAD(&work->ordered_list);
275 work->flags = 0; 303 work->flags = 0;
276} 304}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 9c6b66d15fb0..e9e31c94758f 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -19,12 +19,14 @@
19 19
20#ifndef __BTRFS_ASYNC_THREAD_ 20#ifndef __BTRFS_ASYNC_THREAD_
21#define __BTRFS_ASYNC_THREAD_ 21#define __BTRFS_ASYNC_THREAD_
22#include <linux/workqueue.h>
22 23
23struct btrfs_workqueue; 24struct btrfs_workqueue;
24/* Internal use only */ 25/* Internal use only */
25struct __btrfs_workqueue; 26struct __btrfs_workqueue;
26struct btrfs_work; 27struct btrfs_work;
27typedef void (*btrfs_func_t)(struct btrfs_work *arg); 28typedef void (*btrfs_func_t)(struct btrfs_work *arg);
29typedef void (*btrfs_work_func_t)(struct work_struct *arg);
28 30
29struct btrfs_work { 31struct btrfs_work {
30 btrfs_func_t func; 32 btrfs_func_t func;
@@ -38,11 +40,35 @@ struct btrfs_work {
38 unsigned long flags; 40 unsigned long flags;
39}; 41};
40 42
43#define BTRFS_WORK_HELPER_PROTO(name) \
44void btrfs_##name(struct work_struct *arg)
45
46BTRFS_WORK_HELPER_PROTO(worker_helper);
47BTRFS_WORK_HELPER_PROTO(delalloc_helper);
48BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
49BTRFS_WORK_HELPER_PROTO(cache_helper);
50BTRFS_WORK_HELPER_PROTO(submit_helper);
51BTRFS_WORK_HELPER_PROTO(fixup_helper);
52BTRFS_WORK_HELPER_PROTO(endio_helper);
53BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
54BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
55BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
56BTRFS_WORK_HELPER_PROTO(rmw_helper);
57BTRFS_WORK_HELPER_PROTO(endio_write_helper);
58BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
59BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
60BTRFS_WORK_HELPER_PROTO(readahead_helper);
61BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
62BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
63BTRFS_WORK_HELPER_PROTO(scrub_helper);
64BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
65BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
66
41struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 67struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
42 int flags, 68 int flags,
43 int max_active, 69 int max_active,
44 int thresh); 70 int thresh);
45void btrfs_init_work(struct btrfs_work *work, 71void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
46 btrfs_func_t func, 72 btrfs_func_t func,
47 btrfs_func_t ordered_func, 73 btrfs_func_t ordered_func,
48 btrfs_func_t ordered_free); 74 btrfs_func_t ordered_free);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index da775bfdebc9..a2e90f855d7d 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1395,8 +1395,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1395 return -ENOMEM; 1395 return -ENOMEM;
1396 1396
1397 async_work->delayed_root = delayed_root; 1397 async_work->delayed_root = delayed_root;
1398 btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, 1398 btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1399 NULL, NULL); 1399 btrfs_async_run_delayed_root, NULL, NULL);
1400 async_work->nr = nr; 1400 async_work->nr = nr;
1401 1401
1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); 1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d0ed9e664f7d..a1d36e62179c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -39,7 +39,6 @@
39#include "btrfs_inode.h" 39#include "btrfs_inode.h"
40#include "volumes.h" 40#include "volumes.h"
41#include "print-tree.h" 41#include "print-tree.h"
42#include "async-thread.h"
43#include "locking.h" 42#include "locking.h"
44#include "tree-log.h" 43#include "tree-log.h"
45#include "free-space-cache.h" 44#include "free-space-cache.h"
@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err)
693{ 692{
694 struct end_io_wq *end_io_wq = bio->bi_private; 693 struct end_io_wq *end_io_wq = bio->bi_private;
695 struct btrfs_fs_info *fs_info; 694 struct btrfs_fs_info *fs_info;
695 struct btrfs_workqueue *wq;
696 btrfs_work_func_t func;
696 697
697 fs_info = end_io_wq->info; 698 fs_info = end_io_wq->info;
698 end_io_wq->error = err; 699 end_io_wq->error = err;
699 btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
700 700
701 if (bio->bi_rw & REQ_WRITE) { 701 if (bio->bi_rw & REQ_WRITE) {
702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) 702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
703 btrfs_queue_work(fs_info->endio_meta_write_workers, 703 wq = fs_info->endio_meta_write_workers;
704 &end_io_wq->work); 704 func = btrfs_endio_meta_write_helper;
705 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) 705 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
706 btrfs_queue_work(fs_info->endio_freespace_worker, 706 wq = fs_info->endio_freespace_worker;
707 &end_io_wq->work); 707 func = btrfs_freespace_write_helper;
708 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 708 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
709 btrfs_queue_work(fs_info->endio_raid56_workers, 709 wq = fs_info->endio_raid56_workers;
710 &end_io_wq->work); 710 func = btrfs_endio_raid56_helper;
711 else 711 } else {
712 btrfs_queue_work(fs_info->endio_write_workers, 712 wq = fs_info->endio_write_workers;
713 &end_io_wq->work); 713 func = btrfs_endio_write_helper;
714 }
714 } else { 715 } else {
715 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 716 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
716 btrfs_queue_work(fs_info->endio_raid56_workers, 717 wq = fs_info->endio_raid56_workers;
717 &end_io_wq->work); 718 func = btrfs_endio_raid56_helper;
718 else if (end_io_wq->metadata) 719 } else if (end_io_wq->metadata) {
719 btrfs_queue_work(fs_info->endio_meta_workers, 720 wq = fs_info->endio_meta_workers;
720 &end_io_wq->work); 721 func = btrfs_endio_meta_helper;
721 else 722 } else {
722 btrfs_queue_work(fs_info->endio_workers, 723 wq = fs_info->endio_workers;
723 &end_io_wq->work); 724 func = btrfs_endio_helper;
725 }
724 } 726 }
727
728 btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
729 btrfs_queue_work(wq, &end_io_wq->work);
725} 730}
726 731
727/* 732/*
@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
828 async->submit_bio_start = submit_bio_start; 833 async->submit_bio_start = submit_bio_start;
829 async->submit_bio_done = submit_bio_done; 834 async->submit_bio_done = submit_bio_done;
830 835
831 btrfs_init_work(&async->work, run_one_async_start, 836 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
832 run_one_async_done, run_one_async_free); 837 run_one_async_done, run_one_async_free);
833 838
834 async->bio_flags = bio_flags; 839 async->bio_flags = bio_flags;
@@ -3450,7 +3455,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3450 btrfs_set_stack_device_generation(dev_item, 0); 3455 btrfs_set_stack_device_generation(dev_item, 0);
3451 btrfs_set_stack_device_type(dev_item, dev->type); 3456 btrfs_set_stack_device_type(dev_item, dev->type);
3452 btrfs_set_stack_device_id(dev_item, dev->devid); 3457 btrfs_set_stack_device_id(dev_item, dev->devid);
3453 btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); 3458 btrfs_set_stack_device_total_bytes(dev_item,
3459 dev->disk_total_bytes);
3454 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); 3460 btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
3455 btrfs_set_stack_device_io_align(dev_item, dev->io_align); 3461 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
3456 btrfs_set_stack_device_io_width(dev_item, dev->io_width); 3462 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 102ed3143976..3efe1c3877bf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -552,7 +552,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
552 caching_ctl->block_group = cache; 552 caching_ctl->block_group = cache;
553 caching_ctl->progress = cache->key.objectid; 553 caching_ctl->progress = cache->key.objectid;
554 atomic_set(&caching_ctl->count, 1); 554 atomic_set(&caching_ctl->count, 1);
555 btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); 555 btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
556 caching_thread, NULL, NULL);
556 557
557 spin_lock(&cache->lock); 558 spin_lock(&cache->lock);
558 /* 559 /*
@@ -2749,8 +2750,8 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
2749 async->sync = 0; 2750 async->sync = 0;
2750 init_completion(&async->wait); 2751 init_completion(&async->wait);
2751 2752
2752 btrfs_init_work(&async->work, delayed_ref_async_start, 2753 btrfs_init_work(&async->work, btrfs_extent_refs_helper,
2753 NULL, NULL); 2754 delayed_ref_async_start, NULL, NULL);
2754 2755
2755 btrfs_queue_work(root->fs_info->extent_workers, &async->work); 2756 btrfs_queue_work(root->fs_info->extent_workers, &async->work);
2756 2757
@@ -3586,13 +3587,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3586 */ 3587 */
3587static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3588static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3588{ 3589{
3589 /* 3590 u64 num_devices = root->fs_info->fs_devices->rw_devices;
3590 * we add in the count of missing devices because we want
3591 * to make sure that any RAID levels on a degraded FS
3592 * continue to be honored.
3593 */
3594 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3595 root->fs_info->fs_devices->missing_devices;
3596 u64 target; 3591 u64 target;
3597 u64 tmp; 3592 u64 tmp;
3598 3593
@@ -8440,13 +8435,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8440 if (stripped) 8435 if (stripped)
8441 return extended_to_chunk(stripped); 8436 return extended_to_chunk(stripped);
8442 8437
8443 /* 8438 num_devices = root->fs_info->fs_devices->rw_devices;
8444 * we add in the count of missing devices because we want
8445 * to make sure that any RAID levels on a degraded FS
8446 * continue to be honored.
8447 */
8448 num_devices = root->fs_info->fs_devices->rw_devices +
8449 root->fs_info->fs_devices->missing_devices;
8450 8439
8451 stripped = BTRFS_BLOCK_GROUP_RAID0 | 8440 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8452 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | 8441 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e11aab9f391..af0359dcf337 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2532,6 +2532,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2532 test_bit(BIO_UPTODATE, &bio->bi_flags); 2532 test_bit(BIO_UPTODATE, &bio->bi_flags);
2533 if (err) 2533 if (err)
2534 uptodate = 0; 2534 uptodate = 0;
2535 offset += len;
2535 continue; 2536 continue;
2536 } 2537 }
2537 } 2538 }
@@ -4207,8 +4208,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4207 return -ENOMEM; 4208 return -ENOMEM;
4208 path->leave_spinning = 1; 4209 path->leave_spinning = 1;
4209 4210
4210 start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); 4211 start = round_down(start, BTRFS_I(inode)->root->sectorsize);
4211 len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); 4212 len = round_up(max, BTRFS_I(inode)->root->sectorsize) - start;
4212 4213
4213 /* 4214 /*
4214 * lookup the last file extent. We're not using i_size here 4215 * lookup the last file extent. We're not using i_size here
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d3afac292d67..36861b7a6757 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1840,7 +1840,15 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1840{ 1840{
1841 if (filp->private_data) 1841 if (filp->private_data)
1842 btrfs_ioctl_trans_end(filp); 1842 btrfs_ioctl_trans_end(filp);
1843 filemap_flush(inode->i_mapping); 1843 /*
1844 * ordered_data_close is set by settattr when we are about to truncate
1845 * a file from a non-zero size to a zero size. This tries to
1846 * flush down new bytes that may have been written if the
1847 * application were using truncate to replace a file in place.
1848 */
1849 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1850 &BTRFS_I(inode)->runtime_flags))
1851 filemap_flush(inode->i_mapping);
1844 return 0; 1852 return 0;
1845} 1853}
1846 1854
@@ -2088,10 +2096,9 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2088 goto out; 2096 goto out;
2089 } 2097 }
2090 2098
2091 if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) { 2099 if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
2092 u64 num_bytes; 2100 u64 num_bytes;
2093 2101
2094 path->slots[0]++;
2095 key.offset = offset; 2102 key.offset = offset;
2096 btrfs_set_item_key_safe(root, path, &key); 2103 btrfs_set_item_key_safe(root, path, &key);
2097 fi = btrfs_item_ptr(leaf, path->slots[0], 2104 fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -2216,7 +2223,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2216 goto out_only_mutex; 2223 goto out_only_mutex;
2217 } 2224 }
2218 2225
2219 lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize); 2226 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2220 lockend = round_down(offset + len, 2227 lockend = round_down(offset + len,
2221 BTRFS_I(inode)->root->sectorsize) - 1; 2228 BTRFS_I(inode)->root->sectorsize) - 1;
2222 same_page = ((offset >> PAGE_CACHE_SHIFT) == 2229 same_page = ((offset >> PAGE_CACHE_SHIFT) ==
@@ -2277,7 +2284,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2277 tail_start + tail_len, 0, 1); 2284 tail_start + tail_len, 0, 1);
2278 if (ret) 2285 if (ret)
2279 goto out_only_mutex; 2286 goto out_only_mutex;
2280 } 2287 }
2281 } 2288 }
2282 } 2289 }
2283 2290
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 03708ef3deef..9c194bd74d6e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1096,8 +1096,10 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1096 async_cow->end = cur_end; 1096 async_cow->end = cur_end;
1097 INIT_LIST_HEAD(&async_cow->extents); 1097 INIT_LIST_HEAD(&async_cow->extents);
1098 1098
1099 btrfs_init_work(&async_cow->work, async_cow_start, 1099 btrfs_init_work(&async_cow->work,
1100 async_cow_submit, async_cow_free); 1100 btrfs_delalloc_helper,
1101 async_cow_start, async_cow_submit,
1102 async_cow_free);
1101 1103
1102 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> 1104 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
1103 PAGE_CACHE_SHIFT; 1105 PAGE_CACHE_SHIFT;
@@ -1881,7 +1883,8 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1881 1883
1882 SetPageChecked(page); 1884 SetPageChecked(page);
1883 page_cache_get(page); 1885 page_cache_get(page);
1884 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); 1886 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
1887 btrfs_writepage_fixup_worker, NULL, NULL);
1885 fixup->page = page; 1888 fixup->page = page;
1886 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); 1889 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
1887 return -EBUSY; 1890 return -EBUSY;
@@ -2822,7 +2825,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2822 struct inode *inode = page->mapping->host; 2825 struct inode *inode = page->mapping->host;
2823 struct btrfs_root *root = BTRFS_I(inode)->root; 2826 struct btrfs_root *root = BTRFS_I(inode)->root;
2824 struct btrfs_ordered_extent *ordered_extent = NULL; 2827 struct btrfs_ordered_extent *ordered_extent = NULL;
2825 struct btrfs_workqueue *workers; 2828 struct btrfs_workqueue *wq;
2829 btrfs_work_func_t func;
2826 2830
2827 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); 2831 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
2828 2832
@@ -2831,13 +2835,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2831 end - start + 1, uptodate)) 2835 end - start + 1, uptodate))
2832 return 0; 2836 return 0;
2833 2837
2834 btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); 2838 if (btrfs_is_free_space_inode(inode)) {
2839 wq = root->fs_info->endio_freespace_worker;
2840 func = btrfs_freespace_write_helper;
2841 } else {
2842 wq = root->fs_info->endio_write_workers;
2843 func = btrfs_endio_write_helper;
2844 }
2835 2845
2836 if (btrfs_is_free_space_inode(inode)) 2846 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
2837 workers = root->fs_info->endio_freespace_worker; 2847 NULL);
2838 else 2848 btrfs_queue_work(wq, &ordered_extent->work);
2839 workers = root->fs_info->endio_write_workers;
2840 btrfs_queue_work(workers, &ordered_extent->work);
2841 2849
2842 return 0; 2850 return 0;
2843} 2851}
@@ -4674,6 +4682,11 @@ static void evict_inode_truncate_pages(struct inode *inode)
4674 clear_bit(EXTENT_FLAG_LOGGING, &em->flags); 4682 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4675 remove_extent_mapping(map_tree, em); 4683 remove_extent_mapping(map_tree, em);
4676 free_extent_map(em); 4684 free_extent_map(em);
4685 if (need_resched()) {
4686 write_unlock(&map_tree->lock);
4687 cond_resched();
4688 write_lock(&map_tree->lock);
4689 }
4677 } 4690 }
4678 write_unlock(&map_tree->lock); 4691 write_unlock(&map_tree->lock);
4679 4692
@@ -4696,6 +4709,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
4696 &cached_state, GFP_NOFS); 4709 &cached_state, GFP_NOFS);
4697 free_extent_state(state); 4710 free_extent_state(state);
4698 4711
4712 cond_resched();
4699 spin_lock(&io_tree->lock); 4713 spin_lock(&io_tree->lock);
4700 } 4714 }
4701 spin_unlock(&io_tree->lock); 4715 spin_unlock(&io_tree->lock);
@@ -5181,6 +5195,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5181 iput(inode); 5195 iput(inode);
5182 inode = ERR_PTR(ret); 5196 inode = ERR_PTR(ret);
5183 } 5197 }
5198 /*
5199 * If orphan cleanup did remove any orphans, it means the tree
5200 * was modified and therefore the commit root is not the same as
5201 * the current root anymore. This is a problem, because send
5202 * uses the commit root and therefore can see inode items that
5203 * don't exist in the current root anymore, and for example make
5204 * calls to btrfs_iget, which will do tree lookups based on the
5205 * current root and not on the commit root. Those lookups will
5206 * fail, returning a -ESTALE error, and making send fail with
5207 * that error. So make sure a send does not see any orphans we
5208 * have just removed, and that it will see the same inodes
5209 * regardless of whether a transaction commit happened before
5210 * it started (meaning that the commit root will be the same as
5211 * the current root) or not.
5212 */
5213 if (sub_root->node != sub_root->commit_root) {
5214 u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
5215
5216 if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
5217 struct extent_buffer *eb;
5218
5219 /*
5220 * Assert we can't have races between dentry
5221 * lookup called through the snapshot creation
5222 * ioctl and the VFS.
5223 */
5224 ASSERT(mutex_is_locked(&dir->i_mutex));
5225
5226 down_write(&root->fs_info->commit_root_sem);
5227 eb = sub_root->commit_root;
5228 sub_root->commit_root =
5229 btrfs_root_node(sub_root);
5230 up_write(&root->fs_info->commit_root_sem);
5231 free_extent_buffer(eb);
5232 }
5233 }
5184 } 5234 }
5185 5235
5186 return inode; 5236 return inode;
@@ -5606,6 +5656,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5606 } 5656 }
5607 5657
5608 /* 5658 /*
5659 * O_TMPFILE, set link count to 0, so that after this point,
5660 * we fill in an inode item with the correct link count.
5661 */
5662 if (!name)
5663 set_nlink(inode, 0);
5664
5665 /*
5609 * we have to initialize this early, so we can reclaim the inode 5666 * we have to initialize this early, so we can reclaim the inode
5610 * number if we fail afterwards in this function. 5667 * number if we fail afterwards in this function.
5611 */ 5668 */
@@ -6097,14 +6154,14 @@ out_fail:
6097static int merge_extent_mapping(struct extent_map_tree *em_tree, 6154static int merge_extent_mapping(struct extent_map_tree *em_tree,
6098 struct extent_map *existing, 6155 struct extent_map *existing,
6099 struct extent_map *em, 6156 struct extent_map *em,
6100 u64 map_start, u64 map_len) 6157 u64 map_start)
6101{ 6158{
6102 u64 start_diff; 6159 u64 start_diff;
6103 6160
6104 BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); 6161 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6105 start_diff = map_start - em->start; 6162 start_diff = map_start - em->start;
6106 em->start = map_start; 6163 em->start = map_start;
6107 em->len = map_len; 6164 em->len = existing->start - em->start;
6108 if (em->block_start < EXTENT_MAP_LAST_BYTE && 6165 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6109 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 6166 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6110 em->block_start += start_diff; 6167 em->block_start += start_diff;
@@ -6275,6 +6332,8 @@ next:
6275 goto not_found; 6332 goto not_found;
6276 if (start + len <= found_key.offset) 6333 if (start + len <= found_key.offset)
6277 goto not_found; 6334 goto not_found;
6335 if (start > found_key.offset)
6336 goto next;
6278 em->start = start; 6337 em->start = start;
6279 em->orig_start = start; 6338 em->orig_start = start;
6280 em->len = found_key.offset - start; 6339 em->len = found_key.offset - start;
@@ -6390,8 +6449,7 @@ insert:
6390 em->len); 6449 em->len);
6391 if (existing) { 6450 if (existing) {
6392 err = merge_extent_mapping(em_tree, existing, 6451 err = merge_extent_mapping(em_tree, existing,
6393 em, start, 6452 em, start);
6394 root->sectorsize);
6395 free_extent_map(existing); 6453 free_extent_map(existing);
6396 if (err) { 6454 if (err) {
6397 free_extent_map(em); 6455 free_extent_map(em);
@@ -7158,7 +7216,8 @@ again:
7158 if (!ret) 7216 if (!ret)
7159 goto out_test; 7217 goto out_test;
7160 7218
7161 btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); 7219 btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
7220 finish_ordered_fn, NULL, NULL);
7162 btrfs_queue_work(root->fs_info->endio_write_workers, 7221 btrfs_queue_work(root->fs_info->endio_write_workers,
7163 &ordered->work); 7222 &ordered->work);
7164out_test: 7223out_test:
@@ -7306,10 +7365,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7306 map_length = orig_bio->bi_iter.bi_size; 7365 map_length = orig_bio->bi_iter.bi_size;
7307 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, 7366 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
7308 &map_length, NULL, 0); 7367 &map_length, NULL, 0);
7309 if (ret) { 7368 if (ret)
7310 bio_put(orig_bio);
7311 return -EIO; 7369 return -EIO;
7312 }
7313 7370
7314 if (map_length >= orig_bio->bi_iter.bi_size) { 7371 if (map_length >= orig_bio->bi_iter.bi_size) {
7315 bio = orig_bio; 7372 bio = orig_bio;
@@ -7326,6 +7383,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7326 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); 7383 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
7327 if (!bio) 7384 if (!bio)
7328 return -ENOMEM; 7385 return -ENOMEM;
7386
7329 bio->bi_private = dip; 7387 bio->bi_private = dip;
7330 bio->bi_end_io = btrfs_end_dio_bio; 7388 bio->bi_end_io = btrfs_end_dio_bio;
7331 atomic_inc(&dip->pending_bios); 7389 atomic_inc(&dip->pending_bios);
@@ -7534,7 +7592,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
7534 count = iov_iter_count(iter); 7592 count = iov_iter_count(iter);
7535 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 7593 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
7536 &BTRFS_I(inode)->runtime_flags)) 7594 &BTRFS_I(inode)->runtime_flags))
7537 filemap_fdatawrite_range(inode->i_mapping, offset, count); 7595 filemap_fdatawrite_range(inode->i_mapping, offset,
7596 offset + count - 1);
7538 7597
7539 if (rw & WRITE) { 7598 if (rw & WRITE) {
7540 /* 7599 /*
@@ -8495,7 +8554,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
8495 work->inode = inode; 8554 work->inode = inode;
8496 work->wait = wait; 8555 work->wait = wait;
8497 work->delay_iput = delay_iput; 8556 work->delay_iput = delay_iput;
8498 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); 8557 WARN_ON_ONCE(!inode);
8558 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
8559 btrfs_run_delalloc_work, NULL, NULL);
8499 8560
8500 return work; 8561 return work;
8501} 8562}
@@ -8979,6 +9040,14 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
8979 if (ret) 9040 if (ret)
8980 goto out; 9041 goto out;
8981 9042
9043 /*
9044 * We set number of links to 0 in btrfs_new_inode(), and here we set
9045 * it to 1 because d_tmpfile() will issue a warning if the count is 0,
9046 * through:
9047 *
9048 * d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
9049 */
9050 set_nlink(inode, 1);
8982 d_tmpfile(dentry, inode); 9051 d_tmpfile(dentry, inode);
8983 mark_inode_dirty(inode); 9052 mark_inode_dirty(inode);
8984 9053
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 47aceb494d1d..fce6fd0e3f50 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
711 if (ret) 711 if (ret)
712 goto fail; 712 goto fail;
713 713
714 ret = btrfs_orphan_cleanup(pending_snapshot->snap);
715 if (ret)
716 goto fail;
717
718 /*
719 * If orphan cleanup did remove any orphans, it means the tree was
720 * modified and therefore the commit root is not the same as the
721 * current root anymore. This is a problem, because send uses the
722 * commit root and therefore can see inode items that don't exist
723 * in the current root anymore, and for example make calls to
724 * btrfs_iget, which will do tree lookups based on the current root
725 * and not on the commit root. Those lookups will fail, returning a
726 * -ESTALE error, and making send fail with that error. So make sure
727 * a send does not see any orphans we have just removed, and that it
728 * will see the same inodes regardless of whether a transaction
729 * commit happened before it started (meaning that the commit root
730 * will be the same as the current root) or not.
731 */
732 if (readonly && pending_snapshot->snap->node !=
733 pending_snapshot->snap->commit_root) {
734 trans = btrfs_join_transaction(pending_snapshot->snap);
735 if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) {
736 ret = PTR_ERR(trans);
737 goto fail;
738 }
739 if (!IS_ERR(trans)) {
740 ret = btrfs_commit_transaction(trans,
741 pending_snapshot->snap);
742 if (ret)
743 goto fail;
744 }
745 }
746
747 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 714 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
748 if (IS_ERR(inode)) { 715 if (IS_ERR(inode)) {
749 ret = PTR_ERR(inode); 716 ret = PTR_ERR(inode);
@@ -3527,7 +3494,8 @@ process_slot:
3527 btrfs_mark_buffer_dirty(leaf); 3494 btrfs_mark_buffer_dirty(leaf);
3528 btrfs_release_path(path); 3495 btrfs_release_path(path);
3529 3496
3530 last_dest_end = new_key.offset + datal; 3497 last_dest_end = ALIGN(new_key.offset + datal,
3498 root->sectorsize);
3531 ret = clone_finish_inode_update(trans, inode, 3499 ret = clone_finish_inode_update(trans, inode,
3532 last_dest_end, 3500 last_dest_end,
3533 destoff, olen); 3501 destoff, olen);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 963895c1f801..ac734ec4cc20 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -615,6 +615,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
615 spin_unlock(&root->ordered_extent_lock); 615 spin_unlock(&root->ordered_extent_lock);
616 616
617 btrfs_init_work(&ordered->flush_work, 617 btrfs_init_work(&ordered->flush_work,
618 btrfs_flush_delalloc_helper,
618 btrfs_run_ordered_extent_work, NULL, NULL); 619 btrfs_run_ordered_extent_work, NULL, NULL);
619 list_add_tail(&ordered->work_list, &works); 620 list_add_tail(&ordered->work_list, &works);
620 btrfs_queue_work(root->fs_info->flush_workers, 621 btrfs_queue_work(root->fs_info->flush_workers,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b497498484be..ded5c601d916 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1973,7 +1973,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1973 elem.seq, &roots); 1973 elem.seq, &roots);
1974 btrfs_put_tree_mod_seq(fs_info, &elem); 1974 btrfs_put_tree_mod_seq(fs_info, &elem);
1975 if (ret < 0) 1975 if (ret < 0)
1976 return ret; 1976 goto out;
1977 1977
1978 if (roots->nnodes != 1) 1978 if (roots->nnodes != 1)
1979 goto out; 1979 goto out;
@@ -2720,6 +2720,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2720 memset(&fs_info->qgroup_rescan_work, 0, 2720 memset(&fs_info->qgroup_rescan_work, 0,
2721 sizeof(fs_info->qgroup_rescan_work)); 2721 sizeof(fs_info->qgroup_rescan_work));
2722 btrfs_init_work(&fs_info->qgroup_rescan_work, 2722 btrfs_init_work(&fs_info->qgroup_rescan_work,
2723 btrfs_qgroup_rescan_helper,
2723 btrfs_qgroup_rescan_worker, NULL, NULL); 2724 btrfs_qgroup_rescan_worker, NULL, NULL);
2724 2725
2725 if (ret) { 2726 if (ret) {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 4a88f073fdd7..0a6b6e4bcbb9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1416,7 +1416,8 @@ cleanup:
1416 1416
1417static void async_rmw_stripe(struct btrfs_raid_bio *rbio) 1417static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1418{ 1418{
1419 btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); 1419 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1420 rmw_work, NULL, NULL);
1420 1421
1421 btrfs_queue_work(rbio->fs_info->rmw_workers, 1422 btrfs_queue_work(rbio->fs_info->rmw_workers,
1422 &rbio->work); 1423 &rbio->work);
@@ -1424,7 +1425,8 @@ static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1424 1425
1425static void async_read_rebuild(struct btrfs_raid_bio *rbio) 1426static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1426{ 1427{
1427 btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); 1428 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1429 read_rebuild_work, NULL, NULL);
1428 1430
1429 btrfs_queue_work(rbio->fs_info->rmw_workers, 1431 btrfs_queue_work(rbio->fs_info->rmw_workers,
1430 &rbio->work); 1432 &rbio->work);
@@ -1665,7 +1667,8 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1665 plug = container_of(cb, struct btrfs_plug_cb, cb); 1667 plug = container_of(cb, struct btrfs_plug_cb, cb);
1666 1668
1667 if (from_schedule) { 1669 if (from_schedule) {
1668 btrfs_init_work(&plug->work, unplug_work, NULL, NULL); 1670 btrfs_init_work(&plug->work, btrfs_rmw_helper,
1671 unplug_work, NULL, NULL);
1669 btrfs_queue_work(plug->info->rmw_workers, 1672 btrfs_queue_work(plug->info->rmw_workers,
1670 &plug->work); 1673 &plug->work);
1671 return; 1674 return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 09230cf3a244..20408c6b665a 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -798,7 +798,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
798 /* FIXME we cannot handle this properly right now */ 798 /* FIXME we cannot handle this properly right now */
799 BUG(); 799 BUG();
800 } 800 }
801 btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); 801 btrfs_init_work(&rmw->work, btrfs_readahead_helper,
802 reada_start_machine_worker, NULL, NULL);
802 rmw->fs_info = fs_info; 803 rmw->fs_info = fs_info;
803 804
804 btrfs_queue_work(fs_info->readahead_workers, &rmw->work); 805 btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b6d198f5181e..f4a41f37be22 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -428,8 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
428 sbio->index = i; 428 sbio->index = i;
429 sbio->sctx = sctx; 429 sbio->sctx = sctx;
430 sbio->page_count = 0; 430 sbio->page_count = 0;
431 btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, 431 btrfs_init_work(&sbio->work, btrfs_scrub_helper,
432 NULL, NULL); 432 scrub_bio_end_io_worker, NULL, NULL);
433 433
434 if (i != SCRUB_BIOS_PER_SCTX - 1) 434 if (i != SCRUB_BIOS_PER_SCTX - 1)
435 sctx->bios[i]->next_free = i + 1; 435 sctx->bios[i]->next_free = i + 1;
@@ -999,8 +999,8 @@ nodatasum_case:
999 fixup_nodatasum->root = fs_info->extent_root; 999 fixup_nodatasum->root = fs_info->extent_root;
1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1; 1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
1001 scrub_pending_trans_workers_inc(sctx); 1001 scrub_pending_trans_workers_inc(sctx);
1002 btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, 1002 btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
1003 NULL, NULL); 1003 scrub_fixup_nodatasum, NULL, NULL);
1004 btrfs_queue_work(fs_info->scrub_workers, 1004 btrfs_queue_work(fs_info->scrub_workers,
1005 &fixup_nodatasum->work); 1005 &fixup_nodatasum->work);
1006 goto out; 1006 goto out;
@@ -1616,7 +1616,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err)
1616 sbio->err = err; 1616 sbio->err = err;
1617 sbio->bio = bio; 1617 sbio->bio = bio;
1618 1618
1619 btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); 1619 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
1620 scrub_wr_bio_end_io_worker, NULL, NULL);
1620 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); 1621 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1621} 1622}
1622 1623
@@ -2904,6 +2905,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2904 struct scrub_ctx *sctx; 2905 struct scrub_ctx *sctx;
2905 int ret; 2906 int ret;
2906 struct btrfs_device *dev; 2907 struct btrfs_device *dev;
2908 struct rcu_string *name;
2907 2909
2908 if (btrfs_fs_closing(fs_info)) 2910 if (btrfs_fs_closing(fs_info))
2909 return -EINVAL; 2911 return -EINVAL;
@@ -2965,6 +2967,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2965 return -ENODEV; 2967 return -ENODEV;
2966 } 2968 }
2967 2969
2970 if (!is_dev_replace && !readonly && !dev->writeable) {
2971 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2972 rcu_read_lock();
2973 name = rcu_dereference(dev->name);
2974 btrfs_err(fs_info, "scrub: device %s is not writable",
2975 name->str);
2976 rcu_read_unlock();
2977 return -EROFS;
2978 }
2979
2968 mutex_lock(&fs_info->scrub_lock); 2980 mutex_lock(&fs_info->scrub_lock);
2969 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { 2981 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2970 mutex_unlock(&fs_info->scrub_lock); 2982 mutex_unlock(&fs_info->scrub_lock);
@@ -3203,7 +3215,8 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3203 nocow_ctx->len = len; 3215 nocow_ctx->len = len;
3204 nocow_ctx->mirror_num = mirror_num; 3216 nocow_ctx->mirror_num = mirror_num;
3205 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 3217 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3206 btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); 3218 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
3219 copy_nocow_pages_worker, NULL, NULL);
3207 INIT_LIST_HEAD(&nocow_ctx->inodes); 3220 INIT_LIST_HEAD(&nocow_ctx->inodes);
3208 btrfs_queue_work(fs_info->scrub_nocow_workers, 3221 btrfs_queue_work(fs_info->scrub_nocow_workers,
3209 &nocow_ctx->work); 3222 &nocow_ctx->work);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 78699364f537..12e53556e214 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -614,7 +614,7 @@ int btrfs_kobj_rm_device(struct btrfs_fs_info *fs_info,
614 if (!fs_info->device_dir_kobj) 614 if (!fs_info->device_dir_kobj)
615 return -EINVAL; 615 return -EINVAL;
616 616
617 if (one_device) { 617 if (one_device && one_device->bdev) {
618 disk = one_device->bdev->bd_part; 618 disk = one_device->bdev->bd_part;
619 disk_kobj = &part_to_dev(disk)->kobj; 619 disk_kobj = &part_to_dev(disk)->kobj;
620 620
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9e1f2cd5e67a..7e0e6e3029dd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3298,7 +3298,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3298 struct list_head ordered_sums; 3298 struct list_head ordered_sums;
3299 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3299 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3300 bool has_extents = false; 3300 bool has_extents = false;
3301 bool need_find_last_extent = (*last_extent == 0); 3301 bool need_find_last_extent = true;
3302 bool done = false; 3302 bool done = false;
3303 3303
3304 INIT_LIST_HEAD(&ordered_sums); 3304 INIT_LIST_HEAD(&ordered_sums);
@@ -3352,8 +3352,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3352 */ 3352 */
3353 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { 3353 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
3354 has_extents = true; 3354 has_extents = true;
3355 if (need_find_last_extent && 3355 if (first_key.objectid == (u64)-1)
3356 first_key.objectid == (u64)-1)
3357 first_key = ins_keys[i]; 3356 first_key = ins_keys[i];
3358 } else { 3357 } else {
3359 need_find_last_extent = false; 3358 need_find_last_extent = false;
@@ -3427,6 +3426,16 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3427 if (!has_extents) 3426 if (!has_extents)
3428 return ret; 3427 return ret;
3429 3428
3429 if (need_find_last_extent && *last_extent == first_key.offset) {
3430 /*
3431 * We don't have any leafs between our current one and the one
3432 * we processed before that can have file extent items for our
3433 * inode (and have a generation number smaller than our current
3434 * transaction id).
3435 */
3436 need_find_last_extent = false;
3437 }
3438
3430 /* 3439 /*
3431 * Because we use btrfs_search_forward we could skip leaves that were 3440 * Because we use btrfs_search_forward we could skip leaves that were
3432 * not modified and then assume *last_extent is valid when it really 3441 * not modified and then assume *last_extent is valid when it really
@@ -3537,7 +3546,7 @@ fill_holes:
3537 0, 0); 3546 0, 0);
3538 if (ret) 3547 if (ret)
3539 break; 3548 break;
3540 *last_extent = offset + len; 3549 *last_extent = extent_end;
3541 } 3550 }
3542 /* 3551 /*
3543 * Need to let the callers know we dropped the path so they should 3552 * Need to let the callers know we dropped the path so they should
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6cb82f62cb7c..340a92d08e84 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -508,6 +508,44 @@ static noinline int device_list_add(const char *path,
508 ret = 1; 508 ret = 1;
509 device->fs_devices = fs_devices; 509 device->fs_devices = fs_devices;
510 } else if (!device->name || strcmp(device->name->str, path)) { 510 } else if (!device->name || strcmp(device->name->str, path)) {
511 /*
512 * When FS is already mounted.
513 * 1. If you are here and if the device->name is NULL that
514 * means this device was missing at time of FS mount.
515 * 2. If you are here and if the device->name is different
516 * from 'path' that means either
517 * a. The same device disappeared and reappeared with
518 * different name. or
519 * b. The missing-disk-which-was-replaced, has
520 * reappeared now.
521 *
522 * We must allow 1 and 2a above. But 2b would be a spurious
523 * and unintentional.
524 *
525 * Further in case of 1 and 2a above, the disk at 'path'
526 * would have missed some transaction when it was away and
527 * in case of 2a the stale bdev has to be updated as well.
528 * 2b must not be allowed at all time.
529 */
530
531 /*
532 * As of now don't allow update to btrfs_fs_device through
533 * the btrfs dev scan cli, after FS has been mounted.
534 */
535 if (fs_devices->opened) {
536 return -EBUSY;
537 } else {
538 /*
539 * That is if the FS is _not_ mounted and if you
540 * are here, that means there is more than one
541 * disk with same uuid and devid.We keep the one
542 * with larger generation number or the last-in if
543 * generation are equal.
544 */
545 if (found_transid < device->generation)
546 return -EEXIST;
547 }
548
511 name = rcu_string_strdup(path, GFP_NOFS); 549 name = rcu_string_strdup(path, GFP_NOFS);
512 if (!name) 550 if (!name)
513 return -ENOMEM; 551 return -ENOMEM;
@@ -519,6 +557,15 @@ static noinline int device_list_add(const char *path,
519 } 557 }
520 } 558 }
521 559
560 /*
561 * Unmount does not free the btrfs_device struct but would zero
562 * generation along with most of the other members. So just update
563 * it back. We need it to pick the disk with largest generation
564 * (as above).
565 */
566 if (!fs_devices->opened)
567 device->generation = found_transid;
568
522 if (found_transid > fs_devices->latest_trans) { 569 if (found_transid > fs_devices->latest_trans) {
523 fs_devices->latest_devid = devid; 570 fs_devices->latest_devid = devid;
524 fs_devices->latest_trans = found_transid; 571 fs_devices->latest_trans = found_transid;
@@ -1436,7 +1483,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
1436 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1483 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1437 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1484 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1438 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1485 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1439 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1486 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1440 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1487 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1441 btrfs_set_device_group(leaf, dev_item, 0); 1488 btrfs_set_device_group(leaf, dev_item, 0);
1442 btrfs_set_device_seek_speed(leaf, dev_item, 0); 1489 btrfs_set_device_seek_speed(leaf, dev_item, 0);
@@ -1671,7 +1718,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1671 device->fs_devices->total_devices--; 1718 device->fs_devices->total_devices--;
1672 1719
1673 if (device->missing) 1720 if (device->missing)
1674 root->fs_info->fs_devices->missing_devices--; 1721 device->fs_devices->missing_devices--;
1675 1722
1676 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1723 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1677 struct btrfs_device, dev_list); 1724 struct btrfs_device, dev_list);
@@ -1801,8 +1848,12 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1801 if (srcdev->bdev) { 1848 if (srcdev->bdev) {
1802 fs_info->fs_devices->open_devices--; 1849 fs_info->fs_devices->open_devices--;
1803 1850
1804 /* zero out the old super */ 1851 /*
1805 btrfs_scratch_superblock(srcdev); 1852 * zero out the old super if it is not writable
1853 * (e.g. seed device)
1854 */
1855 if (srcdev->writeable)
1856 btrfs_scratch_superblock(srcdev);
1806 } 1857 }
1807 1858
1808 call_rcu(&srcdev->rcu, free_device); 1859 call_rcu(&srcdev->rcu, free_device);
@@ -1941,6 +1992,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1941 fs_devices->seeding = 0; 1992 fs_devices->seeding = 0;
1942 fs_devices->num_devices = 0; 1993 fs_devices->num_devices = 0;
1943 fs_devices->open_devices = 0; 1994 fs_devices->open_devices = 0;
1995 fs_devices->missing_devices = 0;
1996 fs_devices->num_can_discard = 0;
1997 fs_devices->rotating = 0;
1944 fs_devices->seed = seed_devices; 1998 fs_devices->seed = seed_devices;
1945 1999
1946 generate_random_uuid(fs_devices->fsid); 2000 generate_random_uuid(fs_devices->fsid);
@@ -5800,7 +5854,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
5800 else 5854 else
5801 generate_random_uuid(dev->uuid); 5855 generate_random_uuid(dev->uuid);
5802 5856
5803 btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); 5857 btrfs_init_work(&dev->work, btrfs_submit_helper,
5858 pending_bios_fn, NULL, NULL);
5804 5859
5805 return dev; 5860 return dev;
5806} 5861}