aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chris Mason <chris.mason@oracle.com> 2008-09-29 11:19:10 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-29 11:19:10 -0400
commit: 8c8bee1d7ca47fc75b6bd24a8085c525a2394c02 (patch)
tree: eb10010a7baa7ab73b068f9d9a95d15b7d79e300
parent: 1a40e23b95da45051ee4d74374c58ae87a14051c (diff)
Btrfs: Wait for IO on the block device inodes of newly added devices
btrfs-vol -a /dev/xxx will zero the first and last two MB of the device. The kernel code needs to wait for this IO to finish before it adds the device.

btrfs metadata IO does not happen through the block device inode. A separate address space is used, allowing the zero filled buffer heads in the block device inode to be written to disk after FS metadata starts going down to the disk via the btrfs metadata inode.

The end result is zero filled metadata blocks after adding new devices into the filesystem.

The fix is a simple filemap_write_and_wait on the block device inode before actually inserting it into the pool of available devices.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--  fs/btrfs/ctree.h     1
-rw-r--r--  fs/btrfs/disk-io.c  11
-rw-r--r--  fs/btrfs/inode.c    13
-rw-r--r--  fs/btrfs/volumes.c   1
4 files changed, 22 insertions, 4 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2775e270881e..0079b60b18f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -610,6 +610,7 @@ struct btrfs_fs_info {
610 struct list_head dead_roots; 610 struct list_head dead_roots;
611 611
612 atomic_t nr_async_submits; 612 atomic_t nr_async_submits;
613 atomic_t async_submit_draining;
613 atomic_t nr_async_bios; 614 atomic_t nr_async_bios;
614 atomic_t tree_log_writers; 615 atomic_t tree_log_writers;
615 atomic_t tree_log_commit; 616 atomic_t tree_log_commit;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 45bc3132b054..45b4f7285275 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -460,6 +460,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
460 async->submit_bio_hook = submit_bio_hook; 460 async->submit_bio_hook = submit_bio_hook;
461 async->work.func = run_one_async_submit; 461 async->work.func = run_one_async_submit;
462 async->work.flags = 0; 462 async->work.flags = 0;
463
464 while(atomic_read(&fs_info->async_submit_draining) &&
465 atomic_read(&fs_info->nr_async_submits)) {
466 wait_event(fs_info->async_submit_wait,
467 (atomic_read(&fs_info->nr_async_submits) == 0));
468 }
469
463 atomic_inc(&fs_info->nr_async_submits); 470 atomic_inc(&fs_info->nr_async_submits);
464 btrfs_queue_worker(&fs_info->workers, &async->work); 471 btrfs_queue_worker(&fs_info->workers, &async->work);
465 472
@@ -495,11 +502,8 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
495 int mirror_num) 502 int mirror_num)
496{ 503{
497 struct btrfs_root *root = BTRFS_I(inode)->root; 504 struct btrfs_root *root = BTRFS_I(inode)->root;
498 u64 offset;
499 int ret; 505 int ret;
500 506
501 offset = bio->bi_sector << 9;
502
503 /* 507 /*
504 * when we're called for a write, we're already in the async 508 * when we're called for a write, we're already in the async
505 * submission context. Just jump into btrfs_map_bio 509 * submission context. Just jump into btrfs_map_bio
@@ -1360,6 +1364,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1360 INIT_LIST_HEAD(&fs_info->space_info); 1364 INIT_LIST_HEAD(&fs_info->space_info);
1361 btrfs_mapping_init(&fs_info->mapping_tree); 1365 btrfs_mapping_init(&fs_info->mapping_tree);
1362 atomic_set(&fs_info->nr_async_submits, 0); 1366 atomic_set(&fs_info->nr_async_submits, 0);
1367 atomic_set(&fs_info->async_submit_draining, 0);
1363 atomic_set(&fs_info->nr_async_bios, 0); 1368 atomic_set(&fs_info->nr_async_bios, 0);
1364 atomic_set(&fs_info->throttles, 0); 1369 atomic_set(&fs_info->throttles, 0);
1365 atomic_set(&fs_info->throttle_gen, 0); 1370 atomic_set(&fs_info->throttle_gen, 0);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4516fbf01671..404704d26822 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3440,13 +3440,24 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
3440 list_del_init(&binode->delalloc_inodes); 3440 list_del_init(&binode->delalloc_inodes);
3441 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); 3441 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
3442 if (inode) { 3442 if (inode) {
3443 filemap_write_and_wait(inode->i_mapping); 3443 filemap_flush(inode->i_mapping);
3444 iput(inode); 3444 iput(inode);
3445 } 3445 }
3446 cond_resched(); 3446 cond_resched();
3447 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); 3447 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
3448 } 3448 }
3449 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); 3449 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
3450
3451 /* the filemap_flush will queue IO into the worker threads, but
3452 * we have to make sure the IO is actually started and that
3453 * ordered extents get created before we return
3454 */
3455 atomic_inc(&root->fs_info->async_submit_draining);
3456 while(atomic_read(&root->fs_info->nr_async_submits)) {
3457 wait_event(root->fs_info->async_submit_wait,
3458 (atomic_read(&root->fs_info->nr_async_submits) == 0));
3459 }
3460 atomic_dec(&root->fs_info->async_submit_draining);
3450 return 0; 3461 return 0;
3451} 3462}
3452 3463
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 51f113119b25..f63cf7621a01 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1038,6 +1038,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1038 return -EIO; 1038 return -EIO;
1039 } 1039 }
1040 1040
1041 filemap_write_and_wait(bdev->bd_inode->i_mapping);
1041 mutex_lock(&root->fs_info->volume_mutex); 1042 mutex_lock(&root->fs_info->volume_mutex);
1042 1043
1043 trans = btrfs_start_transaction(root, 1); 1044 trans = btrfs_start_transaction(root, 1);