diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-05-07 11:43:44 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:02 -0400 |
commit | a061fc8da7b990faa41ca503e66faef3ecdeead0 (patch) | |
tree | 0d3b5b6e4d2164d507d9a16d5b38d373592a5c8f /fs/btrfs/disk-io.c | |
parent | 5d9cd9ecbf40b8bd5045a3c2f1feb35db6a12266 (diff) |
Btrfs: Add support for online device removal
This required a few structural changes to the code that manages bdev pointers:
- The VFS super block now gets an anon-bdev instead of a pointer to the
  lowest bdev. This allows us to avoid swapping the super block bdev pointer
  around at run time.
- The code to read in the super block no longer goes through the extent
  buffer interface. Things got ugly keeping the mapping constant.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 101 |
1 files changed, 46 insertions, 55 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fabc31b334b6..9d5424ad01a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -78,9 +78,13 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, | |||
78 | 78 | ||
79 | spin_lock(&em_tree->lock); | 79 | spin_lock(&em_tree->lock); |
80 | em = lookup_extent_mapping(em_tree, start, len); | 80 | em = lookup_extent_mapping(em_tree, start, len); |
81 | spin_unlock(&em_tree->lock); | 81 | if (em) { |
82 | if (em) | 82 | em->bdev = |
83 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | ||
84 | spin_unlock(&em_tree->lock); | ||
83 | goto out; | 85 | goto out; |
86 | } | ||
87 | spin_unlock(&em_tree->lock); | ||
84 | 88 | ||
85 | em = alloc_extent_map(GFP_NOFS); | 89 | em = alloc_extent_map(GFP_NOFS); |
86 | if (!em) { | 90 | if (!em) { |
@@ -90,7 +94,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, | |||
90 | em->start = 0; | 94 | em->start = 0; |
91 | em->len = (u64)-1; | 95 | em->len = (u64)-1; |
92 | em->block_start = 0; | 96 | em->block_start = 0; |
93 | em->bdev = inode->i_sb->s_bdev; | 97 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
94 | 98 | ||
95 | spin_lock(&em_tree->lock); | 99 | spin_lock(&em_tree->lock); |
96 | ret = add_extent_mapping(em_tree, em); | 100 | ret = add_extent_mapping(em_tree, em); |
@@ -435,11 +439,6 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
435 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); | 439 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); |
436 | BUG_ON(ret); | 440 | BUG_ON(ret); |
437 | 441 | ||
438 | if (offset == BTRFS_SUPER_INFO_OFFSET) { | ||
439 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; | ||
440 | submit_bio(rw, bio); | ||
441 | return 0; | ||
442 | } | ||
443 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); | 442 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); |
444 | } | 443 | } |
445 | 444 | ||
@@ -587,8 +586,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) | |||
587 | list = &fs_info->fs_devices->devices; | 586 | list = &fs_info->fs_devices->devices; |
588 | list_for_each(next, list) { | 587 | list_for_each(next, list) { |
589 | device = list_entry(next, struct btrfs_device, dev_list); | 588 | device = list_entry(next, struct btrfs_device, dev_list); |
590 | if (device->bdev && device->bdev != fs_info->sb->s_bdev) | 589 | close_bdev_excl(device->bdev); |
591 | close_bdev_excl(device->bdev); | ||
592 | device->bdev = NULL; | 590 | device->bdev = NULL; |
593 | } | 591 | } |
594 | return 0; | 592 | return 0; |
@@ -1118,6 +1116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1118 | u32 leafsize; | 1116 | u32 leafsize; |
1119 | u32 blocksize; | 1117 | u32 blocksize; |
1120 | u32 stripesize; | 1118 | u32 stripesize; |
1119 | struct buffer_head *bh; | ||
1121 | struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), | 1120 | struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), |
1122 | GFP_NOFS); | 1121 | GFP_NOFS); |
1123 | struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), | 1122 | struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), |
@@ -1153,7 +1152,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1153 | spin_lock_init(&fs_info->new_trans_lock); | 1152 | spin_lock_init(&fs_info->new_trans_lock); |
1154 | 1153 | ||
1155 | init_completion(&fs_info->kobj_unregister); | 1154 | init_completion(&fs_info->kobj_unregister); |
1156 | sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); | ||
1157 | fs_info->tree_root = tree_root; | 1155 | fs_info->tree_root = tree_root; |
1158 | fs_info->extent_root = extent_root; | 1156 | fs_info->extent_root = extent_root; |
1159 | fs_info->chunk_root = chunk_root; | 1157 | fs_info->chunk_root = chunk_root; |
@@ -1170,6 +1168,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1170 | fs_info->btree_inode->i_ino = 1; | 1168 | fs_info->btree_inode->i_ino = 1; |
1171 | fs_info->btree_inode->i_nlink = 1; | 1169 | fs_info->btree_inode->i_nlink = 1; |
1172 | 1170 | ||
1171 | sb->s_blocksize = 4096; | ||
1172 | sb->s_blocksize_bits = blksize_bits(4096); | ||
1173 | |||
1173 | /* | 1174 | /* |
1174 | * we set the i_size on the btree inode to the max possible int. | 1175 | * we set the i_size on the btree inode to the max possible int. |
1175 | * the real end of the address space is determined by all of | 1176 | * the real end of the address space is determined by all of |
@@ -1229,19 +1230,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1229 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 1230 | __setup_root(4096, 4096, 4096, 4096, tree_root, |
1230 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1231 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1231 | 1232 | ||
1232 | fs_info->sb_buffer = read_tree_block(tree_root, | ||
1233 | BTRFS_SUPER_INFO_OFFSET, | ||
1234 | 4096); | ||
1235 | 1233 | ||
1236 | if (!fs_info->sb_buffer) | 1234 | bh = __bread(fs_devices->latest_bdev, |
1235 | BTRFS_SUPER_INFO_OFFSET / 4096, 4096); | ||
1236 | if (!bh) | ||
1237 | goto fail_iput; | 1237 | goto fail_iput; |
1238 | 1238 | ||
1239 | read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, | 1239 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1240 | sizeof(fs_info->super_copy)); | 1240 | brelse(bh); |
1241 | 1241 | ||
1242 | read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, | 1242 | memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); |
1243 | (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), | ||
1244 | BTRFS_FSID_SIZE); | ||
1245 | 1243 | ||
1246 | disk_super = &fs_info->super_copy; | 1244 | disk_super = &fs_info->super_copy; |
1247 | if (!btrfs_super_root(disk_super)) | 1245 | if (!btrfs_super_root(disk_super)) |
@@ -1263,7 +1261,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1263 | tree_root->leafsize = leafsize; | 1261 | tree_root->leafsize = leafsize; |
1264 | tree_root->sectorsize = sectorsize; | 1262 | tree_root->sectorsize = sectorsize; |
1265 | tree_root->stripesize = stripesize; | 1263 | tree_root->stripesize = stripesize; |
1266 | sb_set_blocksize(sb, sectorsize); | 1264 | |
1265 | sb->s_blocksize = sectorsize; | ||
1266 | sb->s_blocksize_bits = blksize_bits(sectorsize); | ||
1267 | 1267 | ||
1268 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | 1268 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, |
1269 | sizeof(disk_super->magic))) { | 1269 | sizeof(disk_super->magic))) { |
@@ -1339,7 +1339,6 @@ fail_tree_root: | |||
1339 | fail_sys_array: | 1339 | fail_sys_array: |
1340 | mutex_unlock(&fs_info->fs_mutex); | 1340 | mutex_unlock(&fs_info->fs_mutex); |
1341 | fail_sb_buffer: | 1341 | fail_sb_buffer: |
1342 | free_extent_buffer(fs_info->sb_buffer); | ||
1343 | extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); | 1342 | extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); |
1344 | fail_iput: | 1343 | fail_iput: |
1345 | iput(fs_info->btree_inode); | 1344 | iput(fs_info->btree_inode); |
@@ -1380,41 +1379,44 @@ int write_all_supers(struct btrfs_root *root) | |||
1380 | struct list_head *cur; | 1379 | struct list_head *cur; |
1381 | struct list_head *head = &root->fs_info->fs_devices->devices; | 1380 | struct list_head *head = &root->fs_info->fs_devices->devices; |
1382 | struct btrfs_device *dev; | 1381 | struct btrfs_device *dev; |
1383 | struct extent_buffer *sb; | 1382 | struct btrfs_super_block *sb; |
1384 | struct btrfs_dev_item *dev_item; | 1383 | struct btrfs_dev_item *dev_item; |
1385 | struct buffer_head *bh; | 1384 | struct buffer_head *bh; |
1386 | int ret; | 1385 | int ret; |
1387 | int do_barriers; | 1386 | int do_barriers; |
1388 | int max_errors; | 1387 | int max_errors; |
1389 | int total_errors = 0; | 1388 | int total_errors = 0; |
1389 | u32 crc; | ||
1390 | u64 flags; | ||
1390 | 1391 | ||
1391 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 1392 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; |
1392 | do_barriers = !btrfs_test_opt(root, NOBARRIER); | 1393 | do_barriers = !btrfs_test_opt(root, NOBARRIER); |
1393 | 1394 | ||
1394 | sb = root->fs_info->sb_buffer; | 1395 | sb = &root->fs_info->super_for_commit; |
1395 | dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, | 1396 | dev_item = &sb->dev_item; |
1396 | dev_item); | ||
1397 | list_for_each(cur, head) { | 1397 | list_for_each(cur, head) { |
1398 | dev = list_entry(cur, struct btrfs_device, dev_list); | 1398 | dev = list_entry(cur, struct btrfs_device, dev_list); |
1399 | btrfs_set_device_type(sb, dev_item, dev->type); | 1399 | btrfs_set_stack_device_type(dev_item, dev->type); |
1400 | btrfs_set_device_id(sb, dev_item, dev->devid); | 1400 | btrfs_set_stack_device_id(dev_item, dev->devid); |
1401 | btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); | 1401 | btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); |
1402 | btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); | 1402 | btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); |
1403 | btrfs_set_device_io_align(sb, dev_item, dev->io_align); | 1403 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); |
1404 | btrfs_set_device_io_width(sb, dev_item, dev->io_width); | 1404 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); |
1405 | btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); | 1405 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); |
1406 | write_extent_buffer(sb, dev->uuid, | 1406 | memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); |
1407 | (unsigned long)btrfs_device_uuid(dev_item), | 1407 | flags = btrfs_super_flags(sb); |
1408 | BTRFS_UUID_SIZE); | 1408 | btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); |
1409 | 1409 | ||
1410 | btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); | 1410 | |
1411 | csum_tree_block(root, sb, 0); | 1411 | crc = ~(u32)0; |
1412 | 1412 | crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, | |
1413 | bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / | 1413 | BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); |
1414 | root->fs_info->sb->s_blocksize, | 1414 | btrfs_csum_final(crc, sb->csum); |
1415 | |||
1416 | bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, | ||
1415 | BTRFS_SUPER_INFO_SIZE); | 1417 | BTRFS_SUPER_INFO_SIZE); |
1416 | 1418 | ||
1417 | read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); | 1419 | memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); |
1418 | dev->pending_io = bh; | 1420 | dev->pending_io = bh; |
1419 | 1421 | ||
1420 | get_bh(bh); | 1422 | get_bh(bh); |
@@ -1483,15 +1485,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1483 | int ret; | 1485 | int ret; |
1484 | 1486 | ||
1485 | ret = write_all_supers(root); | 1487 | ret = write_all_supers(root); |
1486 | #if 0 | ||
1487 | if (!btrfs_test_opt(root, NOBARRIER)) | ||
1488 | blkdev_issue_flush(sb->s_bdev, NULL); | ||
1489 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); | ||
1490 | ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, | ||
1491 | super->start, super->len); | ||
1492 | if (!btrfs_test_opt(root, NOBARRIER)) | ||
1493 | blkdev_issue_flush(sb->s_bdev, NULL); | ||
1494 | #endif | ||
1495 | return ret; | 1488 | return ret; |
1496 | } | 1489 | } |
1497 | 1490 | ||
@@ -1570,8 +1563,6 @@ int close_ctree(struct btrfs_root *root) | |||
1570 | if (root->fs_info->dev_root->node); | 1563 | if (root->fs_info->dev_root->node); |
1571 | free_extent_buffer(root->fs_info->dev_root->node); | 1564 | free_extent_buffer(root->fs_info->dev_root->node); |
1572 | 1565 | ||
1573 | free_extent_buffer(fs_info->sb_buffer); | ||
1574 | |||
1575 | btrfs_free_block_groups(root->fs_info); | 1566 | btrfs_free_block_groups(root->fs_info); |
1576 | del_fs_roots(fs_info); | 1567 | del_fs_roots(fs_info); |
1577 | 1568 | ||
@@ -1652,7 +1643,7 @@ void btrfs_throttle(struct btrfs_root *root) | |||
1652 | { | 1643 | { |
1653 | struct backing_dev_info *bdi; | 1644 | struct backing_dev_info *bdi; |
1654 | 1645 | ||
1655 | bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | 1646 | bdi = &root->fs_info->bdi; |
1656 | if (root->fs_info->throttles && bdi_write_congested(bdi)) { | 1647 | if (root->fs_info->throttles && bdi_write_congested(bdi)) { |
1657 | #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) | 1648 | #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) |
1658 | congestion_wait(WRITE, HZ/20); | 1649 | congestion_wait(WRITE, HZ/20); |