author	Yan Zheng <zheng.yan@oracle.com>	2008-11-17 21:11:30 -0500
committer	Chris Mason <chris.mason@oracle.com>	2008-11-17 21:11:30 -0500
commit	2b82032c34ec40515d3c45c36cd1961f37977de8 (patch)
tree	fbdfe7b13dd51983dfca4aeb75983b37ee186ff9 /fs/btrfs/volumes.c
parent	c146afad2c7fea6a366d4945c1bab9b03880f526 (diff)
Btrfs: Seed device support
A seed device is a special btrfs with the SEEDING super flag set; it can only be mounted in read-only mode. Seed devices allow people to create a new btrfs on top of them. The new FS contains the same contents as the seed device, but it can be mounted in read-write mode.

This patch does the following:

1) Split the code in btrfs_alloc_chunk into two parts. The first part makes the newly allocated chunk usable, but does not do any operation that modifies the chunk tree. The second part does the chunk tree modifications. This division is needed for the bootstrap step of adding storage to a seed device.

2) Update the device management code to handle seed devices. The basic idea is: for an FS grown from seed devices, its seed devices are put into a list. Seed devices are opened on demand at mount time. If any seed device is missing or has been changed, the btrfs kernel module will refuse to mount the FS.

3) Make btrfs_find_block_group not return NULL when all block groups are read-only.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
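Before the diff, a condensed sketch of the calling pattern the split in point 1) produces. It is lifted from the btrfs_alloc_chunk() introduced later in this patch, with the error handling slightly simplified (the patch BUG_ON()s the final step rather than returning its result); all identifiers are from the patch itself. For the seed-device bootstrap, init_first_rw_device() runs the first phase twice, once for a metadata chunk and once for a system chunk, before the new device item or any chunk items are inserted into the chunk tree.

int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 type)
{
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct map_lookup *map;
	u64 chunk_offset;
	u64 chunk_size;
	u64 stripe_size;
	int ret;

	/* pick the logical start offset for the new chunk */
	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
			      &chunk_offset);
	if (ret)
		return ret;

	/* part 1: allocate dev extents, set up the map_lookup, extent
	 * mapping and block group; the chunk tree is not modified yet */
	ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
				  &stripe_size, chunk_offset, type);
	if (ret)
		return ret;

	/* part 2: insert the chunk item into the chunk tree (and, for
	 * SYSTEM chunks, append it to the super block's chunk array) */
	return __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
				    chunk_size, stripe_size);
}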
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--	fs/btrfs/volumes.c	1103
1 file changed, 834 insertions, 269 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 80a27284dbf1..d6f1996de629 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,12 @@ struct map_lookup {
40 struct btrfs_bio_stripe stripes[]; 40 struct btrfs_bio_stripe stripes[];
41}; 41};
42 42
43static int init_first_rw_device(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root,
45 struct btrfs_device *device);
46static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
47
48
43#define map_lookup_size(n) (sizeof(struct map_lookup) + \ 49#define map_lookup_size(n) (sizeof(struct map_lookup) + \
44 (sizeof(struct btrfs_bio_stripe) * (n))) 50 (sizeof(struct btrfs_bio_stripe) * (n)))
45 51
@@ -69,25 +75,31 @@ static void unlock_chunks(struct btrfs_root *root)
69int btrfs_cleanup_fs_uuids(void) 75int btrfs_cleanup_fs_uuids(void)
70{ 76{
71 struct btrfs_fs_devices *fs_devices; 77 struct btrfs_fs_devices *fs_devices;
72 struct list_head *uuid_cur;
73 struct list_head *devices_cur;
74 struct btrfs_device *dev; 78 struct btrfs_device *dev;
75 79
76 list_for_each(uuid_cur, &fs_uuids) { 80 while (!list_empty(&fs_uuids)) {
77 fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, 81 fs_devices = list_entry(fs_uuids.next,
78 list); 82 struct btrfs_fs_devices, list);
83 list_del(&fs_devices->list);
79 while(!list_empty(&fs_devices->devices)) { 84 while(!list_empty(&fs_devices->devices)) {
80 devices_cur = fs_devices->devices.next; 85 dev = list_entry(fs_devices->devices.next,
81 dev = list_entry(devices_cur, struct btrfs_device, 86 struct btrfs_device, dev_list);
82 dev_list);
83 if (dev->bdev) { 87 if (dev->bdev) {
84 close_bdev_excl(dev->bdev); 88 close_bdev_excl(dev->bdev);
85 fs_devices->open_devices--; 89 fs_devices->open_devices--;
86 } 90 }
91 fs_devices->num_devices--;
92 if (dev->writeable)
93 fs_devices->rw_devices--;
87 list_del(&dev->dev_list); 94 list_del(&dev->dev_list);
95 list_del(&dev->dev_alloc_list);
88 kfree(dev->name); 96 kfree(dev->name);
89 kfree(dev); 97 kfree(dev);
90 } 98 }
99 WARN_ON(fs_devices->num_devices);
100 WARN_ON(fs_devices->open_devices);
101 WARN_ON(fs_devices->rw_devices);
102 kfree(fs_devices);
91 } 103 }
92 return 0; 104 return 0;
93} 105}
@@ -257,6 +269,9 @@ static noinline int device_list_add(const char *path,
257 disk_super->dev_item.uuid); 269 disk_super->dev_item.uuid);
258 } 270 }
259 if (!device) { 271 if (!device) {
272 if (fs_devices->opened)
273 return -EBUSY;
274
260 device = kzalloc(sizeof(*device), GFP_NOFS); 275 device = kzalloc(sizeof(*device), GFP_NOFS);
261 if (!device) { 276 if (!device) {
262 /* we can safely leave the fs_devices entry around */ 277 /* we can safely leave the fs_devices entry around */
@@ -273,8 +288,9 @@ static noinline int device_list_add(const char *path,
273 kfree(device); 288 kfree(device);
274 return -ENOMEM; 289 return -ENOMEM;
275 } 290 }
291 INIT_LIST_HEAD(&device->dev_alloc_list);
276 list_add(&device->dev_list, &fs_devices->devices); 292 list_add(&device->dev_list, &fs_devices->devices);
277 list_add(&device->dev_alloc_list, &fs_devices->alloc_list); 293 device->fs_devices = fs_devices;
278 fs_devices->num_devices++; 294 fs_devices->num_devices++;
279 } 295 }
280 296
@@ -288,58 +304,94 @@ static noinline int device_list_add(const char *path,
288 304
289int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) 305int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
290{ 306{
291 struct list_head *head = &fs_devices->devices; 307 struct list_head *tmp;
292 struct list_head *cur; 308 struct list_head *cur;
293 struct btrfs_device *device; 309 struct btrfs_device *device;
310 int seed_devices = 0;
294 311
295 mutex_lock(&uuid_mutex); 312 mutex_lock(&uuid_mutex);
296again: 313again:
297 list_for_each(cur, head) { 314 list_for_each_safe(cur, tmp, &fs_devices->devices) {
298 device = list_entry(cur, struct btrfs_device, dev_list); 315 device = list_entry(cur, struct btrfs_device, dev_list);
299 if (!device->in_fs_metadata) { 316 if (device->in_fs_metadata)
300 struct block_device *bdev; 317 continue;
301 list_del(&device->dev_list); 318
302 list_del(&device->dev_alloc_list); 319 if (device->bdev) {
320 close_bdev_excl(device->bdev);
321 device->bdev = NULL;
322 fs_devices->open_devices--;
323 }
324 if (device->writeable) {
325 list_del_init(&device->dev_alloc_list);
326 device->writeable = 0;
327 fs_devices->rw_devices--;
328 }
329 if (!seed_devices) {
330 list_del_init(&device->dev_list);
303 fs_devices->num_devices--; 331 fs_devices->num_devices--;
304 if (device->bdev) {
305 bdev = device->bdev;
306 fs_devices->open_devices--;
307 mutex_unlock(&uuid_mutex);
308 close_bdev_excl(bdev);
309 mutex_lock(&uuid_mutex);
310 }
311 kfree(device->name); 332 kfree(device->name);
312 kfree(device); 333 kfree(device);
313 goto again;
314 } 334 }
315 } 335 }
336
337 if (fs_devices->seed) {
338 fs_devices = fs_devices->seed;
339 seed_devices = 1;
340 goto again;
341 }
342
316 mutex_unlock(&uuid_mutex); 343 mutex_unlock(&uuid_mutex);
317 return 0; 344 return 0;
318} 345}
319 346
320int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) 347static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
321{ 348{
322 struct list_head *head = &fs_devices->devices; 349 struct btrfs_fs_devices *seed_devices;
323 struct list_head *cur; 350 struct list_head *cur;
324 struct btrfs_device *device; 351 struct btrfs_device *device;
352again:
353 if (--fs_devices->opened > 0)
354 return 0;
325 355
326 mutex_lock(&uuid_mutex); 356 list_for_each(cur, &fs_devices->devices) {
327 list_for_each(cur, head) {
328 device = list_entry(cur, struct btrfs_device, dev_list); 357 device = list_entry(cur, struct btrfs_device, dev_list);
329 if (device->bdev) { 358 if (device->bdev) {
330 close_bdev_excl(device->bdev); 359 close_bdev_excl(device->bdev);
331 fs_devices->open_devices--; 360 fs_devices->open_devices--;
332 } 361 }
362 if (device->writeable) {
363 list_del_init(&device->dev_alloc_list);
364 fs_devices->rw_devices--;
365 }
366
333 device->bdev = NULL; 367 device->bdev = NULL;
368 device->writeable = 0;
334 device->in_fs_metadata = 0; 369 device->in_fs_metadata = 0;
335 } 370 }
336 fs_devices->mounted = 0; 371 fs_devices->opened = 0;
337 mutex_unlock(&uuid_mutex); 372 fs_devices->seeding = 0;
373 fs_devices->sprouted = 0;
374
375 seed_devices = fs_devices->seed;
376 fs_devices->seed = NULL;
377 if (seed_devices) {
378 fs_devices = seed_devices;
379 goto again;
380 }
338 return 0; 381 return 0;
339} 382}
340 383
341int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 384int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
342 int flags, void *holder) 385{
386 int ret;
387
388 mutex_lock(&uuid_mutex);
389 ret = __btrfs_close_devices(fs_devices);
390 mutex_unlock(&uuid_mutex);
391 return ret;
392}
393
394int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder)
343{ 395{
344 struct block_device *bdev; 396 struct block_device *bdev;
345 struct list_head *head = &fs_devices->devices; 397 struct list_head *head = &fs_devices->devices;
@@ -350,24 +402,18 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
350 struct btrfs_super_block *disk_super; 402 struct btrfs_super_block *disk_super;
351 u64 latest_devid = 0; 403 u64 latest_devid = 0;
352 u64 latest_transid = 0; 404 u64 latest_transid = 0;
353 u64 transid;
354 u64 devid; 405 u64 devid;
406 int seeding = 1;
355 int ret = 0; 407 int ret = 0;
356 408
357 mutex_lock(&uuid_mutex);
358 if (fs_devices->mounted)
359 goto out;
360
361 list_for_each(cur, head) { 409 list_for_each(cur, head) {
362 device = list_entry(cur, struct btrfs_device, dev_list); 410 device = list_entry(cur, struct btrfs_device, dev_list);
363 if (device->bdev) 411 if (device->bdev)
364 continue; 412 continue;
365
366 if (!device->name) 413 if (!device->name)
367 continue; 414 continue;
368 415
369 bdev = open_bdev_excl(device->name, flags, holder); 416 bdev = open_bdev_excl(device->name, MS_RDONLY, holder);
370
371 if (IS_ERR(bdev)) { 417 if (IS_ERR(bdev)) {
372 printk("open %s failed\n", device->name); 418 printk("open %s failed\n", device->name);
373 goto error; 419 goto error;
@@ -387,16 +433,32 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
387 if (devid != device->devid) 433 if (devid != device->devid)
388 goto error_brelse; 434 goto error_brelse;
389 435
390 transid = btrfs_super_generation(disk_super); 436 if (memcmp(device->uuid, disk_super->dev_item.uuid,
391 if (!latest_transid || transid > latest_transid) { 437 BTRFS_UUID_SIZE))
438 goto error_brelse;
439
440 device->generation = btrfs_super_generation(disk_super);
441 if (!latest_transid || device->generation > latest_transid) {
392 latest_devid = devid; 442 latest_devid = devid;
393 latest_transid = transid; 443 latest_transid = device->generation;
394 latest_bdev = bdev; 444 latest_bdev = bdev;
395 } 445 }
396 446
447 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
448 device->writeable = 0;
449 } else {
450 device->writeable = !bdev_read_only(bdev);
451 seeding = 0;
452 }
453
397 device->bdev = bdev; 454 device->bdev = bdev;
398 device->in_fs_metadata = 0; 455 device->in_fs_metadata = 0;
399 fs_devices->open_devices++; 456 fs_devices->open_devices++;
457 if (device->writeable) {
458 fs_devices->rw_devices++;
459 list_add(&device->dev_alloc_list,
460 &fs_devices->alloc_list);
461 }
400 continue; 462 continue;
401 463
402error_brelse: 464error_brelse:
@@ -410,11 +472,32 @@ error:
410 ret = -EIO; 472 ret = -EIO;
411 goto out; 473 goto out;
412 } 474 }
413 fs_devices->mounted = 1; 475 fs_devices->seeding = seeding;
476 fs_devices->opened = 1;
414 fs_devices->latest_bdev = latest_bdev; 477 fs_devices->latest_bdev = latest_bdev;
415 fs_devices->latest_devid = latest_devid; 478 fs_devices->latest_devid = latest_devid;
416 fs_devices->latest_trans = latest_transid; 479 fs_devices->latest_trans = latest_transid;
480 fs_devices->total_rw_bytes = 0;
417out: 481out:
482 return ret;
483}
484
485int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
486 int flags, void *holder)
487{
488 int ret;
489
490 mutex_lock(&uuid_mutex);
491 if (fs_devices->opened) {
492 if (fs_devices->sprouted) {
493 ret = -EBUSY;
494 } else {
495 fs_devices->opened++;
496 ret = 0;
497 }
498 } else {
499 ret = __btrfs_open_devices(fs_devices, holder);
500 }
418 mutex_unlock(&uuid_mutex); 501 mutex_unlock(&uuid_mutex);
419 return ret; 502 return ret;
420} 503}
@@ -481,12 +564,12 @@ error:
481 */ 564 */
482static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, 565static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
483 struct btrfs_device *device, 566 struct btrfs_device *device,
484 struct btrfs_path *path,
485 u64 num_bytes, u64 *start) 567 u64 num_bytes, u64 *start)
486{ 568{
487 struct btrfs_key key; 569 struct btrfs_key key;
488 struct btrfs_root *root = device->dev_root; 570 struct btrfs_root *root = device->dev_root;
489 struct btrfs_dev_extent *dev_extent = NULL; 571 struct btrfs_dev_extent *dev_extent = NULL;
572 struct btrfs_path *path;
490 u64 hole_size = 0; 573 u64 hole_size = 0;
491 u64 last_byte = 0; 574 u64 last_byte = 0;
492 u64 search_start = 0; 575 u64 search_start = 0;
@@ -496,8 +579,11 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
496 int start_found; 579 int start_found;
497 struct extent_buffer *l; 580 struct extent_buffer *l;
498 581
499 start_found = 0; 582 path = btrfs_alloc_path();
583 if (!path)
584 return -ENOMEM;
500 path->reada = 2; 585 path->reada = 2;
586 start_found = 0;
501 587
502 /* FIXME use last free of some kind */ 588 /* FIXME use last free of some kind */
503 589
@@ -581,7 +667,6 @@ check_pending:
581 /* we have to make sure we didn't find an extent that has already 667 /* we have to make sure we didn't find an extent that has already
582 * been allocated by the map tree or the original allocation 668 * been allocated by the map tree or the original allocation
583 */ 669 */
584 btrfs_release_path(root, path);
585 BUG_ON(*start < search_start); 670 BUG_ON(*start < search_start);
586 671
587 if (*start + num_bytes > search_end) { 672 if (*start + num_bytes > search_end) {
@@ -589,10 +674,10 @@ check_pending:
589 goto error; 674 goto error;
590 } 675 }
591 /* check for pending inserts here */ 676 /* check for pending inserts here */
592 return 0; 677 ret = 0;
593 678
594error: 679error:
595 btrfs_release_path(root, path); 680 btrfs_free_path(path);
596 return ret; 681 return ret;
597} 682}
598 683
@@ -644,11 +729,10 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
644 return ret; 729 return ret;
645} 730}
646 731
647int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, 732int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
648 struct btrfs_device *device, 733 struct btrfs_device *device,
649 u64 chunk_tree, u64 chunk_objectid, 734 u64 chunk_tree, u64 chunk_objectid,
650 u64 chunk_offset, 735 u64 chunk_offset, u64 start, u64 num_bytes)
651 u64 num_bytes, u64 *start)
652{ 736{
653 int ret; 737 int ret;
654 struct btrfs_path *path; 738 struct btrfs_path *path;
@@ -662,13 +746,8 @@ int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
662 if (!path) 746 if (!path)
663 return -ENOMEM; 747 return -ENOMEM;
664 748
665 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
666 if (ret) {
667 goto err;
668 }
669
670 key.objectid = device->devid; 749 key.objectid = device->devid;
671 key.offset = *start; 750 key.offset = start;
672 key.type = BTRFS_DEV_EXTENT_KEY; 751 key.type = BTRFS_DEV_EXTENT_KEY;
673 ret = btrfs_insert_empty_item(trans, root, path, &key, 752 ret = btrfs_insert_empty_item(trans, root, path, &key,
674 sizeof(*extent)); 753 sizeof(*extent));
@@ -687,7 +766,6 @@ int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
687 766
688 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 767 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
689 btrfs_mark_buffer_dirty(leaf); 768 btrfs_mark_buffer_dirty(leaf);
690err:
691 btrfs_free_path(path); 769 btrfs_free_path(path);
692 return ret; 770 return ret;
693} 771}
@@ -735,12 +813,18 @@ error:
735 return ret; 813 return ret;
736} 814}
737 815
738static noinline int find_next_devid(struct btrfs_root *root, 816static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
739 struct btrfs_path *path, u64 *objectid)
740{ 817{
741 int ret; 818 int ret;
742 struct btrfs_key key; 819 struct btrfs_key key;
743 struct btrfs_key found_key; 820 struct btrfs_key found_key;
821 struct btrfs_path *path;
822
823 root = root->fs_info->chunk_root;
824
825 path = btrfs_alloc_path();
826 if (!path)
827 return -ENOMEM;
744 828
745 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 829 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
746 key.type = BTRFS_DEV_ITEM_KEY; 830 key.type = BTRFS_DEV_ITEM_KEY;
@@ -763,7 +847,7 @@ static noinline int find_next_devid(struct btrfs_root *root,
763 } 847 }
764 ret = 0; 848 ret = 0;
765error: 849error:
766 btrfs_release_path(root, path); 850 btrfs_free_path(path);
767 return ret; 851 return ret;
768} 852}
769 853
@@ -781,7 +865,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
781 struct extent_buffer *leaf; 865 struct extent_buffer *leaf;
782 struct btrfs_key key; 866 struct btrfs_key key;
783 unsigned long ptr; 867 unsigned long ptr;
784 u64 free_devid = 0;
785 868
786 root = root->fs_info->chunk_root; 869 root = root->fs_info->chunk_root;
787 870
@@ -789,13 +872,9 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
789 if (!path) 872 if (!path)
790 return -ENOMEM; 873 return -ENOMEM;
791 874
792 ret = find_next_devid(root, path, &free_devid);
793 if (ret)
794 goto out;
795
796 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 875 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
797 key.type = BTRFS_DEV_ITEM_KEY; 876 key.type = BTRFS_DEV_ITEM_KEY;
798 key.offset = free_devid; 877 key.offset = device->devid;
799 878
800 ret = btrfs_insert_empty_item(trans, root, path, &key, 879 ret = btrfs_insert_empty_item(trans, root, path, &key,
801 sizeof(*dev_item)); 880 sizeof(*dev_item));
@@ -805,8 +884,8 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
805 leaf = path->nodes[0]; 884 leaf = path->nodes[0];
806 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); 885 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
807 886
808 device->devid = free_devid;
809 btrfs_set_device_id(leaf, dev_item, device->devid); 887 btrfs_set_device_id(leaf, dev_item, device->devid);
888 btrfs_set_device_generation(leaf, dev_item, 0);
810 btrfs_set_device_type(leaf, dev_item, device->type); 889 btrfs_set_device_type(leaf, dev_item, device->type);
811 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 890 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
812 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 891 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
@@ -819,9 +898,11 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
819 898
820 ptr = (unsigned long)btrfs_device_uuid(dev_item); 899 ptr = (unsigned long)btrfs_device_uuid(dev_item);
821 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 900 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
901 ptr = (unsigned long)btrfs_device_fsid(dev_item);
902 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
822 btrfs_mark_buffer_dirty(leaf); 903 btrfs_mark_buffer_dirty(leaf);
823 ret = 0;
824 904
905 ret = 0;
825out: 906out:
826 btrfs_free_path(path); 907 btrfs_free_path(path);
827 return ret; 908 return ret;
@@ -832,11 +913,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
832{ 913{
833 int ret; 914 int ret;
834 struct btrfs_path *path; 915 struct btrfs_path *path;
835 struct block_device *bdev = device->bdev;
836 struct btrfs_device *next_dev;
837 struct btrfs_key key; 916 struct btrfs_key key;
838 u64 total_bytes;
839 struct btrfs_fs_devices *fs_devices;
840 struct btrfs_trans_handle *trans; 917 struct btrfs_trans_handle *trans;
841 918
842 root = root->fs_info->chunk_root; 919 root = root->fs_info->chunk_root;
@@ -863,25 +940,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
863 ret = btrfs_del_item(trans, root, path); 940 ret = btrfs_del_item(trans, root, path);
864 if (ret) 941 if (ret)
865 goto out; 942 goto out;
866
867 /*
868 * at this point, the device is zero sized. We want to
869 * remove it from the devices list and zero out the old super
870 */
871 list_del_init(&device->dev_list);
872 list_del_init(&device->dev_alloc_list);
873 fs_devices = root->fs_info->fs_devices;
874
875 next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
876 dev_list);
877 if (bdev == root->fs_info->sb->s_bdev)
878 root->fs_info->sb->s_bdev = next_dev->bdev;
879 if (bdev == fs_devices->latest_bdev)
880 fs_devices->latest_bdev = next_dev->bdev;
881
882 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
883 btrfs_set_super_num_devices(&root->fs_info->super_copy,
884 total_bytes - 1);
885out: 943out:
886 btrfs_free_path(path); 944 btrfs_free_path(path);
887 unlock_chunks(root); 945 unlock_chunks(root);
@@ -892,11 +950,14 @@ out:
892int btrfs_rm_device(struct btrfs_root *root, char *device_path) 950int btrfs_rm_device(struct btrfs_root *root, char *device_path)
893{ 951{
894 struct btrfs_device *device; 952 struct btrfs_device *device;
953 struct btrfs_device *next_device;
895 struct block_device *bdev; 954 struct block_device *bdev;
896 struct buffer_head *bh = NULL; 955 struct buffer_head *bh = NULL;
897 struct btrfs_super_block *disk_super; 956 struct btrfs_super_block *disk_super;
898 u64 all_avail; 957 u64 all_avail;
899 u64 devid; 958 u64 devid;
959 u64 num_devices;
960 u8 *dev_uuid;
900 int ret = 0; 961 int ret = 0;
901 962
902 mutex_lock(&uuid_mutex); 963 mutex_lock(&uuid_mutex);
@@ -907,14 +968,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
907 root->fs_info->avail_metadata_alloc_bits; 968 root->fs_info->avail_metadata_alloc_bits;
908 969
909 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && 970 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
910 btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) { 971 root->fs_info->fs_devices->rw_devices <= 4) {
911 printk("btrfs: unable to go below four devices on raid10\n"); 972 printk("btrfs: unable to go below four devices on raid10\n");
912 ret = -EINVAL; 973 ret = -EINVAL;
913 goto out; 974 goto out;
914 } 975 }
915 976
916 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && 977 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
917 btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) { 978 root->fs_info->fs_devices->rw_devices <= 2) {
918 printk("btrfs: unable to go below two devices on raid1\n"); 979 printk("btrfs: unable to go below two devices on raid1\n");
919 ret = -EINVAL; 980 ret = -EINVAL;
920 goto out; 981 goto out;
@@ -941,15 +1002,15 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
941 printk("btrfs: no missing devices found to remove\n"); 1002 printk("btrfs: no missing devices found to remove\n");
942 goto out; 1003 goto out;
943 } 1004 }
944
945 } else { 1005 } else {
946 bdev = open_bdev_excl(device_path, 0, 1006 bdev = open_bdev_excl(device_path, MS_RDONLY,
947 root->fs_info->bdev_holder); 1007 root->fs_info->bdev_holder);
948 if (IS_ERR(bdev)) { 1008 if (IS_ERR(bdev)) {
949 ret = PTR_ERR(bdev); 1009 ret = PTR_ERR(bdev);
950 goto out; 1010 goto out;
951 } 1011 }
952 1012
1013 set_blocksize(bdev, 4096);
953 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); 1014 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
954 if (!bh) { 1015 if (!bh) {
955 ret = -EIO; 1016 ret = -EIO;
@@ -957,45 +1018,97 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
957 } 1018 }
958 disk_super = (struct btrfs_super_block *)bh->b_data; 1019 disk_super = (struct btrfs_super_block *)bh->b_data;
959 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, 1020 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
960 sizeof(disk_super->magic))) { 1021 sizeof(disk_super->magic))) {
961 ret = -ENOENT;
962 goto error_brelse;
963 }
964 if (memcmp(disk_super->fsid, root->fs_info->fsid,
965 BTRFS_FSID_SIZE)) {
966 ret = -ENOENT; 1022 ret = -ENOENT;
967 goto error_brelse; 1023 goto error_brelse;
968 } 1024 }
969 devid = le64_to_cpu(disk_super->dev_item.devid); 1025 devid = le64_to_cpu(disk_super->dev_item.devid);
970 device = btrfs_find_device(root, devid, NULL); 1026 dev_uuid = disk_super->dev_item.uuid;
1027 device = btrfs_find_device(root, devid, dev_uuid,
1028 disk_super->fsid);
971 if (!device) { 1029 if (!device) {
972 ret = -ENOENT; 1030 ret = -ENOENT;
973 goto error_brelse; 1031 goto error_brelse;
974 } 1032 }
1033 }
975 1034
1035 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
1036 printk("btrfs: unable to remove the only writeable device\n");
1037 ret = -EINVAL;
1038 goto error_brelse;
1039 }
1040
1041 if (device->writeable) {
1042 list_del_init(&device->dev_alloc_list);
1043 root->fs_info->fs_devices->rw_devices--;
976 } 1044 }
977 root->fs_info->fs_devices->num_devices--;
978 root->fs_info->fs_devices->open_devices--;
979 1045
980 ret = btrfs_shrink_device(device, 0); 1046 ret = btrfs_shrink_device(device, 0);
981 if (ret) 1047 if (ret)
982 goto error_brelse; 1048 goto error_brelse;
983 1049
984
985 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1050 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
986 if (ret) 1051 if (ret)
987 goto error_brelse; 1052 goto error_brelse;
988 1053
989 if (bh) { 1054 device->in_fs_metadata = 0;
1055 if (device->fs_devices == root->fs_info->fs_devices) {
1056 list_del_init(&device->dev_list);
1057 root->fs_info->fs_devices->num_devices--;
1058 if (device->bdev)
1059 device->fs_devices->open_devices--;
1060 }
1061
1062 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1063 struct btrfs_device, dev_list);
1064 if (device->bdev == root->fs_info->sb->s_bdev)
1065 root->fs_info->sb->s_bdev = next_device->bdev;
1066 if (device->bdev == root->fs_info->fs_devices->latest_bdev)
1067 root->fs_info->fs_devices->latest_bdev = next_device->bdev;
1068
1069 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
1070 btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
1071
1072 if (device->fs_devices != root->fs_info->fs_devices) {
1073 BUG_ON(device->writeable);
1074 brelse(bh);
1075 if (bdev)
1076 close_bdev_excl(bdev);
1077
1078 if (device->bdev) {
1079 close_bdev_excl(device->bdev);
1080 device->bdev = NULL;
1081 device->fs_devices->open_devices--;
1082 }
1083 if (device->fs_devices->open_devices == 0) {
1084 struct btrfs_fs_devices *fs_devices;
1085 fs_devices = root->fs_info->fs_devices;
1086 while (fs_devices) {
1087 if (fs_devices->seed == device->fs_devices)
1088 break;
1089 fs_devices = fs_devices->seed;
1090 }
1091 fs_devices->seed = device->fs_devices->seed;
1092 device->fs_devices->seed = NULL;
1093 __btrfs_close_devices(device->fs_devices);
1094 }
1095 ret = 0;
1096 goto out;
1097 }
1098
1099 /*
1100 * at this point, the device is zero sized. We want to
1101 * remove it from the devices list and zero out the old super
1102 */
1103 if (device->writeable) {
990 /* make sure this device isn't detected as part of 1104 /* make sure this device isn't detected as part of
991 * the FS anymore 1105 * the FS anymore
992 */ 1106 */
993 memset(&disk_super->magic, 0, sizeof(disk_super->magic)); 1107 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
994 set_buffer_dirty(bh); 1108 set_buffer_dirty(bh);
995 sync_dirty_buffer(bh); 1109 sync_dirty_buffer(bh);
996
997 brelse(bh);
998 } 1110 }
1111 brelse(bh);
999 1112
1000 if (device->bdev) { 1113 if (device->bdev) {
1001 /* one close for the device struct or super_block */ 1114 /* one close for the device struct or super_block */
@@ -1021,6 +1134,129 @@ out:
1021 return ret; 1134 return ret;
1022} 1135}
1023 1136
1137/*
1138 * does all the dirty work required for changing file system's UUID.
1139 */
1140static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
1141 struct btrfs_root *root)
1142{
1143 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
1144 struct btrfs_fs_devices *old_devices;
1145 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
1146 struct btrfs_device *device;
1147 u64 super_flags;
1148
1149 BUG_ON(!mutex_is_locked(&uuid_mutex));
1150 if (!fs_devices->seeding || fs_devices->opened != 1)
1151 return -EINVAL;
1152
1153 old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
1154 if (!old_devices)
1155 return -ENOMEM;
1156
1157 memcpy(old_devices, fs_devices, sizeof(*old_devices));
1158 old_devices->opened = 1;
1159 old_devices->sprouted = 1;
1160 INIT_LIST_HEAD(&old_devices->devices);
1161 INIT_LIST_HEAD(&old_devices->alloc_list);
1162 list_splice_init(&fs_devices->devices, &old_devices->devices);
1163 list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list);
1164 list_for_each_entry(device, &old_devices->devices, dev_list) {
1165 device->fs_devices = old_devices;
1166 }
1167 list_add(&old_devices->list, &fs_uuids);
1168
1169 fs_devices->seeding = 0;
1170 fs_devices->num_devices = 0;
1171 fs_devices->open_devices = 0;
1172 fs_devices->seed = old_devices;
1173
1174 generate_random_uuid(fs_devices->fsid);
1175 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1176 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1177 super_flags = btrfs_super_flags(disk_super) &
1178 ~BTRFS_SUPER_FLAG_SEEDING;
1179 btrfs_set_super_flags(disk_super, super_flags);
1180
1181 return 0;
1182}
1183
1184/*
1185 * store the expected generation for seed devices in device items.
1186 */
1187static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
1188 struct btrfs_root *root)
1189{
1190 struct btrfs_path *path;
1191 struct extent_buffer *leaf;
1192 struct btrfs_dev_item *dev_item;
1193 struct btrfs_device *device;
1194 struct btrfs_key key;
1195 u8 fs_uuid[BTRFS_UUID_SIZE];
1196 u8 dev_uuid[BTRFS_UUID_SIZE];
1197 u64 devid;
1198 int ret;
1199
1200 path = btrfs_alloc_path();
1201 if (!path)
1202 return -ENOMEM;
1203
1204 root = root->fs_info->chunk_root;
1205 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1206 key.offset = 0;
1207 key.type = BTRFS_DEV_ITEM_KEY;
1208
1209 while (1) {
1210 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1211 if (ret < 0)
1212 goto error;
1213
1214 leaf = path->nodes[0];
1215next_slot:
1216 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1217 ret = btrfs_next_leaf(root, path);
1218 if (ret > 0)
1219 break;
1220 if (ret < 0)
1221 goto error;
1222 leaf = path->nodes[0];
1223 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1224 btrfs_release_path(root, path);
1225 continue;
1226 }
1227
1228 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1229 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
1230 key.type != BTRFS_DEV_ITEM_KEY)
1231 break;
1232
1233 dev_item = btrfs_item_ptr(leaf, path->slots[0],
1234 struct btrfs_dev_item);
1235 devid = btrfs_device_id(leaf, dev_item);
1236 read_extent_buffer(leaf, dev_uuid,
1237 (unsigned long)btrfs_device_uuid(dev_item),
1238 BTRFS_UUID_SIZE);
1239 read_extent_buffer(leaf, fs_uuid,
1240 (unsigned long)btrfs_device_fsid(dev_item),
1241 BTRFS_UUID_SIZE);
1242 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
1243 BUG_ON(!device);
1244
1245 if (device->fs_devices->seeding) {
1246 btrfs_set_device_generation(leaf, dev_item,
1247 device->generation);
1248 btrfs_mark_buffer_dirty(leaf);
1249 }
1250
1251 path->slots[0]++;
1252 goto next_slot;
1253 }
1254 ret = 0;
1255error:
1256 btrfs_free_path(path);
1257 return ret;
1258}
1259
1024int btrfs_init_new_device(struct btrfs_root *root, char *device_path) 1260int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1025{ 1261{
1026 struct btrfs_trans_handle *trans; 1262 struct btrfs_trans_handle *trans;
@@ -1028,26 +1264,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1028 struct block_device *bdev; 1264 struct block_device *bdev;
1029 struct list_head *cur; 1265 struct list_head *cur;
1030 struct list_head *devices; 1266 struct list_head *devices;
1267 struct super_block *sb = root->fs_info->sb;
1031 u64 total_bytes; 1268 u64 total_bytes;
1269 int seeding_dev = 0;
1032 int ret = 0; 1270 int ret = 0;
1033 1271
1272 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1273 return -EINVAL;
1034 1274
1035 bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); 1275 bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
1036 if (!bdev) { 1276 if (!bdev) {
1037 return -EIO; 1277 return -EIO;
1038 } 1278 }
1039 1279
1280 if (root->fs_info->fs_devices->seeding) {
1281 seeding_dev = 1;
1282 down_write(&sb->s_umount);
1283 mutex_lock(&uuid_mutex);
1284 }
1285
1040 filemap_write_and_wait(bdev->bd_inode->i_mapping); 1286 filemap_write_and_wait(bdev->bd_inode->i_mapping);
1041 mutex_lock(&root->fs_info->volume_mutex); 1287 mutex_lock(&root->fs_info->volume_mutex);
1042 1288
1043 trans = btrfs_start_transaction(root, 1);
1044 lock_chunks(root);
1045 devices = &root->fs_info->fs_devices->devices; 1289 devices = &root->fs_info->fs_devices->devices;
1046 list_for_each(cur, devices) { 1290 list_for_each(cur, devices) {
1047 device = list_entry(cur, struct btrfs_device, dev_list); 1291 device = list_entry(cur, struct btrfs_device, dev_list);
1048 if (device->bdev == bdev) { 1292 if (device->bdev == bdev) {
1049 ret = -EEXIST; 1293 ret = -EEXIST;
1050 goto out; 1294 goto error;
1051 } 1295 }
1052 } 1296 }
1053 1297
@@ -1055,18 +1299,31 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1055 if (!device) { 1299 if (!device) {
1056 /* we can safely leave the fs_devices entry around */ 1300 /* we can safely leave the fs_devices entry around */
1057 ret = -ENOMEM; 1301 ret = -ENOMEM;
1058 goto out_close_bdev; 1302 goto error;
1059 } 1303 }
1060 1304
1061 device->barriers = 1;
1062 device->work.func = pending_bios_fn;
1063 generate_random_uuid(device->uuid);
1064 spin_lock_init(&device->io_lock);
1065 device->name = kstrdup(device_path, GFP_NOFS); 1305 device->name = kstrdup(device_path, GFP_NOFS);
1066 if (!device->name) { 1306 if (!device->name) {
1067 kfree(device); 1307 kfree(device);
1068 goto out_close_bdev; 1308 ret = -ENOMEM;
1309 goto error;
1069 } 1310 }
1311
1312 ret = find_next_devid(root, &device->devid);
1313 if (ret) {
1314 kfree(device);
1315 goto error;
1316 }
1317
1318 trans = btrfs_start_transaction(root, 1);
1319 lock_chunks(root);
1320
1321 device->barriers = 1;
1322 device->writeable = 1;
1323 device->work.func = pending_bios_fn;
1324 generate_random_uuid(device->uuid);
1325 spin_lock_init(&device->io_lock);
1326 device->generation = trans->transid;
1070 device->io_width = root->sectorsize; 1327 device->io_width = root->sectorsize;
1071 device->io_align = root->sectorsize; 1328 device->io_align = root->sectorsize;
1072 device->sector_size = root->sectorsize; 1329 device->sector_size = root->sectorsize;
@@ -1074,12 +1331,22 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1074 device->dev_root = root->fs_info->dev_root; 1331 device->dev_root = root->fs_info->dev_root;
1075 device->bdev = bdev; 1332 device->bdev = bdev;
1076 device->in_fs_metadata = 1; 1333 device->in_fs_metadata = 1;
1334 set_blocksize(device->bdev, 4096);
1077 1335
1078 ret = btrfs_add_device(trans, root, device); 1336 if (seeding_dev) {
1079 if (ret) 1337 sb->s_flags &= ~MS_RDONLY;
1080 goto out_close_bdev; 1338 ret = btrfs_prepare_sprout(trans, root);
1339 BUG_ON(ret);
1340 }
1081 1341
1082 set_blocksize(device->bdev, 4096); 1342 device->fs_devices = root->fs_info->fs_devices;
1343 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
1344 list_add(&device->dev_alloc_list,
1345 &root->fs_info->fs_devices->alloc_list);
1346 root->fs_info->fs_devices->num_devices++;
1347 root->fs_info->fs_devices->open_devices++;
1348 root->fs_info->fs_devices->rw_devices++;
1349 root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
1083 1350
1084 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 1351 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
1085 btrfs_set_super_total_bytes(&root->fs_info->super_copy, 1352 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
@@ -1089,20 +1356,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1089 btrfs_set_super_num_devices(&root->fs_info->super_copy, 1356 btrfs_set_super_num_devices(&root->fs_info->super_copy,
1090 total_bytes + 1); 1357 total_bytes + 1);
1091 1358
1092 list_add(&device->dev_list, &root->fs_info->fs_devices->devices); 1359 if (seeding_dev) {
1093 list_add(&device->dev_alloc_list, 1360 ret = init_first_rw_device(trans, root, device);
1094 &root->fs_info->fs_devices->alloc_list); 1361 BUG_ON(ret);
1095 root->fs_info->fs_devices->num_devices++; 1362 ret = btrfs_finish_sprout(trans, root);
1096 root->fs_info->fs_devices->open_devices++; 1363 BUG_ON(ret);
1097out: 1364 } else {
1365 ret = btrfs_add_device(trans, root, device);
1366 }
1367
1098 unlock_chunks(root); 1368 unlock_chunks(root);
1099 btrfs_end_transaction(trans, root); 1369 btrfs_commit_transaction(trans, root);
1100 mutex_unlock(&root->fs_info->volume_mutex);
1101 1370
1102 return ret; 1371 if (seeding_dev) {
1372 mutex_unlock(&uuid_mutex);
1373 up_write(&sb->s_umount);
1103 1374
1104out_close_bdev: 1375 ret = btrfs_relocate_sys_chunks(root);
1376 BUG_ON(ret);
1377 }
1378out:
1379 mutex_unlock(&root->fs_info->volume_mutex);
1380 return ret;
1381error:
1105 close_bdev_excl(bdev); 1382 close_bdev_excl(bdev);
1383 if (seeding_dev) {
1384 mutex_unlock(&uuid_mutex);
1385 up_write(&sb->s_umount);
1386 }
1106 goto out; 1387 goto out;
1107} 1388}
1108 1389
@@ -1160,7 +1441,15 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1160 u64 old_total = btrfs_super_total_bytes(super_copy); 1441 u64 old_total = btrfs_super_total_bytes(super_copy);
1161 u64 diff = new_size - device->total_bytes; 1442 u64 diff = new_size - device->total_bytes;
1162 1443
1444 if (!device->writeable)
1445 return -EACCES;
1446 if (new_size <= device->total_bytes)
1447 return -EINVAL;
1448
1163 btrfs_set_super_total_bytes(super_copy, old_total + diff); 1449 btrfs_set_super_total_bytes(super_copy, old_total + diff);
1450 device->fs_devices->total_rw_bytes += diff;
1451
1452 device->total_bytes = new_size;
1164 return btrfs_update_device(trans, device); 1453 return btrfs_update_device(trans, device);
1165} 1454}
1166 1455
@@ -1248,7 +1537,6 @@ int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1248 return ret; 1537 return ret;
1249} 1538}
1250 1539
1251
1252int btrfs_relocate_chunk(struct btrfs_root *root, 1540int btrfs_relocate_chunk(struct btrfs_root *root,
1253 u64 chunk_tree, u64 chunk_objectid, 1541 u64 chunk_tree, u64 chunk_objectid,
1254 u64 chunk_offset) 1542 u64 chunk_offset)
@@ -1328,6 +1616,64 @@ int btrfs_relocate_chunk(struct btrfs_root *root,
1328 return 0; 1616 return 0;
1329} 1617}
1330 1618
1619static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
1620{
1621 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
1622 struct btrfs_path *path;
1623 struct extent_buffer *leaf;
1624 struct btrfs_chunk *chunk;
1625 struct btrfs_key key;
1626 struct btrfs_key found_key;
1627 u64 chunk_tree = chunk_root->root_key.objectid;
1628 u64 chunk_type;
1629 int ret;
1630
1631 path = btrfs_alloc_path();
1632 if (!path)
1633 return -ENOMEM;
1634
1635 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1636 key.offset = (u64)-1;
1637 key.type = BTRFS_CHUNK_ITEM_KEY;
1638
1639 while (1) {
1640 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1641 if (ret < 0)
1642 goto error;
1643 BUG_ON(ret == 0);
1644
1645 ret = btrfs_previous_item(chunk_root, path, key.objectid,
1646 key.type);
1647 if (ret < 0)
1648 goto error;
1649 if (ret > 0)
1650 break;
1651
1652 leaf = path->nodes[0];
1653 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1654
1655 chunk = btrfs_item_ptr(leaf, path->slots[0],
1656 struct btrfs_chunk);
1657 chunk_type = btrfs_chunk_type(leaf, chunk);
1658 btrfs_release_path(chunk_root, path);
1659
1660 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
1661 ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
1662 found_key.objectid,
1663 found_key.offset);
1664 BUG_ON(ret);
1665 }
1666
1667 if (found_key.offset == 0)
1668 break;
1669 key.offset = found_key.offset - 1;
1670 }
1671 ret = 0;
1672error:
1673 btrfs_free_path(path);
1674 return ret;
1675}
1676
1331static u64 div_factor(u64 num, int factor) 1677static u64 div_factor(u64 num, int factor)
1332{ 1678{
1333 if (factor == 10) 1679 if (factor == 10)
@@ -1337,7 +1683,6 @@ static u64 div_factor(u64 num, int factor)
1337 return num; 1683 return num;
1338} 1684}
1339 1685
1340
1341int btrfs_balance(struct btrfs_root *dev_root) 1686int btrfs_balance(struct btrfs_root *dev_root)
1342{ 1687{
1343 int ret; 1688 int ret;
@@ -1353,6 +1698,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1353 struct btrfs_trans_handle *trans; 1698 struct btrfs_trans_handle *trans;
1354 struct btrfs_key found_key; 1699 struct btrfs_key found_key;
1355 1700
1701 if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
1702 return -EROFS;
1356 1703
1357 mutex_lock(&dev_root->fs_info->volume_mutex); 1704 mutex_lock(&dev_root->fs_info->volume_mutex);
1358 dev_root = dev_root->fs_info->dev_root; 1705 dev_root = dev_root->fs_info->dev_root;
@@ -1363,7 +1710,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
1363 old_size = device->total_bytes; 1710 old_size = device->total_bytes;
1364 size_to_free = div_factor(old_size, 1); 1711 size_to_free = div_factor(old_size, 1);
1365 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); 1712 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
1366 if (device->total_bytes - device->bytes_used > size_to_free) 1713 if (!device->writeable ||
1714 device->total_bytes - device->bytes_used > size_to_free)
1367 continue; 1715 continue;
1368 1716
1369 ret = btrfs_shrink_device(device, old_size - size_to_free); 1717 ret = btrfs_shrink_device(device, old_size - size_to_free);
@@ -1453,6 +1801,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1453 u64 old_total = btrfs_super_total_bytes(super_copy); 1801 u64 old_total = btrfs_super_total_bytes(super_copy);
1454 u64 diff = device->total_bytes - new_size; 1802 u64 diff = device->total_bytes - new_size;
1455 1803
1804 if (new_size >= device->total_bytes)
1805 return -EINVAL;
1456 1806
1457 path = btrfs_alloc_path(); 1807 path = btrfs_alloc_path();
1458 if (!path) 1808 if (!path)
@@ -1469,6 +1819,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1469 lock_chunks(root); 1819 lock_chunks(root);
1470 1820
1471 device->total_bytes = new_size; 1821 device->total_bytes = new_size;
1822 if (device->writeable)
1823 device->fs_devices->total_rw_bytes -= diff;
1472 ret = btrfs_update_device(trans, device); 1824 ret = btrfs_update_device(trans, device);
1473 if (ret) { 1825 if (ret) {
1474 unlock_chunks(root); 1826 unlock_chunks(root);
@@ -1561,32 +1913,27 @@ static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size,
1561 return calc_size * num_stripes; 1913 return calc_size * num_stripes;
1562} 1914}
1563 1915
1564 1916static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1565int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 1917 struct btrfs_root *extent_root,
1566 struct btrfs_root *extent_root, u64 *start, 1918 struct map_lookup **map_ret,
1567 u64 *num_bytes, u64 type) 1919 u64 *num_bytes, u64 *stripe_size,
1920 u64 start, u64 type)
1568{ 1921{
1569 u64 dev_offset;
1570 struct btrfs_fs_info *info = extent_root->fs_info; 1922 struct btrfs_fs_info *info = extent_root->fs_info;
1571 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
1572 struct btrfs_path *path;
1573 struct btrfs_stripe *stripes;
1574 struct btrfs_device *device = NULL; 1923 struct btrfs_device *device = NULL;
1575 struct btrfs_chunk *chunk; 1924 struct btrfs_fs_devices *fs_devices = info->fs_devices;
1576 struct list_head private_devs;
1577 struct list_head *dev_list;
1578 struct list_head *cur; 1925 struct list_head *cur;
1926 struct map_lookup *map = NULL;
1579 struct extent_map_tree *em_tree; 1927 struct extent_map_tree *em_tree;
1580 struct map_lookup *map;
1581 struct extent_map *em; 1928 struct extent_map *em;
1929 struct list_head private_devs;
1582 int min_stripe_size = 1 * 1024 * 1024; 1930 int min_stripe_size = 1 * 1024 * 1024;
1583 u64 physical;
1584 u64 calc_size = 1024 * 1024 * 1024; 1931 u64 calc_size = 1024 * 1024 * 1024;
1585 u64 max_chunk_size = calc_size; 1932 u64 max_chunk_size = calc_size;
1586 u64 min_free; 1933 u64 min_free;
1587 u64 avail; 1934 u64 avail;
1588 u64 max_avail = 0; 1935 u64 max_avail = 0;
1589 u64 percent_max; 1936 u64 dev_offset;
1590 int num_stripes = 1; 1937 int num_stripes = 1;
1591 int min_stripes = 1; 1938 int min_stripes = 1;
1592 int sub_stripes = 0; 1939 int sub_stripes = 0;
@@ -1594,19 +1941,17 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1594 int ret; 1941 int ret;
1595 int index; 1942 int index;
1596 int stripe_len = 64 * 1024; 1943 int stripe_len = 64 * 1024;
1597 struct btrfs_key key;
1598 1944
1599 if ((type & BTRFS_BLOCK_GROUP_RAID1) && 1945 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1600 (type & BTRFS_BLOCK_GROUP_DUP)) { 1946 (type & BTRFS_BLOCK_GROUP_DUP)) {
1601 WARN_ON(1); 1947 WARN_ON(1);
1602 type &= ~BTRFS_BLOCK_GROUP_DUP; 1948 type &= ~BTRFS_BLOCK_GROUP_DUP;
1603 } 1949 }
1604 dev_list = &extent_root->fs_info->fs_devices->alloc_list; 1950 if (list_empty(&fs_devices->alloc_list))
1605 if (list_empty(dev_list))
1606 return -ENOSPC; 1951 return -ENOSPC;
1607 1952
1608 if (type & (BTRFS_BLOCK_GROUP_RAID0)) { 1953 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
1609 num_stripes = extent_root->fs_info->fs_devices->open_devices; 1954 num_stripes = fs_devices->rw_devices;
1610 min_stripes = 2; 1955 min_stripes = 2;
1611 } 1956 }
1612 if (type & (BTRFS_BLOCK_GROUP_DUP)) { 1957 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
@@ -1614,14 +1959,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1614 min_stripes = 2; 1959 min_stripes = 2;
1615 } 1960 }
1616 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 1961 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
1617 num_stripes = min_t(u64, 2, 1962 num_stripes = min_t(u64, 2, fs_devices->rw_devices);
1618 extent_root->fs_info->fs_devices->open_devices);
1619 if (num_stripes < 2) 1963 if (num_stripes < 2)
1620 return -ENOSPC; 1964 return -ENOSPC;
1621 min_stripes = 2; 1965 min_stripes = 2;
1622 } 1966 }
1623 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 1967 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1624 num_stripes = extent_root->fs_info->fs_devices->open_devices; 1968 num_stripes = fs_devices->rw_devices;
1625 if (num_stripes < 4) 1969 if (num_stripes < 4)
1626 return -ENOSPC; 1970 return -ENOSPC;
1627 num_stripes &= ~(u32)1; 1971 num_stripes &= ~(u32)1;
@@ -1641,15 +1985,19 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1641 min_stripe_size = 1 * 1024 * 1024; 1985 min_stripe_size = 1 * 1024 * 1024;
1642 } 1986 }
1643 1987
1644 path = btrfs_alloc_path(); 1988 /* we don't want a chunk larger than 10% of writeable space */
1645 if (!path) 1989 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
1646 return -ENOMEM; 1990 max_chunk_size);
1647
1648 /* we don't want a chunk larger than 10% of the FS */
1649 percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
1650 max_chunk_size = min(percent_max, max_chunk_size);
1651 1991
1652again: 1992again:
1993 if (!map || map->num_stripes != num_stripes) {
1994 kfree(map);
1995 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
1996 if (!map)
1997 return -ENOMEM;
1998 map->num_stripes = num_stripes;
1999 }
2000
1653 if (calc_size * num_stripes > max_chunk_size) { 2001 if (calc_size * num_stripes > max_chunk_size) {
1654 calc_size = max_chunk_size; 2002 calc_size = max_chunk_size;
1655 do_div(calc_size, num_stripes); 2003 do_div(calc_size, num_stripes);
@@ -1662,8 +2010,7 @@ again:
1662 do_div(calc_size, stripe_len); 2010 do_div(calc_size, stripe_len);
1663 calc_size *= stripe_len; 2011 calc_size *= stripe_len;
1664 2012
1665 INIT_LIST_HEAD(&private_devs); 2013 cur = fs_devices->alloc_list.next;
1666 cur = dev_list->next;
1667 index = 0; 2014 index = 0;
1668 2015
1669 if (type & BTRFS_BLOCK_GROUP_DUP) 2016 if (type & BTRFS_BLOCK_GROUP_DUP)
@@ -1679,10 +2026,10 @@ again:
1679 if (!looped) 2026 if (!looped)
1680 min_free += 1024 * 1024; 2027 min_free += 1024 * 1024;
1681 2028
1682 /* build a private list of devices we will allocate from */ 2029 INIT_LIST_HEAD(&private_devs);
1683 while(index < num_stripes) { 2030 while(index < num_stripes) {
1684 device = list_entry(cur, struct btrfs_device, dev_alloc_list); 2031 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
1685 2032 BUG_ON(!device->writeable);
1686 if (device->total_bytes > device->bytes_used) 2033 if (device->total_bytes > device->bytes_used)
1687 avail = device->total_bytes - device->bytes_used; 2034 avail = device->total_bytes - device->bytes_used;
1688 else 2035 else
@@ -1690,24 +2037,28 @@ again:
1690 cur = cur->next; 2037 cur = cur->next;
1691 2038
1692 if (device->in_fs_metadata && avail >= min_free) { 2039 if (device->in_fs_metadata && avail >= min_free) {
1693 u64 ignored_start = 0; 2040 ret = find_free_dev_extent(trans, device,
1694 ret = find_free_dev_extent(trans, device, path, 2041 min_free, &dev_offset);
1695 min_free,
1696 &ignored_start);
1697 if (ret == 0) { 2042 if (ret == 0) {
1698 list_move_tail(&device->dev_alloc_list, 2043 list_move_tail(&device->dev_alloc_list,
1699 &private_devs); 2044 &private_devs);
2045 map->stripes[index].dev = device;
2046 map->stripes[index].physical = dev_offset;
1700 index++; 2047 index++;
1701 if (type & BTRFS_BLOCK_GROUP_DUP) 2048 if (type & BTRFS_BLOCK_GROUP_DUP) {
2049 map->stripes[index].dev = device;
2050 map->stripes[index].physical =
2051 dev_offset + calc_size;
1702 index++; 2052 index++;
2053 }
1703 } 2054 }
1704 } else if (device->in_fs_metadata && avail > max_avail) 2055 } else if (device->in_fs_metadata && avail > max_avail)
1705 max_avail = avail; 2056 max_avail = avail;
1706 if (cur == dev_list) 2057 if (cur == &fs_devices->alloc_list)
1707 break; 2058 break;
1708 } 2059 }
2060 list_splice(&private_devs, &fs_devices->alloc_list);
1709 if (index < num_stripes) { 2061 if (index < num_stripes) {
1710 list_splice(&private_devs, dev_list);
1711 if (index >= min_stripes) { 2062 if (index >= min_stripes) {
1712 num_stripes = index; 2063 num_stripes = index;
1713 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2064 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
@@ -1722,115 +2073,246 @@ again:
1722 calc_size = max_avail; 2073 calc_size = max_avail;
1723 goto again; 2074 goto again;
1724 } 2075 }
1725 btrfs_free_path(path); 2076 kfree(map);
1726 return -ENOSPC; 2077 return -ENOSPC;
1727 } 2078 }
1728 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2079 map->sector_size = extent_root->sectorsize;
1729 key.type = BTRFS_CHUNK_ITEM_KEY; 2080 map->stripe_len = stripe_len;
1730 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, 2081 map->io_align = stripe_len;
1731 &key.offset); 2082 map->io_width = stripe_len;
1732 if (ret) { 2083 map->type = type;
1733 btrfs_free_path(path); 2084 map->num_stripes = num_stripes;
1734 return ret; 2085 map->sub_stripes = sub_stripes;
1735 }
1736 2086
1737 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); 2087 *map_ret = map;
1738 if (!chunk) { 2088 *stripe_size = calc_size;
1739 btrfs_free_path(path); 2089 *num_bytes = chunk_bytes_by_type(type, calc_size,
1740 return -ENOMEM; 2090 num_stripes, sub_stripes);
1741 }
1742 2091
1743 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 2092 em = alloc_extent_map(GFP_NOFS);
1744 if (!map) { 2093 if (!em) {
1745 kfree(chunk); 2094 kfree(map);
1746 btrfs_free_path(path);
1747 return -ENOMEM; 2095 return -ENOMEM;
1748 } 2096 }
1749 btrfs_free_path(path); 2097 em->bdev = (struct block_device *)map;
1750 path = NULL; 2098 em->start = start;
2099 em->len = *num_bytes;
2100 em->block_start = 0;
2101 em->block_len = em->len;
1751 2102
1752 stripes = &chunk->stripe; 2103 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
1753 *num_bytes = chunk_bytes_by_type(type, calc_size, 2104 spin_lock(&em_tree->lock);
1754 num_stripes, sub_stripes); 2105 ret = add_extent_mapping(em_tree, em);
2106 spin_unlock(&em_tree->lock);
2107 BUG_ON(ret);
2108 free_extent_map(em);
1755 2109
1756 index = 0; 2110 ret = btrfs_make_block_group(trans, extent_root, 0, type,
1757 while(index < num_stripes) { 2111 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1758 struct btrfs_stripe *stripe; 2112 start, *num_bytes);
1759 BUG_ON(list_empty(&private_devs)); 2113 BUG_ON(ret);
1760 cur = private_devs.next;
1761 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
1762 2114
1763 /* loop over this device again if we're doing a dup group */ 2115 index = 0;
1764 if (!(type & BTRFS_BLOCK_GROUP_DUP) || 2116 while (index < map->num_stripes) {
1765 (index == num_stripes - 1)) 2117 device = map->stripes[index].dev;
1766 list_move_tail(&device->dev_alloc_list, dev_list); 2118 dev_offset = map->stripes[index].physical;
1767 2119
1768 ret = btrfs_alloc_dev_extent(trans, device, 2120 ret = btrfs_alloc_dev_extent(trans, device,
1769 info->chunk_root->root_key.objectid, 2121 info->chunk_root->root_key.objectid,
1770 BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset, 2122 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1771 calc_size, &dev_offset); 2123 start, dev_offset, calc_size);
1772 BUG_ON(ret); 2124 BUG_ON(ret);
1773 device->bytes_used += calc_size; 2125 index++;
2126 }
2127
2128 return 0;
2129}
2130
2131static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
2132 struct btrfs_root *extent_root,
2133 struct map_lookup *map, u64 chunk_offset,
2134 u64 chunk_size, u64 stripe_size)
2135{
2136 u64 dev_offset;
2137 struct btrfs_key key;
2138 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2139 struct btrfs_device *device;
2140 struct btrfs_chunk *chunk;
2141 struct btrfs_stripe *stripe;
2142 size_t item_size = btrfs_chunk_item_size(map->num_stripes);
2143 int index = 0;
2144 int ret;
2145
2146 chunk = kzalloc(item_size, GFP_NOFS);
2147 if (!chunk)
2148 return -ENOMEM;
2149
2150 index = 0;
2151 while (index < map->num_stripes) {
2152 device = map->stripes[index].dev;
2153 device->bytes_used += stripe_size;
1774 ret = btrfs_update_device(trans, device); 2154 ret = btrfs_update_device(trans, device);
1775 BUG_ON(ret); 2155 BUG_ON(ret);
2156 index++;
2157 }
2158
2159 index = 0;
2160 stripe = &chunk->stripe;
2161 while (index < map->num_stripes) {
2162 device = map->stripes[index].dev;
2163 dev_offset = map->stripes[index].physical;
1776 2164
1777 map->stripes[index].dev = device;
1778 map->stripes[index].physical = dev_offset;
1779 stripe = stripes + index;
1780 btrfs_set_stack_stripe_devid(stripe, device->devid); 2165 btrfs_set_stack_stripe_devid(stripe, device->devid);
1781 btrfs_set_stack_stripe_offset(stripe, dev_offset); 2166 btrfs_set_stack_stripe_offset(stripe, dev_offset);
1782 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); 2167 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
1783 physical = dev_offset; 2168 stripe++;
1784 index++; 2169 index++;
1785 } 2170 }
1786 BUG_ON(!list_empty(&private_devs));
1787 2171
1788 /* key was set above */ 2172 btrfs_set_stack_chunk_length(chunk, chunk_size);
1789 btrfs_set_stack_chunk_length(chunk, *num_bytes);
1790 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); 2173 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
1791 btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); 2174 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
1792 btrfs_set_stack_chunk_type(chunk, type); 2175 btrfs_set_stack_chunk_type(chunk, map->type);
1793 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); 2176 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
1794 btrfs_set_stack_chunk_io_align(chunk, stripe_len); 2177 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
1795 btrfs_set_stack_chunk_io_width(chunk, stripe_len); 2178 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
1796 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); 2179 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
1797 btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes); 2180 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
1798 map->sector_size = extent_root->sectorsize;
1799 map->stripe_len = stripe_len;
1800 map->io_align = stripe_len;
1801 map->io_width = stripe_len;
1802 map->type = type;
1803 map->num_stripes = num_stripes;
1804 map->sub_stripes = sub_stripes;
1805 2181
1806 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
1807 btrfs_chunk_item_size(num_stripes));
1808 BUG_ON(ret);
1809 *start = key.offset;;
1810
1811 em = alloc_extent_map(GFP_NOFS);
1812 if (!em)
1813 return -ENOMEM;
1814 em->bdev = (struct block_device *)map;
1815 em->start = key.offset;
1816 em->len = *num_bytes;
1817 em->block_start = 0;
1818 em->block_len = em->len;
2182 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2183 key.type = BTRFS_CHUNK_ITEM_KEY;
2184 key.offset = chunk_offset;
2185
2186 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
2187 BUG_ON(ret);
1819 2188
1820 if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 2189 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1821 ret = btrfs_add_system_chunk(trans, chunk_root, &key, 2190 ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
1822 chunk, btrfs_chunk_item_size(num_stripes)); 2191 item_size);
1823 BUG_ON(ret); 2192 BUG_ON(ret);
1824 } 2193 }
1825 kfree(chunk); 2194 kfree(chunk);
2195 return 0;
2196}
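The new __finish_chunk_alloc packs one on-disk stripe per device directly behind the chunk item, advancing a pointer from &chunk->stripe and sizing the item with btrfs_chunk_item_size(). The sketch below models that flexible layout in plain userspace C; the struct names, field widths and the 16-byte UUID are stand-ins, not the kernel definitions.

/*
 * Userspace model of a chunk item followed by one stripe record per device.
 * Illustrative only; names and sizes are simplified stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FAKE_UUID_SIZE 16

struct fake_stripe {
	unsigned long long devid;
	unsigned long long offset;
	unsigned char dev_uuid[FAKE_UUID_SIZE];
};

struct fake_chunk {
	unsigned long long length;
	int num_stripes;
	struct fake_stripe stripe;	/* first stripe; more follow in memory */
};

static size_t fake_chunk_item_size(int num_stripes)
{
	/* one stripe is embedded in the chunk struct, so only n-1 extra */
	return sizeof(struct fake_chunk) +
	       sizeof(struct fake_stripe) * (num_stripes - 1);
}

int main(void)
{
	int num_stripes = 3;
	struct fake_chunk *chunk = calloc(1, fake_chunk_item_size(num_stripes));
	struct fake_stripe *stripe;
	int i;

	if (!chunk)
		return 1;
	chunk->num_stripes = num_stripes;

	/* walk a pointer from the embedded first stripe, as the patch does */
	stripe = &chunk->stripe;
	for (i = 0; i < num_stripes; i++) {
		stripe->devid = i + 1;
		stripe->offset = (unsigned long long)i * 1024 * 1024;
		memset(stripe->dev_uuid, 0xab, FAKE_UUID_SIZE);
		stripe++;
	}

	printf("item size: %zu bytes\n", fake_chunk_item_size(num_stripes));
	free(chunk);
	return 0;
}

Embedding the first stripe in the chunk structure is why the item size only needs num_stripes - 1 additional stripe records.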
1826 2197
1827 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
1828 spin_lock(&em_tree->lock);
1829 ret = add_extent_mapping(em_tree, em);
1830 spin_unlock(&em_tree->lock);
2198/*
2199 * Chunk allocation falls into two parts. The first part does the work
2200 * that makes the newly allocated chunk usable but does not perform any
2201 * operation that modifies the chunk tree. The second part does the work
2202 * that requires modifying the chunk tree. This division is important for
2203 * the bootstrap process of adding storage to a seed btrfs.
2204 */
2205int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2206 struct btrfs_root *extent_root, u64 type)
2207{
2208 u64 chunk_offset;
2209 u64 chunk_size;
2210 u64 stripe_size;
2211 struct map_lookup *map;
2212 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
2213 int ret;
2214
2215 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
2216 &chunk_offset);
2217 if (ret)
2218 return ret;
2219
2220 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2221 &stripe_size, chunk_offset, type);
2222 if (ret)
2223 return ret;
2224
2225 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2226 chunk_size, stripe_size);
1831 BUG_ON(ret); 2227 BUG_ON(ret);
2228 return 0;
2229}
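btrfs_alloc_chunk now simply strings the two halves together: find the next chunk offset, let __btrfs_alloc_chunk work out the layout without touching the chunk tree, then let __finish_chunk_alloc commit it. init_first_rw_device below relies on the same split so that both the metadata and system block groups exist before either chunk item is inserted. A minimal userspace sketch of that ordering follows, with invented names and a single stripe; it illustrates the control flow only and is not the kernel implementation.

/*
 * Minimal sketch of the two-phase chunk allocation described in the
 * comment above.  All names are illustrative.
 */
#include <stdio.h>

struct fake_map {
	unsigned long long chunk_offset;
	unsigned long long chunk_size;
	unsigned long long stripe_size;
};

/* phase 1: decide the layout only; must not modify the (fake) chunk tree */
static int alloc_chunk_layout(struct fake_map *map, unsigned long long offset)
{
	map->chunk_offset = offset;
	map->stripe_size = 256 * 1024 * 1024ULL;
	map->chunk_size = map->stripe_size;	/* single stripe in this sketch */
	return 0;
}

/* phase 2: commit the layout, i.e. the step that would write the chunk tree */
static int finish_chunk_alloc(const struct fake_map *map)
{
	printf("commit chunk at %llu, size %llu\n",
	       map->chunk_offset, map->chunk_size);
	return 0;
}

int main(void)
{
	struct fake_map map;

	if (alloc_chunk_layout(&map, 0))
		return 1;
	return finish_chunk_alloc(&map);
}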
2230
2231static int noinline init_first_rw_device(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root,
2233 struct btrfs_device *device)
2234{
2235 u64 chunk_offset;
2236 u64 sys_chunk_offset;
2237 u64 chunk_size;
2238 u64 sys_chunk_size;
2239 u64 stripe_size;
2240 u64 sys_stripe_size;
2241 u64 alloc_profile;
2242 struct map_lookup *map;
2243 struct map_lookup *sys_map;
2244 struct btrfs_fs_info *fs_info = root->fs_info;
2245 struct btrfs_root *extent_root = fs_info->extent_root;
2246 int ret;
2247
2248 ret = find_next_chunk(fs_info->chunk_root,
2249 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
2250 BUG_ON(ret);
2251
2252 alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
2253 (fs_info->metadata_alloc_profile &
2254 fs_info->avail_metadata_alloc_bits);
2255 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2256
2257 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
2258 &stripe_size, chunk_offset, alloc_profile);
2259 BUG_ON(ret);
2260
2261 sys_chunk_offset = chunk_offset + chunk_size;
2262
2263 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
2264 (fs_info->system_alloc_profile &
2265 fs_info->avail_system_alloc_bits);
2266 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2267
2268 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
2269 &sys_chunk_size, &sys_stripe_size,
2270 sys_chunk_offset, alloc_profile);
2271 BUG_ON(ret);
2272
2273 ret = btrfs_add_device(trans, fs_info->chunk_root, device);
2274 BUG_ON(ret);
2275
2276 /*
2277 * Modifying the chunk tree requires allocating new blocks from both
2278 * the system block group and the metadata block group. So we can only
2279 * perform operations that modify the chunk tree after both block
2280 * groups have been created.
2281 */
2282 ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
2283 chunk_size, stripe_size);
2284 BUG_ON(ret);
2285
2286 ret = __finish_chunk_alloc(trans, extent_root, sys_map,
2287 sys_chunk_offset, sys_chunk_size,
2288 sys_stripe_size);
2289 BUG_ON(ret);
2290 return 0;
2291}
2292
2293int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2294{
2295 struct extent_map *em;
2296 struct map_lookup *map;
2297 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2298 int readonly = 0;
2299 int i;
2300
2301 spin_lock(&map_tree->map_tree.lock);
2302 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2303 spin_unlock(&map_tree->map_tree.lock);
2304 if (!em)
2305 return 1;
2306
2307 map = (struct map_lookup *)em->bdev;
2308 for (i = 0; i < map->num_stripes; i++) {
2309 if (!map->stripes[i].dev->writeable) {
2310 readonly = 1;
2311 break;
2312 }
2313 }
1832 free_extent_map(em); 2314 free_extent_map(em);
1833 return ret; 2315 return readonly;
1834} 2316}
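btrfs_chunk_readonly treats a chunk as read-only as soon as any of its stripes lives on a non-writeable device, which is exactly the case for chunks inherited from a seed device. A standalone sketch of the same check, using stand-in types rather than the kernel structures:

/*
 * Sketch of the read-only check: a chunk is read-only if any stripe sits
 * on a device that is not writeable.  Types are stand-ins.
 */
#include <stdio.h>

struct fake_device {
	int writeable;
};

struct fake_stripe {
	struct fake_device *dev;
};

static int chunk_readonly(const struct fake_stripe *stripes, int num_stripes)
{
	int i;

	for (i = 0; i < num_stripes; i++) {
		if (!stripes[i].dev->writeable)
			return 1;
	}
	return 0;
}

int main(void)
{
	struct fake_device seed = { .writeable = 0 };
	struct fake_device rw = { .writeable = 1 };
	struct fake_stripe stripes[2] = { { &rw }, { &seed } };

	printf("readonly: %d\n", chunk_readonly(stripes, 2));
	return 0;
}

Note that in the patch a missing mapping is also reported as read-only (return 1), which errs on the safe side.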
1835 2317
1836void btrfs_mapping_init(struct btrfs_mapping_tree *tree) 2318void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
@@ -2227,6 +2709,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
2227 } 2709 }
2228 bio->bi_sector = multi->stripes[dev_nr].physical >> 9; 2710 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2229 dev = multi->stripes[dev_nr].dev; 2711 dev = multi->stripes[dev_nr].dev;
2712 BUG_ON(rw == WRITE && !dev->writeable);
2230 if (dev && dev->bdev) { 2713 if (dev && dev->bdev) {
2231 bio->bi_bdev = dev->bdev; 2714 bio->bi_bdev = dev->bdev;
2232 if (async_submit) 2715 if (async_submit)
@@ -2246,11 +2729,23 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
2246} 2729}
2247 2730
2248struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, 2731struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2249 u8 *uuid) 2732 u8 *uuid, u8 *fsid)
2250{ 2733{
2251 struct list_head *head = &root->fs_info->fs_devices->devices;
2252
2253 return __find_device(head, devid, uuid);
2734 struct btrfs_device *device;
2735 struct btrfs_fs_devices *cur_devices;
2736
2737 cur_devices = root->fs_info->fs_devices;
2738 while (cur_devices) {
2739 if (!fsid ||
2740 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2741 device = __find_device(&cur_devices->devices,
2742 devid, uuid);
2743 if (device)
2744 return device;
2745 }
2746 cur_devices = cur_devices->seed;
2747 }
2748 return NULL;
2254} 2749}
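With the new fsid argument, btrfs_find_device walks the whole chain of fs_devices through the ->seed pointer, restricting the search to a matching fsid when one is supplied and scanning every filesystem in the chain when fsid is NULL. A simplified userspace model of that walk; the structures and the 16-byte fsid are stand-ins:

/*
 * Simplified model of searching for a device across a chain of seed
 * filesystems.  Stand-in structures only.
 */
#include <stdio.h>
#include <string.h>

#define FAKE_UUID_SIZE 16

struct fake_device {
	unsigned long long devid;
	struct fake_device *next;
};

struct fake_fs_devices {
	unsigned char fsid[FAKE_UUID_SIZE];
	struct fake_device *devices;
	struct fake_fs_devices *seed;	/* next filesystem in the seed chain */
};

static struct fake_device *find_device(struct fake_fs_devices *fs_devices,
				       unsigned long long devid,
				       const unsigned char *fsid)
{
	struct fake_fs_devices *cur = fs_devices;
	struct fake_device *dev;

	while (cur) {
		/* a NULL fsid means "search every filesystem in the chain" */
		if (!fsid || !memcmp(cur->fsid, fsid, FAKE_UUID_SIZE)) {
			for (dev = cur->devices; dev; dev = dev->next)
				if (dev->devid == devid)
					return dev;
		}
		cur = cur->seed;
	}
	return NULL;
}

int main(void)
{
	struct fake_device seed_dev = { .devid = 1, .next = NULL };
	struct fake_fs_devices seed_fs = { .devices = &seed_dev, .seed = NULL };
	struct fake_device new_dev = { .devid = 2, .next = NULL };
	struct fake_fs_devices sprout = { .devices = &new_dev, .seed = &seed_fs };

	memset(seed_fs.fsid, 0x11, FAKE_UUID_SIZE);
	memset(sprout.fsid, 0x22, FAKE_UUID_SIZE);

	printf("devid 1 found: %s\n",
	       find_device(&sprout, 1, NULL) ? "yes" : "no");
	return 0;
}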
2255 2750
2256static struct btrfs_device *add_missing_dev(struct btrfs_root *root, 2751static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
@@ -2262,8 +2757,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2262 device = kzalloc(sizeof(*device), GFP_NOFS); 2757 device = kzalloc(sizeof(*device), GFP_NOFS);
2263 list_add(&device->dev_list, 2758 list_add(&device->dev_list,
2264 &fs_devices->devices); 2759 &fs_devices->devices);
2265 list_add(&device->dev_alloc_list,
2266 &fs_devices->alloc_list);
2267 device->barriers = 1; 2760 device->barriers = 1;
2268 device->dev_root = root->fs_info->dev_root; 2761 device->dev_root = root->fs_info->dev_root;
2269 device->devid = devid; 2762 device->devid = devid;
@@ -2274,7 +2767,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2274 return device; 2767 return device;
2275} 2768}
2276 2769
2277
2278static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, 2770static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2279 struct extent_buffer *leaf, 2771 struct extent_buffer *leaf,
2280 struct btrfs_chunk *chunk) 2772 struct btrfs_chunk *chunk)
@@ -2339,8 +2831,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2339 read_extent_buffer(leaf, uuid, (unsigned long) 2831 read_extent_buffer(leaf, uuid, (unsigned long)
2340 btrfs_stripe_dev_uuid_nr(chunk, i), 2832 btrfs_stripe_dev_uuid_nr(chunk, i),
2341 BTRFS_UUID_SIZE); 2833 BTRFS_UUID_SIZE);
2342 map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
2343
2834 map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
2835 NULL);
2344 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { 2836 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
2345 kfree(map); 2837 kfree(map);
2346 free_extent_map(em); 2838 free_extent_map(em);
@@ -2387,6 +2879,50 @@ static int fill_device_from_item(struct extent_buffer *leaf,
2387 return 0; 2879 return 0;
2388} 2880}
2389 2881
2882static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
2883{
2884 struct btrfs_fs_devices *fs_devices;
2885 int ret;
2886
2887 mutex_lock(&uuid_mutex);
2888
2889 fs_devices = root->fs_info->fs_devices->seed;
2890 while (fs_devices) {
2891 if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
2892 ret = 0;
2893 goto out;
2894 }
2895 fs_devices = fs_devices->seed;
2896 }
2897
2898 fs_devices = find_fsid(fsid);
2899 if (!fs_devices) {
2900 ret = -ENOENT;
2901 goto out;
2902 }
2903 if (fs_devices->opened) {
2904 ret = -EBUSY;
2905 goto out;
2906 }
2907
2908 ret = __btrfs_open_devices(fs_devices, root->fs_info->bdev_holder);
2909 if (ret)
2910 goto out;
2911
2912 if (!fs_devices->seeding) {
2913 __btrfs_close_devices(fs_devices);
2914 ret = -EINVAL;
2915 goto out;
2916 }
2917
2918 fs_devices->seed = root->fs_info->fs_devices->seed;
2919 root->fs_info->fs_devices->seed = fs_devices;
2920 fs_devices->sprouted = 1;
2921out:
2922 mutex_unlock(&uuid_mutex);
2923 return ret;
2924}
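open_seed_devices looks the seed filesystem up by fsid, opens its devices with the same holder, rejects it unless the SEEDING flag is set, and then splices it onto the front of the sprout's ->seed chain. The splice itself is a singly linked list prepend; a small sketch with stand-in types:

/*
 * Sketch of linking a freshly opened seed filesystem onto the front of the
 * sprout's seed chain, as open_seed_devices does.  Stand-in types only.
 */
#include <stdio.h>

struct fake_fs_devices {
	const char *label;
	int seeding;			/* opened from a seed superblock */
	struct fake_fs_devices *seed;	/* chain of seed filesystems */
};

static void link_seed(struct fake_fs_devices *sprout,
		      struct fake_fs_devices *seed_fs)
{
	/* prepend: the new seed inherits whatever chain already existed */
	seed_fs->seed = sprout->seed;
	sprout->seed = seed_fs;
}

int main(void)
{
	struct fake_fs_devices sprout = { "sprout", 0, NULL };
	struct fake_fs_devices seed_a = { "seed-a", 1, NULL };
	struct fake_fs_devices seed_b = { "seed-b", 1, NULL };
	struct fake_fs_devices *cur;

	link_seed(&sprout, &seed_a);
	link_seed(&sprout, &seed_b);

	for (cur = sprout.seed; cur; cur = cur->seed)
		printf("seed: %s\n", cur->label);
	return 0;
}

Prepending keeps any seed filesystems that were already chained in, so a filesystem grown from several seeds keeps all of them reachable.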
2925
2390static int read_one_dev(struct btrfs_root *root, 2926static int read_one_dev(struct btrfs_root *root,
2391 struct extent_buffer *leaf, 2927 struct extent_buffer *leaf,
2392 struct btrfs_dev_item *dev_item) 2928 struct btrfs_dev_item *dev_item)
@@ -2394,23 +2930,50 @@ static int read_one_dev(struct btrfs_root *root,
2394 struct btrfs_device *device; 2930 struct btrfs_device *device;
2395 u64 devid; 2931 u64 devid;
2396 int ret; 2932 int ret;
2933 int seed_devices = 0;
2934 u8 fs_uuid[BTRFS_UUID_SIZE];
2397 u8 dev_uuid[BTRFS_UUID_SIZE]; 2935 u8 dev_uuid[BTRFS_UUID_SIZE];
2398 2936
2399 devid = btrfs_device_id(leaf, dev_item); 2937 devid = btrfs_device_id(leaf, dev_item);
2400 read_extent_buffer(leaf, dev_uuid, 2938 read_extent_buffer(leaf, dev_uuid,
2401 (unsigned long)btrfs_device_uuid(dev_item), 2939 (unsigned long)btrfs_device_uuid(dev_item),
2402 BTRFS_UUID_SIZE); 2940 BTRFS_UUID_SIZE);
2403 device = btrfs_find_device(root, devid, dev_uuid);
2404 if (!device) {
2405 printk("warning devid %Lu missing\n", devid);
2406 device = add_missing_dev(root, devid, dev_uuid);
2407 if (!device)
2408 return -ENOMEM;
2941 read_extent_buffer(leaf, fs_uuid,
2942 (unsigned long)btrfs_device_fsid(dev_item),
2943 BTRFS_UUID_SIZE);
2944
2945 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
2946 ret = open_seed_devices(root, fs_uuid);
2947 if (ret)
2948 return ret;
2949 seed_devices = 1;
2950 }
2951
2952 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
2953 if (!device || !device->bdev) {
2954 if (!btrfs_test_opt(root, DEGRADED) || seed_devices)
2955 return -EIO;
2956
2957 if (!device) {
2958 printk("warning devid %Lu missing\n", devid);
2959 device = add_missing_dev(root, devid, dev_uuid);
2960 if (!device)
2961 return -ENOMEM;
2962 }
2963 }
2964
2965 if (device->fs_devices != root->fs_info->fs_devices) {
2966 BUG_ON(device->writeable);
2967 if (device->generation !=
2968 btrfs_device_generation(leaf, dev_item))
2969 return -EINVAL;
2409 } 2970 }
2410 2971
2411 fill_device_from_item(leaf, dev_item, device); 2972 fill_device_from_item(leaf, dev_item, device);
2412 device->dev_root = root->fs_info->dev_root; 2973 device->dev_root = root->fs_info->dev_root;
2413 device->in_fs_metadata = 1; 2974 device->in_fs_metadata = 1;
2975 if (device->writeable)
2976 device->fs_devices->total_rw_bytes += device->total_bytes;
2414 ret = 0; 2977 ret = 0;
2415#if 0 2978#if 0
2416 ret = btrfs_open_device(device); 2979 ret = btrfs_open_device(device);
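read_one_dev now reads the fsid out of each device item: a mismatch with the mounted filesystem means the device belongs to a seed, so the seed's devices are opened first. A device that is then missing (or has no bdev) is tolerated only when mounted with -o degraded, and only for the sprout's own devices, never for a seed. A compact sketch of that decision, with illustrative values standing in for the kernel types and error codes:

/*
 * Sketch of the per-device-item decision in read_one_dev: devices under a
 * foreign fsid come from a seed filesystem, and a missing device is only
 * tolerated for the sprout itself when mounted degraded.  Illustrative only.
 */
#include <stdio.h>

enum { OK = 0, ERR_IO = -5 };	/* stand-in for -EIO */

static int check_dev_item(int fsid_matches_mounted_fs, int device_present,
			  int mounted_degraded)
{
	int from_seed = !fsid_matches_mounted_fs;

	if (!device_present) {
		/* seed devices may never be missing; others need -o degraded */
		if (!mounted_degraded || from_seed)
			return ERR_IO;
	}
	return OK;
}

int main(void)
{
	printf("missing seed device: %d\n", check_dev_item(0, 0, 1));
	printf("missing own device, degraded: %d\n", check_dev_item(1, 0, 1));
	printf("present seed device: %d\n", check_dev_item(0, 1, 0));
	return 0;
}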
@@ -2528,12 +3091,15 @@ again:
2528 dev_item = btrfs_item_ptr(leaf, slot, 3091 dev_item = btrfs_item_ptr(leaf, slot,
2529 struct btrfs_dev_item); 3092 struct btrfs_dev_item);
2530 ret = read_one_dev(root, leaf, dev_item); 3093 ret = read_one_dev(root, leaf, dev_item);
2531 BUG_ON(ret); 3094 if (ret)
3095 goto error;
2532 } 3096 }
2533 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { 3097 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
2534 struct btrfs_chunk *chunk; 3098 struct btrfs_chunk *chunk;
2535 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 3099 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2536 ret = read_one_chunk(root, &found_key, leaf, chunk); 3100 ret = read_one_chunk(root, &found_key, leaf, chunk);
3101 if (ret)
3102 goto error;
2537 } 3103 }
2538 path->slots[0]++; 3104 path->slots[0]++;
2539 } 3105 }
@@ -2542,9 +3108,8 @@ again:
2542 btrfs_release_path(root, path); 3108 btrfs_release_path(root, path);
2543 goto again; 3109 goto again;
2544 } 3110 }
2545
2546 btrfs_free_path(path);
2547 ret = 0; 3111 ret = 0;
2548error: 3112error:
3113 btrfs_free_path(path);
2549 return ret; 3114 return ret;
2550} 3115}