diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-04-25 16:53:30 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:02 -0400 |
commit | 8f18cf13396caae5a3d7ae91201cfb15181a9642 (patch) | |
tree | ff4bfc2e7f45f10d176b969408cdb469ae3f7194 /fs/btrfs/volumes.c | |
parent | 5e478dc9828ad33d7b08dcdf277e13f14a7c1be7 (diff) |
Btrfs: Make the resizer work based on shrinking and growing devices
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 324 |
1 files changed, 312 insertions, 12 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c63a982e31d0..a2c56de1548a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -77,7 +77,7 @@ static struct btrfs_device *__find_device(struct list_head *head, u64 devid, | |||
77 | list_for_each(cur, head) { | 77 | list_for_each(cur, head) { |
78 | dev = list_entry(cur, struct btrfs_device, dev_list); | 78 | dev = list_entry(cur, struct btrfs_device, dev_list); |
79 | if (dev->devid == devid && | 79 | if (dev->devid == devid && |
80 | !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE)) { | 80 | (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { |
81 | return dev; | 81 | return dev; |
82 | } | 82 | } |
83 | } | 83 | } |
@@ -293,6 +293,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
293 | * so we make sure to start at an offset of at least 1MB | 293 | * so we make sure to start at an offset of at least 1MB |
294 | */ | 294 | */ |
295 | search_start = max((u64)1024 * 1024, search_start); | 295 | search_start = max((u64)1024 * 1024, search_start); |
296 | |||
297 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | ||
298 | search_start = max(root->fs_info->alloc_start, search_start); | ||
299 | |||
296 | key.objectid = device->devid; | 300 | key.objectid = device->devid; |
297 | key.offset = search_start; | 301 | key.offset = search_start; |
298 | key.type = BTRFS_DEV_EXTENT_KEY; | 302 | key.type = BTRFS_DEV_EXTENT_KEY; |
@@ -380,6 +384,33 @@ error: | |||
380 | return ret; | 384 | return ret; |
381 | } | 385 | } |
382 | 386 | ||
387 | int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | ||
388 | struct btrfs_device *device, | ||
389 | u64 start) | ||
390 | { | ||
391 | int ret; | ||
392 | struct btrfs_path *path; | ||
393 | struct btrfs_root *root = device->dev_root; | ||
394 | struct btrfs_key key; | ||
395 | |||
396 | path = btrfs_alloc_path(); | ||
397 | if (!path) | ||
398 | return -ENOMEM; | ||
399 | |||
400 | key.objectid = device->devid; | ||
401 | key.offset = start; | ||
402 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
403 | |||
404 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
405 | BUG_ON(ret); | ||
406 | |||
407 | ret = btrfs_del_item(trans, root, path); | ||
408 | BUG_ON(ret); | ||
409 | |||
410 | btrfs_free_path(path); | ||
411 | return ret; | ||
412 | } | ||
413 | |||
383 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | 414 | int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, |
384 | struct btrfs_device *device, | 415 | struct btrfs_device *device, |
385 | u64 chunk_tree, u64 chunk_objectid, | 416 | u64 chunk_tree, u64 chunk_objectid, |
@@ -560,6 +591,7 @@ out: | |||
560 | btrfs_free_path(path); | 591 | btrfs_free_path(path); |
561 | return ret; | 592 | return ret; |
562 | } | 593 | } |
594 | |||
563 | int btrfs_update_device(struct btrfs_trans_handle *trans, | 595 | int btrfs_update_device(struct btrfs_trans_handle *trans, |
564 | struct btrfs_device *device) | 596 | struct btrfs_device *device) |
565 | { | 597 | { |
@@ -606,6 +638,254 @@ out: | |||
606 | return ret; | 638 | return ret; |
607 | } | 639 | } |
608 | 640 | ||
641 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | ||
642 | struct btrfs_device *device, u64 new_size) | ||
643 | { | ||
644 | struct btrfs_super_block *super_copy = | ||
645 | &device->dev_root->fs_info->super_copy; | ||
646 | u64 old_total = btrfs_super_total_bytes(super_copy); | ||
647 | u64 diff = new_size - device->total_bytes; | ||
648 | |||
649 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | ||
650 | return btrfs_update_device(trans, device); | ||
651 | } | ||
652 | |||
653 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | ||
654 | struct btrfs_root *root, | ||
655 | u64 chunk_tree, u64 chunk_objectid, | ||
656 | u64 chunk_offset) | ||
657 | { | ||
658 | int ret; | ||
659 | struct btrfs_path *path; | ||
660 | struct btrfs_key key; | ||
661 | |||
662 | root = root->fs_info->chunk_root; | ||
663 | path = btrfs_alloc_path(); | ||
664 | if (!path) | ||
665 | return -ENOMEM; | ||
666 | |||
667 | key.objectid = chunk_objectid; | ||
668 | key.offset = chunk_offset; | ||
669 | key.type = BTRFS_CHUNK_ITEM_KEY; | ||
670 | |||
671 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | ||
672 | BUG_ON(ret); | ||
673 | |||
674 | ret = btrfs_del_item(trans, root, path); | ||
675 | BUG_ON(ret); | ||
676 | |||
677 | btrfs_free_path(path); | ||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | ||
682 | chunk_offset) | ||
683 | { | ||
684 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | ||
685 | struct btrfs_disk_key *disk_key; | ||
686 | struct btrfs_chunk *chunk; | ||
687 | u8 *ptr; | ||
688 | int ret = 0; | ||
689 | u32 num_stripes; | ||
690 | u32 array_size; | ||
691 | u32 len = 0; | ||
692 | u32 cur; | ||
693 | struct btrfs_key key; | ||
694 | |||
695 | array_size = btrfs_super_sys_array_size(super_copy); | ||
696 | |||
697 | ptr = super_copy->sys_chunk_array; | ||
698 | cur = 0; | ||
699 | |||
700 | while (cur < array_size) { | ||
701 | disk_key = (struct btrfs_disk_key *)ptr; | ||
702 | btrfs_disk_key_to_cpu(&key, disk_key); | ||
703 | |||
704 | len = sizeof(*disk_key); | ||
705 | |||
706 | if (key.type == BTRFS_CHUNK_ITEM_KEY) { | ||
707 | chunk = (struct btrfs_chunk *)(ptr + len); | ||
708 | num_stripes = btrfs_stack_chunk_num_stripes(chunk); | ||
709 | len += btrfs_chunk_item_size(num_stripes); | ||
710 | } else { | ||
711 | ret = -EIO; | ||
712 | break; | ||
713 | } | ||
714 | if (key.objectid == chunk_objectid && | ||
715 | key.offset == chunk_offset) { | ||
716 | memmove(ptr, ptr + len, array_size - (cur + len)); | ||
717 | array_size -= len; | ||
718 | btrfs_set_super_sys_array_size(super_copy, array_size); | ||
719 | } else { | ||
720 | ptr += len; | ||
721 | cur += len; | ||
722 | } | ||
723 | } | ||
724 | return ret; | ||
725 | } | ||
726 | |||
727 | |||
728 | int btrfs_relocate_chunk(struct btrfs_root *root, | ||
729 | u64 chunk_tree, u64 chunk_objectid, | ||
730 | u64 chunk_offset) | ||
731 | { | ||
732 | struct extent_map_tree *em_tree; | ||
733 | struct btrfs_root *extent_root; | ||
734 | struct btrfs_trans_handle *trans; | ||
735 | struct extent_map *em; | ||
736 | struct map_lookup *map; | ||
737 | int ret; | ||
738 | int i; | ||
739 | |||
740 | root = root->fs_info->chunk_root; | ||
741 | extent_root = root->fs_info->extent_root; | ||
742 | em_tree = &root->fs_info->mapping_tree.map_tree; | ||
743 | |||
744 | /* step one, relocate all the extents inside this chunk */ | ||
745 | ret = btrfs_shrink_extent_tree(extent_root, chunk_offset); | ||
746 | BUG_ON(ret); | ||
747 | |||
748 | trans = btrfs_start_transaction(root, 1); | ||
749 | BUG_ON(!trans); | ||
750 | |||
751 | /* | ||
752 | * step two, delete the device extents and the | ||
753 | * chunk tree entries | ||
754 | */ | ||
755 | spin_lock(&em_tree->lock); | ||
756 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | ||
757 | spin_unlock(&em_tree->lock); | ||
758 | |||
759 | BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); | ||
760 | map = (struct map_lookup *)em->bdev; | ||
761 | |||
762 | for (i = 0; i < map->num_stripes; i++) { | ||
763 | ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, | ||
764 | map->stripes[i].physical); | ||
765 | BUG_ON(ret); | ||
766 | } | ||
767 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, | ||
768 | chunk_offset); | ||
769 | |||
770 | BUG_ON(ret); | ||
771 | |||
772 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | ||
773 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | ||
774 | BUG_ON(ret); | ||
775 | goto out; | ||
776 | } | ||
777 | |||
778 | |||
779 | |||
780 | spin_lock(&em_tree->lock); | ||
781 | remove_extent_mapping(em_tree, em); | ||
782 | kfree(map); | ||
783 | em->bdev = NULL; | ||
784 | |||
785 | /* once for the tree */ | ||
786 | free_extent_map(em); | ||
787 | spin_unlock(&em_tree->lock); | ||
788 | |||
789 | out: | ||
790 | /* once for us */ | ||
791 | free_extent_map(em); | ||
792 | |||
793 | btrfs_end_transaction(trans, root); | ||
794 | return 0; | ||
795 | } | ||
796 | |||
797 | /* | ||
798 | * shrinking a device means finding all of the device extents past | ||
799 | * the new size, and then following the back refs to the chunks. | ||
800 | * The chunk relocation code actually frees the device extent | ||
801 | */ | ||
802 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | ||
803 | { | ||
804 | struct btrfs_trans_handle *trans; | ||
805 | struct btrfs_root *root = device->dev_root; | ||
806 | struct btrfs_dev_extent *dev_extent = NULL; | ||
807 | struct btrfs_path *path; | ||
808 | u64 length; | ||
809 | u64 chunk_tree; | ||
810 | u64 chunk_objectid; | ||
811 | u64 chunk_offset; | ||
812 | int ret; | ||
813 | int slot; | ||
814 | struct extent_buffer *l; | ||
815 | struct btrfs_key key; | ||
816 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | ||
817 | u64 old_total = btrfs_super_total_bytes(super_copy); | ||
818 | u64 diff = device->total_bytes - new_size; | ||
819 | |||
820 | |||
821 | path = btrfs_alloc_path(); | ||
822 | if (!path) | ||
823 | return -ENOMEM; | ||
824 | |||
825 | trans = btrfs_start_transaction(root, 1); | ||
826 | if (!trans) { | ||
827 | ret = -ENOMEM; | ||
828 | goto done; | ||
829 | } | ||
830 | |||
831 | path->reada = 2; | ||
832 | |||
833 | device->total_bytes = new_size; | ||
834 | ret = btrfs_update_device(trans, device); | ||
835 | if (ret) { | ||
836 | btrfs_end_transaction(trans, root); | ||
837 | goto done; | ||
838 | } | ||
839 | WARN_ON(diff > old_total); | ||
840 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
841 | btrfs_end_transaction(trans, root); | ||
842 | |||
843 | key.objectid = device->devid; | ||
844 | key.offset = (u64)-1; | ||
845 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
846 | |||
847 | while (1) { | ||
848 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
849 | if (ret < 0) | ||
850 | goto done; | ||
851 | |||
852 | ret = btrfs_previous_item(root, path, 0, key.type); | ||
853 | if (ret < 0) | ||
854 | goto done; | ||
855 | if (ret) { | ||
856 | ret = 0; | ||
857 | goto done; | ||
858 | } | ||
859 | |||
860 | l = path->nodes[0]; | ||
861 | slot = path->slots[0]; | ||
862 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
863 | |||
864 | if (key.objectid != device->devid) | ||
865 | goto done; | ||
866 | |||
867 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
868 | length = btrfs_dev_extent_length(l, dev_extent); | ||
869 | |||
870 | if (key.offset + length <= new_size) | ||
871 | goto done; | ||
872 | |||
873 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | ||
874 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | ||
875 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); | ||
876 | btrfs_release_path(root, path); | ||
877 | |||
878 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | ||
879 | chunk_offset); | ||
880 | if (ret) | ||
881 | goto done; | ||
882 | } | ||
883 | |||
884 | done: | ||
885 | btrfs_free_path(path); | ||
886 | return ret; | ||
887 | } | ||
888 | |||
609 | int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, | 889 | int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, |
610 | struct btrfs_root *root, | 890 | struct btrfs_root *root, |
611 | struct btrfs_key *key, | 891 | struct btrfs_key *key, |
@@ -658,6 +938,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
658 | u64 dev_offset; | 938 | u64 dev_offset; |
659 | struct btrfs_fs_info *info = extent_root->fs_info; | 939 | struct btrfs_fs_info *info = extent_root->fs_info; |
660 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; | 940 | struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; |
941 | struct btrfs_path *path; | ||
661 | struct btrfs_stripe *stripes; | 942 | struct btrfs_stripe *stripes; |
662 | struct btrfs_device *device = NULL; | 943 | struct btrfs_device *device = NULL; |
663 | struct btrfs_chunk *chunk; | 944 | struct btrfs_chunk *chunk; |
@@ -724,6 +1005,10 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
724 | min_stripe_size = 1 * 1024 * 1024; | 1005 | min_stripe_size = 1 * 1024 * 1024; |
725 | } | 1006 | } |
726 | 1007 | ||
1008 | path = btrfs_alloc_path(); | ||
1009 | if (!path) | ||
1010 | return -ENOMEM; | ||
1011 | |||
727 | /* we don't want a chunk larger than 10% of the FS */ | 1012 | /* we don't want a chunk larger than 10% of the FS */ |
728 | percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); | 1013 | percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); |
729 | max_chunk_size = min(percent_max, max_chunk_size); | 1014 | max_chunk_size = min(percent_max, max_chunk_size); |
@@ -759,11 +1044,19 @@ again: | |||
759 | 1044 | ||
760 | avail = device->total_bytes - device->bytes_used; | 1045 | avail = device->total_bytes - device->bytes_used; |
761 | cur = cur->next; | 1046 | cur = cur->next; |
1047 | |||
762 | if (avail >= min_free) { | 1048 | if (avail >= min_free) { |
763 | list_move_tail(&device->dev_alloc_list, &private_devs); | 1049 | u64 ignored_start = 0; |
764 | index++; | 1050 | ret = find_free_dev_extent(trans, device, path, |
765 | if (type & BTRFS_BLOCK_GROUP_DUP) | 1051 | min_free, |
1052 | &ignored_start); | ||
1053 | if (ret == 0) { | ||
1054 | list_move_tail(&device->dev_alloc_list, | ||
1055 | &private_devs); | ||
766 | index++; | 1056 | index++; |
1057 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
1058 | index++; | ||
1059 | } | ||
767 | } else if (avail > max_avail) | 1060 | } else if (avail > max_avail) |
768 | max_avail = avail; | 1061 | max_avail = avail; |
769 | if (cur == dev_list) | 1062 | if (cur == dev_list) |
@@ -785,30 +1078,37 @@ again: | |||
785 | calc_size = max_avail; | 1078 | calc_size = max_avail; |
786 | goto again; | 1079 | goto again; |
787 | } | 1080 | } |
1081 | btrfs_free_path(path); | ||
788 | return -ENOSPC; | 1082 | return -ENOSPC; |
789 | } | 1083 | } |
790 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 1084 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
791 | key.type = BTRFS_CHUNK_ITEM_KEY; | 1085 | key.type = BTRFS_CHUNK_ITEM_KEY; |
792 | ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 1086 | ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
793 | &key.offset); | 1087 | &key.offset); |
794 | if (ret) | 1088 | if (ret) { |
1089 | btrfs_free_path(path); | ||
795 | return ret; | 1090 | return ret; |
1091 | } | ||
796 | 1092 | ||
797 | chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); | 1093 | chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); |
798 | if (!chunk) | 1094 | if (!chunk) { |
1095 | btrfs_free_path(path); | ||
799 | return -ENOMEM; | 1096 | return -ENOMEM; |
1097 | } | ||
800 | 1098 | ||
801 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | 1099 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); |
802 | if (!map) { | 1100 | if (!map) { |
803 | kfree(chunk); | 1101 | kfree(chunk); |
1102 | btrfs_free_path(path); | ||
804 | return -ENOMEM; | 1103 | return -ENOMEM; |
805 | } | 1104 | } |
1105 | btrfs_free_path(path); | ||
1106 | path = NULL; | ||
806 | 1107 | ||
807 | stripes = &chunk->stripe; | 1108 | stripes = &chunk->stripe; |
808 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 1109 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
809 | num_stripes, sub_stripes); | 1110 | num_stripes, sub_stripes); |
810 | 1111 | ||
811 | |||
812 | index = 0; | 1112 | index = 0; |
813 | printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); | 1113 | printk("new chunk type %Lu start %Lu size %Lu\n", type, key.offset, *num_bytes); |
814 | while(index < num_stripes) { | 1114 | while(index < num_stripes) { |
@@ -874,6 +1174,11 @@ printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n", key.offset, cal | |||
874 | em->len = *num_bytes; | 1174 | em->len = *num_bytes; |
875 | em->block_start = 0; | 1175 | em->block_start = 0; |
876 | 1176 | ||
1177 | if (type & BTRFS_BLOCK_GROUP_SYSTEM) { | ||
1178 | ret = btrfs_add_system_chunk(trans, chunk_root, &key, | ||
1179 | chunk, btrfs_chunk_item_size(num_stripes)); | ||
1180 | BUG_ON(ret); | ||
1181 | } | ||
877 | kfree(chunk); | 1182 | kfree(chunk); |
878 | 1183 | ||
879 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 1184 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
@@ -1376,11 +1681,6 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
1376 | 1681 | ||
1377 | array_size = btrfs_super_sys_array_size(super_copy); | 1682 | array_size = btrfs_super_sys_array_size(super_copy); |
1378 | 1683 | ||
1379 | /* | ||
1380 | * we do this loop twice, once for the device items and | ||
1381 | * once for all of the chunks. This way there are device | ||
1382 | * structs filled in for every chunk | ||
1383 | */ | ||
1384 | ptr = super_copy->sys_chunk_array; | 1684 | ptr = super_copy->sys_chunk_array; |
1385 | sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); | 1685 | sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); |
1386 | cur = 0; | 1686 | cur = 0; |