about summary refs log tree commit diff stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2012-01-16 15:26:02 -0500
committer Chris Mason <chris.mason@oracle.com> 2012-01-16 15:26:02 -0500
commit 27263e28321db438bc43dc0c0be432ce91526224 (patch)
tree b7571dd52d6c1d48528e383d273b330da94439bd /fs/btrfs
parent 64e05503ab5c73b2ffb8d55d2f7aab74f34fc691 (diff)
parent 19a39dce3b9bf0244d19a446718ad6f7605ff099 (diff)
Merge branch 'restriper' of git://github.com/idryomov/btrfs-unstable into integration
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/ctree.h 205
-rw-r--r-- fs/btrfs/disk-io.c 18
-rw-r--r-- fs/btrfs/extent-tree.c 128
-rw-r--r-- fs/btrfs/ioctl.c 226
-rw-r--r-- fs/btrfs/ioctl.h 54
-rw-r--r-- fs/btrfs/super.c 11
-rw-r--r-- fs/btrfs/volumes.c 790
-rw-r--r-- fs/btrfs/volumes.h 51
8 files changed, 1385 insertions, 98 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 67385033323d..dfc136cc07d7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
86/* holds checksums of all the data extents */ 86/* holds checksums of all the data extents */
87#define BTRFS_CSUM_TREE_OBJECTID 7ULL 87#define BTRFS_CSUM_TREE_OBJECTID 7ULL
88 88
89/* for storing balance parameters in the root tree */
90#define BTRFS_BALANCE_OBJECTID -4ULL
91
89/* orhpan objectid for tracking unlinked/truncated files */ 92/* orhpan objectid for tracking unlinked/truncated files */
90#define BTRFS_ORPHAN_OBJECTID -5ULL 93#define BTRFS_ORPHAN_OBJECTID -5ULL
91 94
@@ -692,6 +695,54 @@ struct btrfs_root_ref {
692 __le16 name_len; 695 __le16 name_len;
693} __attribute__ ((__packed__)); 696} __attribute__ ((__packed__));
694 697
/*
 * On-disk form of one set of balance arguments: every member is a
 * little-endian 64-bit value and the struct is packed.  struct
 * btrfs_balance_item embeds three of these (data, meta, sys).
 */
698struct btrfs_disk_balance_args {
699 /*
700 * profiles to operate on, single is denoted by
701 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
702 */
703 __le64 profiles;
704
705 /* usage filter */
706 __le64 usage;
707
708 /* devid filter */
709 __le64 devid;
710
711 /* devid subset filter [pstart..pend) */
712 __le64 pstart;
713 __le64 pend;
714
715 /* btrfs virtual address space subset filter [vstart..vend) */
716 __le64 vstart;
717 __le64 vend;
718
719 /*
720 * profile to convert to, single is denoted by
721 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
722 */
723 __le64 target;
724
725 /* BTRFS_BALANCE_ARGS_* */
726 __le64 flags;
727
/*
 * Not touched by the cpu<->disk conversion helpers beyond being zeroed
 * by btrfs_cpu_balance_args_to_disk(); presumably reserved for future
 * fields - confirm before reusing.
 */
728 __le64 unused[8];
729} __attribute__ ((__packed__));
730
731/*
732 * store balance parameters to disk so that balance can be properly
733 * resumed after crash or unmount
734 */
735struct btrfs_balance_item {
736 /* BTRFS_BALANCE_* */
737 __le64 flags;
738
/*
 * Three independent argument sets, each in the packed little-endian
 * btrfs_disk_balance_args layout.  Judging by the member names they
 * apply to data, metadata and system chunks respectively.
 */
739 struct btrfs_disk_balance_args data;
740 struct btrfs_disk_balance_args meta;
741 struct btrfs_disk_balance_args sys;
742
/* no accessor is defined for this member in this file; presumably reserved */
743 __le64 unused[4];
744} __attribute__ ((__packed__));
745
695#define BTRFS_FILE_EXTENT_INLINE 0 746#define BTRFS_FILE_EXTENT_INLINE 0
696#define BTRFS_FILE_EXTENT_REG 1 747#define BTRFS_FILE_EXTENT_REG 1
697#define BTRFS_FILE_EXTENT_PREALLOC 2 748#define BTRFS_FILE_EXTENT_PREALLOC 2
@@ -751,14 +802,32 @@ struct btrfs_csum_item {
751} __attribute__ ((__packed__)); 802} __attribute__ ((__packed__));
752 803
753/* different types of block groups (and chunks) */ 804/* different types of block groups (and chunks) */
754#define BTRFS_BLOCK_GROUP_DATA (1 << 0) 805#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
755#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) 806#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
756#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) 807#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
757#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3) 808#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
758#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) 809#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
759#define BTRFS_BLOCK_GROUP_DUP (1 << 5) 810#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
760#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) 811#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
761#define BTRFS_NR_RAID_TYPES 5 812#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
813#define BTRFS_NR_RAID_TYPES 5
814
815#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
816 BTRFS_BLOCK_GROUP_SYSTEM | \
817 BTRFS_BLOCK_GROUP_METADATA)
818
819#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
820 BTRFS_BLOCK_GROUP_RAID1 | \
821 BTRFS_BLOCK_GROUP_DUP | \
822 BTRFS_BLOCK_GROUP_RAID10)
823/*
824 * We need a bit for restriper to be able to tell when chunks of type
825 * SINGLE are available. This "extended" profile format is used in
826 * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
827 * (on-disk). The corresponding on-disk bit in chunk.type is reserved
828 * to avoid remappings between two formats in future.
829 */
830#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
762 831
763struct btrfs_block_group_item { 832struct btrfs_block_group_item {
764 __le64 used; 833 __le64 used;
@@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
916struct reloc_control; 985struct reloc_control;
917struct btrfs_device; 986struct btrfs_device;
918struct btrfs_fs_devices; 987struct btrfs_fs_devices;
988struct btrfs_balance_control;
919struct btrfs_delayed_root; 989struct btrfs_delayed_root;
920struct btrfs_fs_info { 990struct btrfs_fs_info {
921 u8 fsid[BTRFS_FSID_SIZE]; 991 u8 fsid[BTRFS_FSID_SIZE];
@@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
1132 spinlock_t ref_cache_lock; 1202 spinlock_t ref_cache_lock;
1133 u64 total_ref_cache_size; 1203 u64 total_ref_cache_size;
1134 1204
1205 /*
1206 * these three are in extended format (availability of single
1207 * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
1208 * types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
1209 */
1135 u64 avail_data_alloc_bits; 1210 u64 avail_data_alloc_bits;
1136 u64 avail_metadata_alloc_bits; 1211 u64 avail_metadata_alloc_bits;
1137 u64 avail_system_alloc_bits; 1212 u64 avail_system_alloc_bits;
1138 u64 data_alloc_profile; 1213
1139 u64 metadata_alloc_profile; 1214 /* restriper state */
1140 u64 system_alloc_profile; 1215 spinlock_t balance_lock;
1216 struct mutex balance_mutex;
1217 atomic_t balance_running;
1218 atomic_t balance_pause_req;
1219 atomic_t balance_cancel_req;
1220 struct btrfs_balance_control *balance_ctl;
1221 wait_queue_head_t balance_wait_q;
1141 1222
1142 unsigned data_chunk_allocations; 1223 unsigned data_chunk_allocations;
1143 unsigned metadata_ratio; 1224 unsigned metadata_ratio;
@@ -1383,6 +1464,8 @@ struct btrfs_ioctl_defrag_range_args {
1383#define BTRFS_DEV_ITEM_KEY 216 1464#define BTRFS_DEV_ITEM_KEY 216
1384#define BTRFS_CHUNK_ITEM_KEY 228 1465#define BTRFS_CHUNK_ITEM_KEY 228
1385 1466
1467#define BTRFS_BALANCE_ITEM_KEY 248
1468
1386/* 1469/*
1387 * string items are for debugging. They just store a short string of 1470 * string items are for debugging. They just store a short string of
1388 * data in the FS 1471 * data in the FS
@@ -1413,6 +1496,7 @@ struct btrfs_ioctl_defrag_range_args {
1413#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) 1496#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1414#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) 1497#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
1415#define BTRFS_MOUNT_RECOVERY (1 << 18) 1498#define BTRFS_MOUNT_RECOVERY (1 << 18)
1499#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
1416 1500
1417#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1501#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1418#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1502#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2077,8 +2161,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
2077BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, 2161BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
2078 num_devices, 64); 2162 num_devices, 64);
2079 2163
2080/* struct btrfs_super_block */ 2164/* struct btrfs_balance_item */
2165BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
2166
/*
 * Read accessor for the `data` member of the balance item @bi located in
 * extent buffer @eb; @ba is handed to read_eb_member() as the destination.
 * NOTE(review): read_eb_member() is defined outside this view - presumably
 * it copies the member's bytes out of @eb into @ba without byte swapping;
 * confirm.
 */
2167static inline void btrfs_balance_data(struct extent_buffer *eb,
2168 struct btrfs_balance_item *bi,
2169 struct btrfs_disk_balance_args *ba)
2170{
2171 read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
2172}
2173
/*
 * Write accessor for the `data` member of the balance item @bi located in
 * extent buffer @eb; @ba is handed to write_eb_member() as the source.
 * NOTE(review): write_eb_member() is defined outside this view - presumably
 * it copies @ba verbatim into @eb, so @ba should already be in on-disk
 * (little-endian) form; confirm.
 */
2174static inline void btrfs_set_balance_data(struct extent_buffer *eb,
2175 struct btrfs_balance_item *bi,
2176 struct btrfs_disk_balance_args *ba)
2177{
2178 write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
2179}
2180
/*
 * Read accessor for the `meta` member of the balance item @bi located in
 * extent buffer @eb; @ba is handed to read_eb_member() as the destination.
 * NOTE(review): read_eb_member() is defined outside this view - presumably
 * it copies the member's bytes out of @eb into @ba; confirm.
 */
2181static inline void btrfs_balance_meta(struct extent_buffer *eb,
2182 struct btrfs_balance_item *bi,
2183 struct btrfs_disk_balance_args *ba)
2184{
2185 read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
2186}
2081 2187
/*
 * Write accessor for the `meta` member of the balance item @bi located in
 * extent buffer @eb; @ba is handed to write_eb_member() as the source.
 * NOTE(review): write_eb_member() is defined outside this view - presumably
 * it copies @ba verbatim into @eb; confirm.
 */
2188static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
2189 struct btrfs_balance_item *bi,
2190 struct btrfs_disk_balance_args *ba)
2191{
2192 write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
2193}
2194
/*
 * Read accessor for the `sys` member of the balance item @bi located in
 * extent buffer @eb; @ba is handed to read_eb_member() as the destination.
 * NOTE(review): read_eb_member() is defined outside this view - presumably
 * it copies the member's bytes out of @eb into @ba; confirm.
 */
2195static inline void btrfs_balance_sys(struct extent_buffer *eb,
2196 struct btrfs_balance_item *bi,
2197 struct btrfs_disk_balance_args *ba)
2198{
2199 read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
2200}
2201
/*
 * Write accessor for the `sys` member of the balance item @bi located in
 * extent buffer @eb; @ba is handed to write_eb_member() as the source.
 * NOTE(review): write_eb_member() is defined outside this view - presumably
 * it copies @ba verbatim into @eb; confirm.
 */
2202static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
2203 struct btrfs_balance_item *bi,
2204 struct btrfs_disk_balance_args *ba)
2205{
2206 write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
2207}
2208
/*
 * Convert one on-disk balance argument set (@disk, little-endian) into its
 * in-memory form (@cpu, CPU byte order).
 *
 * @cpu is zeroed first, so any member of struct btrfs_balance_args that is
 * not assigned below ends up 0.  disk->unused[] is not read.
 */
2209static inline void
2210btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
2211 struct btrfs_disk_balance_args *disk)
2212{
2213 memset(cpu, 0, sizeof(*cpu));
2214
2215 cpu->profiles = le64_to_cpu(disk->profiles);
2216 cpu->usage = le64_to_cpu(disk->usage);
2217 cpu->devid = le64_to_cpu(disk->devid);
2218 cpu->pstart = le64_to_cpu(disk->pstart);
2219 cpu->pend = le64_to_cpu(disk->pend);
2220 cpu->vstart = le64_to_cpu(disk->vstart);
2221 cpu->vend = le64_to_cpu(disk->vend);
2222 cpu->target = le64_to_cpu(disk->target);
2223 cpu->flags = le64_to_cpu(disk->flags);
2224}
2225
/*
 * Convert one in-memory balance argument set (@cpu, CPU byte order) into
 * its on-disk form (@disk, little-endian).
 *
 * @disk is zeroed first, so disk->unused[] is always written out as zeros;
 * members of @cpu other than the nine copied below are not stored.
 */
2226static inline void
2227btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
2228 struct btrfs_balance_args *cpu)
2229{
2230 memset(disk, 0, sizeof(*disk));
2231
2232 disk->profiles = cpu_to_le64(cpu->profiles);
2233 disk->usage = cpu_to_le64(cpu->usage);
2234 disk->devid = cpu_to_le64(cpu->devid);
2235 disk->pstart = cpu_to_le64(cpu->pstart);
2236 disk->pend = cpu_to_le64(cpu->pend);
2237 disk->vstart = cpu_to_le64(cpu->vstart);
2238 disk->vend = cpu_to_le64(cpu->vend);
2239 disk->target = cpu_to_le64(cpu->target);
2240 disk->flags = cpu_to_le64(cpu->flags);
2241}
2242
2243/* struct btrfs_super_block */
2082BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); 2244BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
2083BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); 2245BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
2084BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, 2246BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
@@ -2500,6 +2662,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
2500} 2662}
2501static inline void free_fs_info(struct btrfs_fs_info *fs_info) 2663static inline void free_fs_info(struct btrfs_fs_info *fs_info)
2502{ 2664{
2665 kfree(fs_info->balance_ctl);
2503 kfree(fs_info->delayed_root); 2666 kfree(fs_info->delayed_root);
2504 kfree(fs_info->extent_root); 2667 kfree(fs_info->extent_root);
2505 kfree(fs_info->tree_root); 2668 kfree(fs_info->tree_root);
@@ -2510,6 +2673,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
2510 kfree(fs_info->super_for_commit); 2673 kfree(fs_info->super_for_commit);
2511 kfree(fs_info); 2674 kfree(fs_info);
2512} 2675}
2676/**
2677 * profile_is_valid - tests whether a given profile is valid and reduced
2678 * @flags: profile to validate
2679 * @extended: if true @flags is treated as an extended profile
 *
 * Block group type bits (BTRFS_BLOCK_GROUP_TYPE_MASK) in @flags are ignored.
 * Returns 1 when no bit outside the accepted set is present and at most one
 * bit remains set ("reduced"), 0 otherwise.  A profile with no bits set is
 * therefore accepted.
2680 */
2681static inline int profile_is_valid(u64 flags, int extended)
2682{
/* start by rejecting every bit that is not a RAID0/RAID1/DUP/RAID10 bit */
2683 u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
2684
/* strip DATA/SYSTEM/METADATA so callers may pass full chunk flags */
2685 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
/* extended format additionally allows the SINGLE availability bit */
2686 if (extended)
2687 mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2688
2689 if (flags & mask)
2690 return 0;
2691 /* true if zero or exactly one bit set */
/* (~flags + 1) is the two's complement, so the AND isolates the lowest set bit */
2692 return (flags & (~flags + 1)) == flags;
2693}
2513 2694
2514/* root-item.c */ 2695/* root-item.c */
2515int btrfs_find_root_ref(struct btrfs_root *tree_root, 2696int btrfs_find_root_ref(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f44b3928dc2d..9c1a744e595b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2002,6 +2002,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2002 init_rwsem(&fs_info->scrub_super_lock); 2002 init_rwsem(&fs_info->scrub_super_lock);
2003 fs_info->scrub_workers_refcnt = 0; 2003 fs_info->scrub_workers_refcnt = 0;
2004 2004
2005 spin_lock_init(&fs_info->balance_lock);
2006 mutex_init(&fs_info->balance_mutex);
2007 atomic_set(&fs_info->balance_running, 0);
2008 atomic_set(&fs_info->balance_pause_req, 0);
2009 atomic_set(&fs_info->balance_cancel_req, 0);
2010 fs_info->balance_ctl = NULL;
2011 init_waitqueue_head(&fs_info->balance_wait_q);
2012
2005 sb->s_blocksize = 4096; 2013 sb->s_blocksize = 4096;
2006 sb->s_blocksize_bits = blksize_bits(4096); 2014 sb->s_blocksize_bits = blksize_bits(4096);
2007 sb->s_bdi = &fs_info->bdi; 2015 sb->s_bdi = &fs_info->bdi;
@@ -2321,9 +2329,6 @@ retry_root_backup:
2321 2329
2322 fs_info->generation = generation; 2330 fs_info->generation = generation;
2323 fs_info->last_trans_committed = generation; 2331 fs_info->last_trans_committed = generation;
2324 fs_info->data_alloc_profile = (u64)-1;
2325 fs_info->metadata_alloc_profile = (u64)-1;
2326 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
2327 2332
2328 ret = btrfs_init_space_info(fs_info); 2333 ret = btrfs_init_space_info(fs_info);
2329 if (ret) { 2334 if (ret) {
@@ -2426,6 +2431,10 @@ retry_root_backup:
2426 if (!err) 2431 if (!err)
2427 err = btrfs_orphan_cleanup(fs_info->tree_root); 2432 err = btrfs_orphan_cleanup(fs_info->tree_root);
2428 up_read(&fs_info->cleanup_work_sem); 2433 up_read(&fs_info->cleanup_work_sem);
2434
2435 if (!err)
2436 err = btrfs_recover_balance(fs_info->tree_root);
2437
2429 if (err) { 2438 if (err) {
2430 close_ctree(tree_root); 2439 close_ctree(tree_root);
2431 return ERR_PTR(err); 2440 return ERR_PTR(err);
@@ -2975,6 +2984,9 @@ int close_ctree(struct btrfs_root *root)
2975 fs_info->closing = 1; 2984 fs_info->closing = 1;
2976 smp_mb(); 2985 smp_mb();
2977 2986
2987 /* pause restriper - we want to resume on mount */
2988 btrfs_pause_balance(root->fs_info);
2989
2978 btrfs_scrub_cancel(root); 2990 btrfs_scrub_cancel(root);
2979 2991
2980 /* wait for any defraggers to finish */ 2992 /* wait for any defraggers to finish */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 37594e4bf660..352083ad233c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
618 struct list_head *head = &info->space_info; 618 struct list_head *head = &info->space_info;
619 struct btrfs_space_info *found; 619 struct btrfs_space_info *found;
620 620
621 flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | 621 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
622 BTRFS_BLOCK_GROUP_METADATA;
623 622
624 rcu_read_lock(); 623 rcu_read_lock();
625 list_for_each_entry_rcu(found, head, list) { 624 list_for_each_entry_rcu(found, head, list) {
@@ -2999,9 +2998,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2999 INIT_LIST_HEAD(&found->block_groups[i]); 2998 INIT_LIST_HEAD(&found->block_groups[i]);
3000 init_rwsem(&found->groups_sem); 2999 init_rwsem(&found->groups_sem);
3001 spin_lock_init(&found->lock); 3000 spin_lock_init(&found->lock);
3002 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | 3001 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3003 BTRFS_BLOCK_GROUP_SYSTEM |
3004 BTRFS_BLOCK_GROUP_METADATA);
3005 found->total_bytes = total_bytes; 3002 found->total_bytes = total_bytes;
3006 found->disk_total = total_bytes * factor; 3003 found->disk_total = total_bytes * factor;
3007 found->bytes_used = bytes_used; 3004 found->bytes_used = bytes_used;
@@ -3022,20 +3019,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3022 3019
3023static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3020static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3024{ 3021{
3025 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 3022 u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
3026 BTRFS_BLOCK_GROUP_RAID1 | 3023
3027 BTRFS_BLOCK_GROUP_RAID10 | 3024 /* chunk -> extended profile */
3028 BTRFS_BLOCK_GROUP_DUP); 3025 if (extra_flags == 0)
3029 if (extra_flags) { 3026 extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3030 if (flags & BTRFS_BLOCK_GROUP_DATA) 3027
3031 fs_info->avail_data_alloc_bits |= extra_flags; 3028 if (flags & BTRFS_BLOCK_GROUP_DATA)
3032 if (flags & BTRFS_BLOCK_GROUP_METADATA) 3029 fs_info->avail_data_alloc_bits |= extra_flags;
3033 fs_info->avail_metadata_alloc_bits |= extra_flags; 3030 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3034 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 3031 fs_info->avail_metadata_alloc_bits |= extra_flags;
3035 fs_info->avail_system_alloc_bits |= extra_flags; 3032 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3036 } 3033 fs_info->avail_system_alloc_bits |= extra_flags;
3037} 3034}
3038 3035
3036/*
3037 * @flags: available profiles in extended format (see ctree.h)
3038 *
3039 * Returns reduced profile in chunk format. If profile changing is in
3040 * progress (either running or paused) picks the target profile (if it's
3041 * already available), otherwise falls back to plain reducing.
3042 */
3039u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3043u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3040{ 3044{
3041 /* 3045 /*
@@ -3046,6 +3050,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3046 u64 num_devices = root->fs_info->fs_devices->rw_devices + 3050 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3047 root->fs_info->fs_devices->missing_devices; 3051 root->fs_info->fs_devices->missing_devices;
3048 3052
3053 /* pick restriper's target profile if it's available */
3054 spin_lock(&root->fs_info->balance_lock);
3055 if (root->fs_info->balance_ctl) {
3056 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
3057 u64 tgt = 0;
3058
3059 if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
3060 (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3061 (flags & bctl->data.target)) {
3062 tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
3063 } else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
3064 (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3065 (flags & bctl->sys.target)) {
3066 tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
3067 } else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
3068 (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3069 (flags & bctl->meta.target)) {
3070 tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
3071 }
3072
3073 if (tgt) {
3074 spin_unlock(&root->fs_info->balance_lock);
3075 flags = tgt;
3076 goto out;
3077 }
3078 }
3079 spin_unlock(&root->fs_info->balance_lock);
3080
3049 if (num_devices == 1) 3081 if (num_devices == 1)
3050 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 3082 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
3051 if (num_devices < 4) 3083 if (num_devices < 4)
@@ -3065,22 +3097,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3065 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 3097 if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
3066 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 3098 ((flags & BTRFS_BLOCK_GROUP_RAID1) |
3067 (flags & BTRFS_BLOCK_GROUP_RAID10) | 3099 (flags & BTRFS_BLOCK_GROUP_RAID10) |
3068 (flags & BTRFS_BLOCK_GROUP_DUP))) 3100 (flags & BTRFS_BLOCK_GROUP_DUP))) {
3069 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 3101 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
3102 }
3103
3104out:
3105 /* extended -> chunk profile */
3106 flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
3070 return flags; 3107 return flags;
3071} 3108}
3072 3109
3073static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 3110static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
3074{ 3111{
3075 if (flags & BTRFS_BLOCK_GROUP_DATA) 3112 if (flags & BTRFS_BLOCK_GROUP_DATA)
3076 flags |= root->fs_info->avail_data_alloc_bits & 3113 flags |= root->fs_info->avail_data_alloc_bits;
3077 root->fs_info->data_alloc_profile;
3078 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 3114 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3079 flags |= root->fs_info->avail_system_alloc_bits & 3115 flags |= root->fs_info->avail_system_alloc_bits;
3080 root->fs_info->system_alloc_profile;
3081 else if (flags & BTRFS_BLOCK_GROUP_METADATA) 3116 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3082 flags |= root->fs_info->avail_metadata_alloc_bits & 3117 flags |= root->fs_info->avail_metadata_alloc_bits;
3083 root->fs_info->metadata_alloc_profile; 3118
3084 return btrfs_reduce_alloc_profile(root, flags); 3119 return btrfs_reduce_alloc_profile(root, flags);
3085} 3120}
3086 3121
@@ -3282,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3282 int wait_for_alloc = 0; 3317 int wait_for_alloc = 0;
3283 int ret = 0; 3318 int ret = 0;
3284 3319
3285 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3320 BUG_ON(!profile_is_valid(flags, 0));
3286 3321
3287 space_info = __find_space_info(extent_root->fs_info, flags); 3322 space_info = __find_space_info(extent_root->fs_info, flags);
3288 if (!space_info) { 3323 if (!space_info) {
@@ -6792,6 +6827,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
6792 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6827 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
6793 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6828 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
6794 6829
6830 if (root->fs_info->balance_ctl) {
6831 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
6832 u64 tgt = 0;
6833
6834 /* pick restriper's target profile and return */
6835 if (flags & BTRFS_BLOCK_GROUP_DATA &&
6836 bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
6837 tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
6838 } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
6839 bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
6840 tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
6841 } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
6842 bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
6843 tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
6844 }
6845
6846 if (tgt) {
6847 /* extended -> chunk profile */
6848 tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
6849 return tgt;
6850 }
6851 }
6852
6795 /* 6853 /*
6796 * we add in the count of missing devices because we want 6854 * we add in the count of missing devices because we want
6797 * to make sure that any RAID levels on a degraded FS 6855 * to make sure that any RAID levels on a degraded FS
@@ -7466,6 +7524,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7466 return 0; 7524 return 0;
7467} 7525}
7468 7526
/*
 * Clear the profile bit(s) carried in @flags from the fs_info
 * avail_{data,metadata,system}_alloc_bits words.  Only the words whose
 * block group type bit is set in @flags are modified.
 *
 * @flags is in chunk format: when it has no profile bit set, the bit
 * cleared is BTRFS_AVAIL_ALLOC_BIT_SINGLE, which is how the extended
 * format used by avail_*_alloc_bits denotes single chunks.
 */
7527static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
7528{
7529 u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
7530
7531 /* chunk -> extended profile */
7532 if (extra_flags == 0)
7533 extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
7534
/* independent ifs (not else-if): a mixed-type @flags clears every matching word */
7535 if (flags & BTRFS_BLOCK_GROUP_DATA)
7536 fs_info->avail_data_alloc_bits &= ~extra_flags;
7537 if (flags & BTRFS_BLOCK_GROUP_METADATA)
7538 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
7539 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
7540 fs_info->avail_system_alloc_bits &= ~extra_flags;
7541}
7542
7469int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7543int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7470 struct btrfs_root *root, u64 group_start) 7544 struct btrfs_root *root, u64 group_start)
7471{ 7545{
@@ -7476,6 +7550,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7476 struct btrfs_key key; 7550 struct btrfs_key key;
7477 struct inode *inode; 7551 struct inode *inode;
7478 int ret; 7552 int ret;
7553 int index;
7479 int factor; 7554 int factor;
7480 7555
7481 root = root->fs_info->extent_root; 7556 root = root->fs_info->extent_root;
@@ -7491,6 +7566,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7491 free_excluded_extents(root, block_group); 7566 free_excluded_extents(root, block_group);
7492 7567
7493 memcpy(&key, &block_group->key, sizeof(key)); 7568 memcpy(&key, &block_group->key, sizeof(key));
7569 index = get_block_group_index(block_group);
7494 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 7570 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
7495 BTRFS_BLOCK_GROUP_RAID1 | 7571 BTRFS_BLOCK_GROUP_RAID1 |
7496 BTRFS_BLOCK_GROUP_RAID10)) 7572 BTRFS_BLOCK_GROUP_RAID10))
@@ -7565,6 +7641,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7565 * are still on the list after taking the semaphore 7641 * are still on the list after taking the semaphore
7566 */ 7642 */
7567 list_del_init(&block_group->list); 7643 list_del_init(&block_group->list);
7644 if (list_empty(&block_group->space_info->block_groups[index]))
7645 clear_avail_alloc_bits(root->fs_info, block_group->flags);
7568 up_write(&block_group->space_info->groups_sem); 7646 up_write(&block_group->space_info->groups_sem);
7569 7647
7570 if (block_group->cached == BTRFS_CACHE_STARTED) 7648 if (block_group->cached == BTRFS_CACHE_STARTED)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c04f02c7d5bb..1e7a9bac31ab 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1203 if (!capable(CAP_SYS_ADMIN)) 1203 if (!capable(CAP_SYS_ADMIN))
1204 return -EPERM; 1204 return -EPERM;
1205 1205
1206 mutex_lock(&root->fs_info->volume_mutex);
1207 if (root->fs_info->balance_ctl) {
1208 printk(KERN_INFO "btrfs: balance in progress\n");
1209 ret = -EINVAL;
1210 goto out;
1211 }
1212
1206 vol_args = memdup_user(arg, sizeof(*vol_args)); 1213 vol_args = memdup_user(arg, sizeof(*vol_args));
1207 if (IS_ERR(vol_args)) 1214 if (IS_ERR(vol_args)) {
1208 return PTR_ERR(vol_args); 1215 ret = PTR_ERR(vol_args);
1216 goto out;
1217 }
1209 1218
1210 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1219 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1211 1220
1212 mutex_lock(&root->fs_info->volume_mutex);
1213 sizestr = vol_args->name; 1221 sizestr = vol_args->name;
1214 devstr = strchr(sizestr, ':'); 1222 devstr = strchr(sizestr, ':');
1215 if (devstr) { 1223 if (devstr) {
@@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1226 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1234 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1227 (unsigned long long)devid); 1235 (unsigned long long)devid);
1228 ret = -EINVAL; 1236 ret = -EINVAL;
1229 goto out_unlock; 1237 goto out_free;
1230 } 1238 }
1231 if (!strcmp(sizestr, "max")) 1239 if (!strcmp(sizestr, "max"))
1232 new_size = device->bdev->bd_inode->i_size; 1240 new_size = device->bdev->bd_inode->i_size;
@@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1241 new_size = memparse(sizestr, NULL); 1249 new_size = memparse(sizestr, NULL);
1242 if (new_size == 0) { 1250 if (new_size == 0) {
1243 ret = -EINVAL; 1251 ret = -EINVAL;
1244 goto out_unlock; 1252 goto out_free;
1245 } 1253 }
1246 } 1254 }
1247 1255
@@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1250 if (mod < 0) { 1258 if (mod < 0) {
1251 if (new_size > old_size) { 1259 if (new_size > old_size) {
1252 ret = -EINVAL; 1260 ret = -EINVAL;
1253 goto out_unlock; 1261 goto out_free;
1254 } 1262 }
1255 new_size = old_size - new_size; 1263 new_size = old_size - new_size;
1256 } else if (mod > 0) { 1264 } else if (mod > 0) {
@@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1259 1267
1260 if (new_size < 256 * 1024 * 1024) { 1268 if (new_size < 256 * 1024 * 1024) {
1261 ret = -EINVAL; 1269 ret = -EINVAL;
1262 goto out_unlock; 1270 goto out_free;
1263 } 1271 }
1264 if (new_size > device->bdev->bd_inode->i_size) { 1272 if (new_size > device->bdev->bd_inode->i_size) {
1265 ret = -EFBIG; 1273 ret = -EFBIG;
1266 goto out_unlock; 1274 goto out_free;
1267 } 1275 }
1268 1276
1269 do_div(new_size, root->sectorsize); 1277 do_div(new_size, root->sectorsize);
@@ -1276,7 +1284,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1276 trans = btrfs_start_transaction(root, 0); 1284 trans = btrfs_start_transaction(root, 0);
1277 if (IS_ERR(trans)) { 1285 if (IS_ERR(trans)) {
1278 ret = PTR_ERR(trans); 1286 ret = PTR_ERR(trans);
1279 goto out_unlock; 1287 goto out_free;
1280 } 1288 }
1281 ret = btrfs_grow_device(trans, device, new_size); 1289 ret = btrfs_grow_device(trans, device, new_size);
1282 btrfs_commit_transaction(trans, root); 1290 btrfs_commit_transaction(trans, root);
@@ -1284,9 +1292,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1284 ret = btrfs_shrink_device(device, new_size); 1292 ret = btrfs_shrink_device(device, new_size);
1285 } 1293 }
1286 1294
1287out_unlock: 1295out_free:
1288 mutex_unlock(&root->fs_info->volume_mutex);
1289 kfree(vol_args); 1296 kfree(vol_args);
1297out:
1298 mutex_unlock(&root->fs_info->volume_mutex);
1290 return ret; 1299 return ret;
1291} 1300}
1292 1301
@@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2052 if (!capable(CAP_SYS_ADMIN)) 2061 if (!capable(CAP_SYS_ADMIN))
2053 return -EPERM; 2062 return -EPERM;
2054 2063
2064 mutex_lock(&root->fs_info->volume_mutex);
2065 if (root->fs_info->balance_ctl) {
2066 printk(KERN_INFO "btrfs: balance in progress\n");
2067 ret = -EINVAL;
2068 goto out;
2069 }
2070
2055 vol_args = memdup_user(arg, sizeof(*vol_args)); 2071 vol_args = memdup_user(arg, sizeof(*vol_args));
2056 if (IS_ERR(vol_args)) 2072 if (IS_ERR(vol_args)) {
2057 return PTR_ERR(vol_args); 2073 ret = PTR_ERR(vol_args);
2074 goto out;
2075 }
2058 2076
2059 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2077 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2060 ret = btrfs_init_new_device(root, vol_args->name); 2078 ret = btrfs_init_new_device(root, vol_args->name);
2061 2079
2062 kfree(vol_args); 2080 kfree(vol_args);
2081out:
2082 mutex_unlock(&root->fs_info->volume_mutex);
2063 return ret; 2083 return ret;
2064} 2084}
2065 2085
@@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2074 if (root->fs_info->sb->s_flags & MS_RDONLY) 2094 if (root->fs_info->sb->s_flags & MS_RDONLY)
2075 return -EROFS; 2095 return -EROFS;
2076 2096
2097 mutex_lock(&root->fs_info->volume_mutex);
2098 if (root->fs_info->balance_ctl) {
2099 printk(KERN_INFO "btrfs: balance in progress\n");
2100 ret = -EINVAL;
2101 goto out;
2102 }
2103
2077 vol_args = memdup_user(arg, sizeof(*vol_args)); 2104 vol_args = memdup_user(arg, sizeof(*vol_args));
2078 if (IS_ERR(vol_args)) 2105 if (IS_ERR(vol_args)) {
2079 return PTR_ERR(vol_args); 2106 ret = PTR_ERR(vol_args);
2107 goto out;
2108 }
2080 2109
2081 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2110 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2082 ret = btrfs_rm_device(root, vol_args->name); 2111 ret = btrfs_rm_device(root, vol_args->name);
2083 2112
2084 kfree(vol_args); 2113 kfree(vol_args);
2114out:
2115 mutex_unlock(&root->fs_info->volume_mutex);
2085 return ret; 2116 return ret;
2086} 2117}
2087 2118
@@ -3034,6 +3065,163 @@ out:
3034 return ret; 3065 return ret;
3035} 3066}
3036 3067
3068void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3069 struct btrfs_ioctl_balance_args *bargs)
3070{
3071 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3072
3073 bargs->flags = bctl->flags;
3074
3075 if (atomic_read(&fs_info->balance_running))
3076 bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
3077 if (atomic_read(&fs_info->balance_pause_req))
3078 bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
3079 if (atomic_read(&fs_info->balance_cancel_req))
3080 bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
3081
3082 memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3083 memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3084 memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3085
3086 if (lock) {
3087 spin_lock(&fs_info->balance_lock);
3088 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3089 spin_unlock(&fs_info->balance_lock);
3090 } else {
3091 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3092 }
3093}
3094
3095static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
3096{
3097 struct btrfs_fs_info *fs_info = root->fs_info;
3098 struct btrfs_ioctl_balance_args *bargs;
3099 struct btrfs_balance_control *bctl;
3100 int ret;
3101
3102 if (!capable(CAP_SYS_ADMIN))
3103 return -EPERM;
3104
3105 if (fs_info->sb->s_flags & MS_RDONLY)
3106 return -EROFS;
3107
3108 mutex_lock(&fs_info->volume_mutex);
3109 mutex_lock(&fs_info->balance_mutex);
3110
3111 if (arg) {
3112 bargs = memdup_user(arg, sizeof(*bargs));
3113 if (IS_ERR(bargs)) {
3114 ret = PTR_ERR(bargs);
3115 goto out;
3116 }
3117
3118 if (bargs->flags & BTRFS_BALANCE_RESUME) {
3119 if (!fs_info->balance_ctl) {
3120 ret = -ENOTCONN;
3121 goto out_bargs;
3122 }
3123
3124 bctl = fs_info->balance_ctl;
3125 spin_lock(&fs_info->balance_lock);
3126 bctl->flags |= BTRFS_BALANCE_RESUME;
3127 spin_unlock(&fs_info->balance_lock);
3128
3129 goto do_balance;
3130 }
3131 } else {
3132 bargs = NULL;
3133 }
3134
3135 if (fs_info->balance_ctl) {
3136 ret = -EINPROGRESS;
3137 goto out_bargs;
3138 }
3139
3140 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3141 if (!bctl) {
3142 ret = -ENOMEM;
3143 goto out_bargs;
3144 }
3145
3146 bctl->fs_info = fs_info;
3147 if (arg) {
3148 memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3149 memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3150 memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3151
3152 bctl->flags = bargs->flags;
3153 } else {
3154 /* balance everything - no filters */
3155 bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
3156 }
3157
3158do_balance:
3159 ret = btrfs_balance(bctl, bargs);
3160 /*
3161 * bctl is freed in __cancel_balance or in free_fs_info if
3162 * restriper was paused all the way until unmount
3163 */
3164 if (arg) {
3165 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3166 ret = -EFAULT;
3167 }
3168
3169out_bargs:
3170 kfree(bargs);
3171out:
3172 mutex_unlock(&fs_info->balance_mutex);
3173 mutex_unlock(&fs_info->volume_mutex);
3174 return ret;
3175}
3176
3177static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
3178{
3179 if (!capable(CAP_SYS_ADMIN))
3180 return -EPERM;
3181
3182 switch (cmd) {
3183 case BTRFS_BALANCE_CTL_PAUSE:
3184 return btrfs_pause_balance(root->fs_info);
3185 case BTRFS_BALANCE_CTL_CANCEL:
3186 return btrfs_cancel_balance(root->fs_info);
3187 }
3188
3189 return -EINVAL;
3190}
3191
3192static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
3193 void __user *arg)
3194{
3195 struct btrfs_fs_info *fs_info = root->fs_info;
3196 struct btrfs_ioctl_balance_args *bargs;
3197 int ret = 0;
3198
3199 if (!capable(CAP_SYS_ADMIN))
3200 return -EPERM;
3201
3202 mutex_lock(&fs_info->balance_mutex);
3203 if (!fs_info->balance_ctl) {
3204 ret = -ENOTCONN;
3205 goto out;
3206 }
3207
3208 bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
3209 if (!bargs) {
3210 ret = -ENOMEM;
3211 goto out;
3212 }
3213
3214 update_ioctl_balance_args(fs_info, 1, bargs);
3215
3216 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3217 ret = -EFAULT;
3218
3219 kfree(bargs);
3220out:
3221 mutex_unlock(&fs_info->balance_mutex);
3222 return ret;
3223}
3224
3037long btrfs_ioctl(struct file *file, unsigned int 3225long btrfs_ioctl(struct file *file, unsigned int
3038 cmd, unsigned long arg) 3226 cmd, unsigned long arg)
3039{ 3227{
@@ -3078,7 +3266,7 @@ long btrfs_ioctl(struct file *file, unsigned int
3078 case BTRFS_IOC_DEV_INFO: 3266 case BTRFS_IOC_DEV_INFO:
3079 return btrfs_ioctl_dev_info(root, argp); 3267 return btrfs_ioctl_dev_info(root, argp);
3080 case BTRFS_IOC_BALANCE: 3268 case BTRFS_IOC_BALANCE:
3081 return btrfs_balance(root->fs_info->dev_root); 3269 return btrfs_ioctl_balance(root, NULL);
3082 case BTRFS_IOC_CLONE: 3270 case BTRFS_IOC_CLONE:
3083 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 3271 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
3084 case BTRFS_IOC_CLONE_RANGE: 3272 case BTRFS_IOC_CLONE_RANGE:
@@ -3110,6 +3298,12 @@ long btrfs_ioctl(struct file *file, unsigned int
3110 return btrfs_ioctl_scrub_cancel(root, argp); 3298 return btrfs_ioctl_scrub_cancel(root, argp);
3111 case BTRFS_IOC_SCRUB_PROGRESS: 3299 case BTRFS_IOC_SCRUB_PROGRESS:
3112 return btrfs_ioctl_scrub_progress(root, argp); 3300 return btrfs_ioctl_scrub_progress(root, argp);
3301 case BTRFS_IOC_BALANCE_V2:
3302 return btrfs_ioctl_balance(root, argp);
3303 case BTRFS_IOC_BALANCE_CTL:
3304 return btrfs_ioctl_balance_ctl(root, arg);
3305 case BTRFS_IOC_BALANCE_PROGRESS:
3306 return btrfs_ioctl_balance_progress(root, argp);
3113 } 3307 }
3114 3308
3115 return -ENOTTY; 3309 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 252ae9915de8..4f69028a68c4 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
109 __u64 reserved[124]; /* pad to 1k */ 109 __u64 reserved[124]; /* pad to 1k */
110}; 110};
111 111
112/* balance control ioctl modes */
113#define BTRFS_BALANCE_CTL_PAUSE 1
114#define BTRFS_BALANCE_CTL_CANCEL 2
115
116/*
117 * this is packed, because it should be exactly the same as its disk
118 * byte order counterpart (struct btrfs_disk_balance_args)
119 */
120struct btrfs_balance_args {
121 __u64 profiles;
122 __u64 usage;
123 __u64 devid;
124 __u64 pstart;
125 __u64 pend;
126 __u64 vstart;
127 __u64 vend;
128
129 __u64 target;
130
131 __u64 flags;
132
133 __u64 unused[8];
134} __attribute__ ((__packed__));
135
136/* report balance progress to userspace */
137struct btrfs_balance_progress {
138 __u64 expected; /* estimated # of chunks that will be
139 * relocated to fulfill the request */
140 __u64 considered; /* # of chunks we have considered so far */
141 __u64 completed; /* # of chunks relocated so far */
142};
143
144#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
145#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
146#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
147
148struct btrfs_ioctl_balance_args {
149 __u64 flags; /* in/out */
150 __u64 state; /* out */
151
152 struct btrfs_balance_args data; /* in/out */
153 struct btrfs_balance_args meta; /* in/out */
154 struct btrfs_balance_args sys; /* in/out */
155
156 struct btrfs_balance_progress stat; /* out */
157
158 __u64 unused[72]; /* pad to 1k */
159};
160
112#define BTRFS_INO_LOOKUP_PATH_MAX 4080 161#define BTRFS_INO_LOOKUP_PATH_MAX 4080
113struct btrfs_ioctl_ino_lookup_args { 162struct btrfs_ioctl_ino_lookup_args {
114 __u64 treeid; 163 __u64 treeid;
@@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
272 struct btrfs_ioctl_dev_info_args) 321 struct btrfs_ioctl_dev_info_args)
273#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ 322#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
274 struct btrfs_ioctl_fs_info_args) 323 struct btrfs_ioctl_fs_info_args)
324#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
325 struct btrfs_ioctl_balance_args)
326#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
327#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
328 struct btrfs_ioctl_balance_args)
275#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ 329#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
276 struct btrfs_ioctl_ino_path_args) 330 struct btrfs_ioctl_ino_path_args)
277#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ 331#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 200f63bc6675..5a7227fa9380 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -164,8 +164,9 @@ enum {
164 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 164 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
165 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 165 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
166 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 166 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
167 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, 167 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
168 Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, 168 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
169 Opt_err,
169}; 170};
170 171
171static match_table_t tokens = { 172static match_table_t tokens = {
@@ -200,6 +201,7 @@ static match_table_t tokens = {
200 {Opt_inode_cache, "inode_cache"}, 201 {Opt_inode_cache, "inode_cache"},
201 {Opt_no_space_cache, "nospace_cache"}, 202 {Opt_no_space_cache, "nospace_cache"},
202 {Opt_recovery, "recovery"}, 203 {Opt_recovery, "recovery"},
204 {Opt_skip_balance, "skip_balance"},
203 {Opt_err, NULL}, 205 {Opt_err, NULL},
204}; 206};
205 207
@@ -398,6 +400,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
398 printk(KERN_INFO "btrfs: enabling auto recovery"); 400 printk(KERN_INFO "btrfs: enabling auto recovery");
399 btrfs_set_opt(info->mount_opt, RECOVERY); 401 btrfs_set_opt(info->mount_opt, RECOVERY);
400 break; 402 break;
403 case Opt_skip_balance:
404 btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
405 break;
401 case Opt_err: 406 case Opt_err:
402 printk(KERN_INFO "btrfs: unrecognized mount option " 407 printk(KERN_INFO "btrfs: unrecognized mount option "
403 "'%s'\n", p); 408 "'%s'\n", p);
@@ -723,6 +728,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
723 seq_puts(seq, ",autodefrag"); 728 seq_puts(seq, ",autodefrag");
724 if (btrfs_test_opt(root, INODE_MAP_CACHE)) 729 if (btrfs_test_opt(root, INODE_MAP_CACHE))
725 seq_puts(seq, ",inode_cache"); 730 seq_puts(seq, ",inode_cache");
731 if (btrfs_test_opt(root, SKIP_BALANCE))
732 seq_puts(seq, ",skip_balance");
726 return 0; 733 return 0;
727} 734}
728 735
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ac00e3aa80a1..9489a2aca47b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -23,6 +23,7 @@
23#include <linux/random.h> 23#include <linux/random.h>
24#include <linux/iocontext.h> 24#include <linux/iocontext.h>
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/kthread.h>
26#include <asm/div64.h> 27#include <asm/div64.h>
27#include "compat.h" 28#include "compat.h"
28#include "ctree.h" 29#include "ctree.h"
@@ -1282,7 +1283,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1282 bool clear_super = false; 1283 bool clear_super = false;
1283 1284
1284 mutex_lock(&uuid_mutex); 1285 mutex_lock(&uuid_mutex);
1285 mutex_lock(&root->fs_info->volume_mutex);
1286 1286
1287 all_avail = root->fs_info->avail_data_alloc_bits | 1287 all_avail = root->fs_info->avail_data_alloc_bits |
1288 root->fs_info->avail_system_alloc_bits | 1288 root->fs_info->avail_system_alloc_bits |
@@ -1452,7 +1452,6 @@ error_close:
1452 if (bdev) 1452 if (bdev)
1453 blkdev_put(bdev, FMODE_READ | FMODE_EXCL); 1453 blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
1454out: 1454out:
1455 mutex_unlock(&root->fs_info->volume_mutex);
1456 mutex_unlock(&uuid_mutex); 1455 mutex_unlock(&uuid_mutex);
1457 return ret; 1456 return ret;
1458error_undo: 1457error_undo:
@@ -1629,7 +1628,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1629 } 1628 }
1630 1629
1631 filemap_write_and_wait(bdev->bd_inode->i_mapping); 1630 filemap_write_and_wait(bdev->bd_inode->i_mapping);
1632 mutex_lock(&root->fs_info->volume_mutex);
1633 1631
1634 devices = &root->fs_info->fs_devices->devices; 1632 devices = &root->fs_info->fs_devices->devices;
1635 /* 1633 /*
@@ -1757,8 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1757 ret = btrfs_relocate_sys_chunks(root); 1755 ret = btrfs_relocate_sys_chunks(root);
1758 BUG_ON(ret); 1756 BUG_ON(ret);
1759 } 1757 }
1760out: 1758
1761 mutex_unlock(&root->fs_info->volume_mutex);
1762 return ret; 1759 return ret;
1763error: 1760error:
1764 blkdev_put(bdev, FMODE_EXCL); 1761 blkdev_put(bdev, FMODE_EXCL);
@@ -1766,7 +1763,7 @@ error:
1766 mutex_unlock(&uuid_mutex); 1763 mutex_unlock(&uuid_mutex);
1767 up_write(&sb->s_umount); 1764 up_write(&sb->s_umount);
1768 } 1765 }
1769 goto out; 1766 return ret;
1770} 1767}
1771 1768
1772static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, 1769static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
@@ -2077,6 +2074,362 @@ error:
2077 return ret; 2074 return ret;
2078} 2075}
2079 2076
2077static int insert_balance_item(struct btrfs_root *root,
2078 struct btrfs_balance_control *bctl)
2079{
2080 struct btrfs_trans_handle *trans;
2081 struct btrfs_balance_item *item;
2082 struct btrfs_disk_balance_args disk_bargs;
2083 struct btrfs_path *path;
2084 struct extent_buffer *leaf;
2085 struct btrfs_key key;
2086 int ret, err;
2087
2088 path = btrfs_alloc_path();
2089 if (!path)
2090 return -ENOMEM;
2091
2092 trans = btrfs_start_transaction(root, 0);
2093 if (IS_ERR(trans)) {
2094 btrfs_free_path(path);
2095 return PTR_ERR(trans);
2096 }
2097
2098 key.objectid = BTRFS_BALANCE_OBJECTID;
2099 key.type = BTRFS_BALANCE_ITEM_KEY;
2100 key.offset = 0;
2101
2102 ret = btrfs_insert_empty_item(trans, root, path, &key,
2103 sizeof(*item));
2104 if (ret)
2105 goto out;
2106
2107 leaf = path->nodes[0];
2108 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
2109
2110 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
2111
2112 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
2113 btrfs_set_balance_data(leaf, item, &disk_bargs);
2114 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
2115 btrfs_set_balance_meta(leaf, item, &disk_bargs);
2116 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
2117 btrfs_set_balance_sys(leaf, item, &disk_bargs);
2118
2119 btrfs_set_balance_flags(leaf, item, bctl->flags);
2120
2121 btrfs_mark_buffer_dirty(leaf);
2122out:
2123 btrfs_free_path(path);
2124 err = btrfs_commit_transaction(trans, root);
2125 if (err && !ret)
2126 ret = err;
2127 return ret;
2128}
2129
2130static int del_balance_item(struct btrfs_root *root)
2131{
2132 struct btrfs_trans_handle *trans;
2133 struct btrfs_path *path;
2134 struct btrfs_key key;
2135 int ret, err;
2136
2137 path = btrfs_alloc_path();
2138 if (!path)
2139 return -ENOMEM;
2140
2141 trans = btrfs_start_transaction(root, 0);
2142 if (IS_ERR(trans)) {
2143 btrfs_free_path(path);
2144 return PTR_ERR(trans);
2145 }
2146
2147 key.objectid = BTRFS_BALANCE_OBJECTID;
2148 key.type = BTRFS_BALANCE_ITEM_KEY;
2149 key.offset = 0;
2150
2151 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2152 if (ret < 0)
2153 goto out;
2154 if (ret > 0) {
2155 ret = -ENOENT;
2156 goto out;
2157 }
2158
2159 ret = btrfs_del_item(trans, root, path);
2160out:
2161 btrfs_free_path(path);
2162 err = btrfs_commit_transaction(trans, root);
2163 if (err && !ret)
2164 ret = err;
2165 return ret;
2166}
2167
2168/*
2169 * This is a heuristic used to reduce the number of chunks balanced on
2170 * resume after balance was interrupted.
2171 */
2172static void update_balance_args(struct btrfs_balance_control *bctl)
2173{
2174 /*
2175 * Turn on soft mode for chunk types that were being converted.
2176 */
2177 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
2178 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
2179 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
2180 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
2181 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
2182 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
2183
2184 /*
2185 * Turn on usage filter if is not already used. The idea is
2186 * that chunks that we have already balanced should be
2187 * reasonably full. Don't do it for chunks that are being
2188 * converted - that will keep us from relocating unconverted
2189 * (albeit full) chunks.
2190 */
2191 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
2192 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
2193 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
2194 bctl->data.usage = 90;
2195 }
2196 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
2197 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
2198 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
2199 bctl->sys.usage = 90;
2200 }
2201 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
2202 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
2203 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
2204 bctl->meta.usage = 90;
2205 }
2206}
2207
2208/*
2209 * Should be called with both balance and volume mutexes held to
2210 * serialize other volume operations (add_dev/rm_dev/resize) with
2211 * restriper. Same goes for unset_balance_control.
2212 */
2213static void set_balance_control(struct btrfs_balance_control *bctl)
2214{
2215 struct btrfs_fs_info *fs_info = bctl->fs_info;
2216
2217 BUG_ON(fs_info->balance_ctl);
2218
2219 spin_lock(&fs_info->balance_lock);
2220 fs_info->balance_ctl = bctl;
2221 spin_unlock(&fs_info->balance_lock);
2222}
2223
2224static void unset_balance_control(struct btrfs_fs_info *fs_info)
2225{
2226 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
2227
2228 BUG_ON(!fs_info->balance_ctl);
2229
2230 spin_lock(&fs_info->balance_lock);
2231 fs_info->balance_ctl = NULL;
2232 spin_unlock(&fs_info->balance_lock);
2233
2234 kfree(bctl);
2235}
2236
2237/*
2238 * Balance filters. Return 1 if chunk should be filtered out
2239 * (should not be balanced).
2240 */
2241static int chunk_profiles_filter(u64 chunk_profile,
2242 struct btrfs_balance_args *bargs)
2243{
2244 chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
2245
2246 if (chunk_profile == 0)
2247 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2248
2249 if (bargs->profiles & chunk_profile)
2250 return 0;
2251
2252 return 1;
2253}
2254
2255static u64 div_factor_fine(u64 num, int factor)
2256{
2257 if (factor <= 0)
2258 return 0;
2259 if (factor >= 100)
2260 return num;
2261
2262 num *= factor;
2263 do_div(num, 100);
2264 return num;
2265}
2266
2267static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2268 struct btrfs_balance_args *bargs)
2269{
2270 struct btrfs_block_group_cache *cache;
2271 u64 chunk_used, user_thresh;
2272 int ret = 1;
2273
2274 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2275 chunk_used = btrfs_block_group_used(&cache->item);
2276
2277 user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
2278 if (chunk_used < user_thresh)
2279 ret = 0;
2280
2281 btrfs_put_block_group(cache);
2282 return ret;
2283}
2284
2285static int chunk_devid_filter(struct extent_buffer *leaf,
2286 struct btrfs_chunk *chunk,
2287 struct btrfs_balance_args *bargs)
2288{
2289 struct btrfs_stripe *stripe;
2290 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2291 int i;
2292
2293 for (i = 0; i < num_stripes; i++) {
2294 stripe = btrfs_stripe_nr(chunk, i);
2295 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
2296 return 0;
2297 }
2298
2299 return 1;
2300}
2301
2302/* [pstart, pend) */
2303static int chunk_drange_filter(struct extent_buffer *leaf,
2304 struct btrfs_chunk *chunk,
2305 u64 chunk_offset,
2306 struct btrfs_balance_args *bargs)
2307{
2308 struct btrfs_stripe *stripe;
2309 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2310 u64 stripe_offset;
2311 u64 stripe_length;
2312 int factor;
2313 int i;
2314
2315 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
2316 return 0;
2317
2318 if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
2319 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))
2320 factor = 2;
2321 else
2322 factor = 1;
2323 factor = num_stripes / factor;
2324
2325 for (i = 0; i < num_stripes; i++) {
2326 stripe = btrfs_stripe_nr(chunk, i);
2327 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
2328 continue;
2329
2330 stripe_offset = btrfs_stripe_offset(leaf, stripe);
2331 stripe_length = btrfs_chunk_length(leaf, chunk);
2332 do_div(stripe_length, factor);
2333
2334 if (stripe_offset < bargs->pend &&
2335 stripe_offset + stripe_length > bargs->pstart)
2336 return 0;
2337 }
2338
2339 return 1;
2340}
2341
2342/* [vstart, vend) */
2343static int chunk_vrange_filter(struct extent_buffer *leaf,
2344 struct btrfs_chunk *chunk,
2345 u64 chunk_offset,
2346 struct btrfs_balance_args *bargs)
2347{
2348 if (chunk_offset < bargs->vend &&
2349 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
2350 /* at least part of the chunk is inside this vrange */
2351 return 0;
2352
2353 return 1;
2354}
2355
2356static int chunk_soft_convert_filter(u64 chunk_profile,
2357 struct btrfs_balance_args *bargs)
2358{
2359 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
2360 return 0;
2361
2362 chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK;
2363
2364 if (chunk_profile == 0)
2365 chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2366
2367 if (bargs->target & chunk_profile)
2368 return 1;
2369
2370 return 0;
2371}
2372
2373static int should_balance_chunk(struct btrfs_root *root,
2374 struct extent_buffer *leaf,
2375 struct btrfs_chunk *chunk, u64 chunk_offset)
2376{
2377 struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
2378 struct btrfs_balance_args *bargs = NULL;
2379 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
2380
2381 /* type filter */
2382 if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
2383 (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
2384 return 0;
2385 }
2386
2387 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
2388 bargs = &bctl->data;
2389 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
2390 bargs = &bctl->sys;
2391 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
2392 bargs = &bctl->meta;
2393
2394 /* profiles filter */
2395 if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
2396 chunk_profiles_filter(chunk_type, bargs)) {
2397 return 0;
2398 }
2399
2400 /* usage filter */
2401 if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
2402 chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
2403 return 0;
2404 }
2405
2406 /* devid filter */
2407 if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
2408 chunk_devid_filter(leaf, chunk, bargs)) {
2409 return 0;
2410 }
2411
2412 /* drange filter, makes sense only with devid filter */
2413 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
2414 chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) {
2415 return 0;
2416 }
2417
2418 /* vrange filter */
2419 if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
2420 chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
2421 return 0;
2422 }
2423
2424 /* soft profile changing mode */
2425 if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
2426 chunk_soft_convert_filter(chunk_type, bargs)) {
2427 return 0;
2428 }
2429
2430 return 1;
2431}
2432
2080static u64 div_factor(u64 num, int factor) 2433static u64 div_factor(u64 num, int factor)
2081{ 2434{
2082 if (factor == 10) 2435 if (factor == 10)
@@ -2086,29 +2439,28 @@ static u64 div_factor(u64 num, int factor)
2086 return num; 2439 return num;
2087} 2440}
2088 2441
2089int btrfs_balance(struct btrfs_root *dev_root) 2442static int __btrfs_balance(struct btrfs_fs_info *fs_info)
2090{ 2443{
2091 int ret; 2444 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
2092 struct list_head *devices = &dev_root->fs_info->fs_devices->devices; 2445 struct btrfs_root *chunk_root = fs_info->chunk_root;
2446 struct btrfs_root *dev_root = fs_info->dev_root;
2447 struct list_head *devices;
2093 struct btrfs_device *device; 2448 struct btrfs_device *device;
2094 u64 old_size; 2449 u64 old_size;
2095 u64 size_to_free; 2450 u64 size_to_free;
2451 struct btrfs_chunk *chunk;
2096 struct btrfs_path *path; 2452 struct btrfs_path *path;
2097 struct btrfs_key key; 2453 struct btrfs_key key;
2098 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
2099 struct btrfs_trans_handle *trans;
2100 struct btrfs_key found_key; 2454 struct btrfs_key found_key;
2101 2455 struct btrfs_trans_handle *trans;
2102 if (dev_root->fs_info->sb->s_flags & MS_RDONLY) 2456 struct extent_buffer *leaf;
2103 return -EROFS; 2457 int slot;
2104 2458 int ret;
2105 if (!capable(CAP_SYS_ADMIN)) 2459 int enospc_errors = 0;
2106 return -EPERM; 2460 bool counting = true;
2107
2108 mutex_lock(&dev_root->fs_info->volume_mutex);
2109 dev_root = dev_root->fs_info->dev_root;
2110 2461
2111 /* step one make some room on all the devices */ 2462 /* step one make some room on all the devices */
2463 devices = &fs_info->fs_devices->devices;
2112 list_for_each_entry(device, devices, dev_list) { 2464 list_for_each_entry(device, devices, dev_list) {
2113 old_size = device->total_bytes; 2465 old_size = device->total_bytes;
2114 size_to_free = div_factor(old_size, 1); 2466 size_to_free = div_factor(old_size, 1);
@@ -2137,11 +2489,23 @@ int btrfs_balance(struct btrfs_root *dev_root)
2137 ret = -ENOMEM; 2489 ret = -ENOMEM;
2138 goto error; 2490 goto error;
2139 } 2491 }
2492
2493 /* zero out stat counters */
2494 spin_lock(&fs_info->balance_lock);
2495 memset(&bctl->stat, 0, sizeof(bctl->stat));
2496 spin_unlock(&fs_info->balance_lock);
2497again:
2140 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2498 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2141 key.offset = (u64)-1; 2499 key.offset = (u64)-1;
2142 key.type = BTRFS_CHUNK_ITEM_KEY; 2500 key.type = BTRFS_CHUNK_ITEM_KEY;
2143 2501
2144 while (1) { 2502 while (1) {
2503 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
2504 atomic_read(&fs_info->balance_cancel_req)) {
2505 ret = -ECANCELED;
2506 goto error;
2507 }
2508
2145 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 2509 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
2146 if (ret < 0) 2510 if (ret < 0)
2147 goto error; 2511 goto error;
@@ -2151,15 +2515,19 @@ int btrfs_balance(struct btrfs_root *dev_root)
2151 * failed 2515 * failed
2152 */ 2516 */
2153 if (ret == 0) 2517 if (ret == 0)
2154 break; 2518 BUG(); /* FIXME break ? */
2155 2519
2156 ret = btrfs_previous_item(chunk_root, path, 0, 2520 ret = btrfs_previous_item(chunk_root, path, 0,
2157 BTRFS_CHUNK_ITEM_KEY); 2521 BTRFS_CHUNK_ITEM_KEY);
2158 if (ret) 2522 if (ret) {
2523 ret = 0;
2159 break; 2524 break;
2525 }
2526
2527 leaf = path->nodes[0];
2528 slot = path->slots[0];
2529 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2160 2530
2161 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2162 path->slots[0]);
2163 if (found_key.objectid != key.objectid) 2531 if (found_key.objectid != key.objectid)
2164 break; 2532 break;
2165 2533
@@ -2167,22 +2535,375 @@ int btrfs_balance(struct btrfs_root *dev_root)
2167 if (found_key.offset == 0) 2535 if (found_key.offset == 0)
2168 break; 2536 break;
2169 2537
2538 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2539
2540 if (!counting) {
2541 spin_lock(&fs_info->balance_lock);
2542 bctl->stat.considered++;
2543 spin_unlock(&fs_info->balance_lock);
2544 }
2545
2546 ret = should_balance_chunk(chunk_root, leaf, chunk,
2547 found_key.offset);
2170 btrfs_release_path(path); 2548 btrfs_release_path(path);
2549 if (!ret)
2550 goto loop;
2551
2552 if (counting) {
2553 spin_lock(&fs_info->balance_lock);
2554 bctl->stat.expected++;
2555 spin_unlock(&fs_info->balance_lock);
2556 goto loop;
2557 }
2558
2171 ret = btrfs_relocate_chunk(chunk_root, 2559 ret = btrfs_relocate_chunk(chunk_root,
2172 chunk_root->root_key.objectid, 2560 chunk_root->root_key.objectid,
2173 found_key.objectid, 2561 found_key.objectid,
2174 found_key.offset); 2562 found_key.offset);
2175 if (ret && ret != -ENOSPC) 2563 if (ret && ret != -ENOSPC)
2176 goto error; 2564 goto error;
2565 if (ret == -ENOSPC) {
2566 enospc_errors++;
2567 } else {
2568 spin_lock(&fs_info->balance_lock);
2569 bctl->stat.completed++;
2570 spin_unlock(&fs_info->balance_lock);
2571 }
2572loop:
2177 key.offset = found_key.offset - 1; 2573 key.offset = found_key.offset - 1;
2178 } 2574 }
2179 ret = 0; 2575
2576 if (counting) {
2577 btrfs_release_path(path);
2578 counting = false;
2579 goto again;
2580 }
2180error: 2581error:
2181 btrfs_free_path(path); 2582 btrfs_free_path(path);
2182 mutex_unlock(&dev_root->fs_info->volume_mutex); 2583 if (enospc_errors) {
2584 printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
2585 enospc_errors);
2586 if (!ret)
2587 ret = -ENOSPC;
2588 }
2589
2183 return ret; 2590 return ret;
2184} 2591}
2185 2592
2593static inline int balance_need_close(struct btrfs_fs_info *fs_info)
2594{
2595 /* cancel requested || normal exit path */
2596 return atomic_read(&fs_info->balance_cancel_req) ||
2597 (atomic_read(&fs_info->balance_pause_req) == 0 &&
2598 atomic_read(&fs_info->balance_cancel_req) == 0);
2599}
2600
2601static void __cancel_balance(struct btrfs_fs_info *fs_info)
2602{
2603 int ret;
2604
2605 unset_balance_control(fs_info);
2606 ret = del_balance_item(fs_info->tree_root);
2607 BUG_ON(ret);
2608}
2609
2610void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
2611 struct btrfs_ioctl_balance_args *bargs);
2612
2613/*
2614 * Should be called with both balance and volume mutexes held
2615 */
2616int btrfs_balance(struct btrfs_balance_control *bctl,
2617 struct btrfs_ioctl_balance_args *bargs)
2618{
2619 struct btrfs_fs_info *fs_info = bctl->fs_info;
2620 u64 allowed;
2621 int ret;
2622
2623 if (btrfs_fs_closing(fs_info) ||
2624 atomic_read(&fs_info->balance_pause_req) ||
2625 atomic_read(&fs_info->balance_cancel_req)) {
2626 ret = -EINVAL;
2627 goto out;
2628 }
2629
2630 /*
2631 * In case of mixed groups both data and meta should be picked,
2632 * and identical options should be given for both of them.
2633 */
2634 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
2635 if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2636 (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) {
2637 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
2638 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
2639 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
2640 printk(KERN_ERR "btrfs: with mixed groups data and "
2641 "metadata balance options must be the same\n");
2642 ret = -EINVAL;
2643 goto out;
2644 }
2645 }
2646
2647 /*
2648 * Profile changing sanity checks. Skip them if a simple
2649 * balance is requested.
2650 */
2651 if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) &
2652 BTRFS_BALANCE_ARGS_CONVERT))
2653 goto do_balance;
2654
2655 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2656 if (fs_info->fs_devices->num_devices == 1)
2657 allowed |= BTRFS_BLOCK_GROUP_DUP;
2658 else if (fs_info->fs_devices->num_devices < 4)
2659 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
2660 else
2661 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
2662 BTRFS_BLOCK_GROUP_RAID10);
2663
2664 if (!profile_is_valid(bctl->data.target, 1) ||
2665 bctl->data.target & ~allowed) {
2666 printk(KERN_ERR "btrfs: unable to start balance with target "
2667 "data profile %llu\n",
2668 (unsigned long long)bctl->data.target);
2669 ret = -EINVAL;
2670 goto out;
2671 }
2672 if (!profile_is_valid(bctl->meta.target, 1) ||
2673 bctl->meta.target & ~allowed) {
2674 printk(KERN_ERR "btrfs: unable to start balance with target "
2675 "metadata profile %llu\n",
2676 (unsigned long long)bctl->meta.target);
2677 ret = -EINVAL;
2678 goto out;
2679 }
2680 if (!profile_is_valid(bctl->sys.target, 1) ||
2681 bctl->sys.target & ~allowed) {
2682 printk(KERN_ERR "btrfs: unable to start balance with target "
2683 "system profile %llu\n",
2684 (unsigned long long)bctl->sys.target);
2685 ret = -EINVAL;
2686 goto out;
2687 }
2688
2689 if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) {
2690 printk(KERN_ERR "btrfs: dup for data is not allowed\n");
2691 ret = -EINVAL;
2692 goto out;
2693 }
2694
2695 /* allow to reduce meta or sys integrity only if force set */
2696 allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
2697 BTRFS_BLOCK_GROUP_RAID10;
2698 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2699 (fs_info->avail_system_alloc_bits & allowed) &&
2700 !(bctl->sys.target & allowed)) ||
2701 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
2702 (fs_info->avail_metadata_alloc_bits & allowed) &&
2703 !(bctl->meta.target & allowed))) {
2704 if (bctl->flags & BTRFS_BALANCE_FORCE) {
2705 printk(KERN_INFO "btrfs: force reducing metadata "
2706 "integrity\n");
2707 } else {
2708 printk(KERN_ERR "btrfs: balance will reduce metadata "
2709 "integrity, use force if you want this\n");
2710 ret = -EINVAL;
2711 goto out;
2712 }
2713 }
2714
2715do_balance:
2716 ret = insert_balance_item(fs_info->tree_root, bctl);
2717 if (ret && ret != -EEXIST)
2718 goto out;
2719
2720 if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
2721 BUG_ON(ret == -EEXIST);
2722 set_balance_control(bctl);
2723 } else {
2724 BUG_ON(ret != -EEXIST);
2725 spin_lock(&fs_info->balance_lock);
2726 update_balance_args(bctl);
2727 spin_unlock(&fs_info->balance_lock);
2728 }
2729
2730 atomic_inc(&fs_info->balance_running);
2731 mutex_unlock(&fs_info->balance_mutex);
2732
2733 ret = __btrfs_balance(fs_info);
2734
2735 mutex_lock(&fs_info->balance_mutex);
2736 atomic_dec(&fs_info->balance_running);
2737
2738 if (bargs) {
2739 memset(bargs, 0, sizeof(*bargs));
2740 update_ioctl_balance_args(fs_info, 0, bargs);
2741 }
2742
2743 if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
2744 balance_need_close(fs_info)) {
2745 __cancel_balance(fs_info);
2746 }
2747
2748 wake_up(&fs_info->balance_wait_q);
2749
2750 return ret;
2751out:
2752 if (bctl->flags & BTRFS_BALANCE_RESUME)
2753 __cancel_balance(fs_info);
2754 else
2755 kfree(bctl);
2756 return ret;
2757}
2758
2759static int balance_kthread(void *data)
2760{
2761 struct btrfs_balance_control *bctl =
2762 (struct btrfs_balance_control *)data;
2763 struct btrfs_fs_info *fs_info = bctl->fs_info;
2764 int ret = 0;
2765
2766 mutex_lock(&fs_info->volume_mutex);
2767 mutex_lock(&fs_info->balance_mutex);
2768
2769 set_balance_control(bctl);
2770
2771 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
2772 printk(KERN_INFO "btrfs: force skipping balance\n");
2773 } else {
2774 printk(KERN_INFO "btrfs: continuing balance\n");
2775 ret = btrfs_balance(bctl, NULL);
2776 }
2777
2778 mutex_unlock(&fs_info->balance_mutex);
2779 mutex_unlock(&fs_info->volume_mutex);
2780 return ret;
2781}
2782
2783int btrfs_recover_balance(struct btrfs_root *tree_root)
2784{
2785 struct task_struct *tsk;
2786 struct btrfs_balance_control *bctl;
2787 struct btrfs_balance_item *item;
2788 struct btrfs_disk_balance_args disk_bargs;
2789 struct btrfs_path *path;
2790 struct extent_buffer *leaf;
2791 struct btrfs_key key;
2792 int ret;
2793
2794 path = btrfs_alloc_path();
2795 if (!path)
2796 return -ENOMEM;
2797
2798 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
2799 if (!bctl) {
2800 ret = -ENOMEM;
2801 goto out;
2802 }
2803
2804 key.objectid = BTRFS_BALANCE_OBJECTID;
2805 key.type = BTRFS_BALANCE_ITEM_KEY;
2806 key.offset = 0;
2807
2808 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
2809 if (ret < 0)
2810 goto out_bctl;
2811 if (ret > 0) { /* ret = -ENOENT; */
2812 ret = 0;
2813 goto out_bctl;
2814 }
2815
2816 leaf = path->nodes[0];
2817 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
2818
2819 bctl->fs_info = tree_root->fs_info;
2820 bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
2821
2822 btrfs_balance_data(leaf, item, &disk_bargs);
2823 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
2824 btrfs_balance_meta(leaf, item, &disk_bargs);
2825 btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
2826 btrfs_balance_sys(leaf, item, &disk_bargs);
2827 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
2828
2829 tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
2830 if (IS_ERR(tsk))
2831 ret = PTR_ERR(tsk);
2832 else
2833 goto out;
2834
2835out_bctl:
2836 kfree(bctl);
2837out:
2838 btrfs_free_path(path);
2839 return ret;
2840}
2841
2842int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
2843{
2844 int ret = 0;
2845
2846 mutex_lock(&fs_info->balance_mutex);
2847 if (!fs_info->balance_ctl) {
2848 mutex_unlock(&fs_info->balance_mutex);
2849 return -ENOTCONN;
2850 }
2851
2852 if (atomic_read(&fs_info->balance_running)) {
2853 atomic_inc(&fs_info->balance_pause_req);
2854 mutex_unlock(&fs_info->balance_mutex);
2855
2856 wait_event(fs_info->balance_wait_q,
2857 atomic_read(&fs_info->balance_running) == 0);
2858
2859 mutex_lock(&fs_info->balance_mutex);
2860 /* we are good with balance_ctl ripped off from under us */
2861 BUG_ON(atomic_read(&fs_info->balance_running));
2862 atomic_dec(&fs_info->balance_pause_req);
2863 } else {
2864 ret = -ENOTCONN;
2865 }
2866
2867 mutex_unlock(&fs_info->balance_mutex);
2868 return ret;
2869}
2870
2871int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
2872{
2873 mutex_lock(&fs_info->balance_mutex);
2874 if (!fs_info->balance_ctl) {
2875 mutex_unlock(&fs_info->balance_mutex);
2876 return -ENOTCONN;
2877 }
2878
2879 atomic_inc(&fs_info->balance_cancel_req);
2880 /*
2881 * if we are running just wait and return, balance item is
2882 * deleted in btrfs_balance in this case
2883 */
2884 if (atomic_read(&fs_info->balance_running)) {
2885 mutex_unlock(&fs_info->balance_mutex);
2886 wait_event(fs_info->balance_wait_q,
2887 atomic_read(&fs_info->balance_running) == 0);
2888 mutex_lock(&fs_info->balance_mutex);
2889 } else {
2890 /* __cancel_balance needs volume_mutex */
2891 mutex_unlock(&fs_info->balance_mutex);
2892 mutex_lock(&fs_info->volume_mutex);
2893 mutex_lock(&fs_info->balance_mutex);
2894
2895 if (fs_info->balance_ctl)
2896 __cancel_balance(fs_info);
2897
2898 mutex_unlock(&fs_info->volume_mutex);
2899 }
2900
2901 BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
2902 atomic_dec(&fs_info->balance_cancel_req);
2903 mutex_unlock(&fs_info->balance_mutex);
2904 return 0;
2905}
2906
2186/* 2907/*
2187 * shrinking a device means finding all of the device extents past 2908 * shrinking a device means finding all of the device extents past
2188 * the new size, and then following the back refs to the chunks. 2909 * the new size, and then following the back refs to the chunks.
@@ -2756,8 +3477,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
2756 return ret; 3477 return ret;
2757 3478
2758 alloc_profile = BTRFS_BLOCK_GROUP_METADATA | 3479 alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
2759 (fs_info->metadata_alloc_profile & 3480 fs_info->avail_metadata_alloc_bits;
2760 fs_info->avail_metadata_alloc_bits);
2761 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); 3481 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2762 3482
2763 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, 3483 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
@@ -2767,8 +3487,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
2767 sys_chunk_offset = chunk_offset + chunk_size; 3487 sys_chunk_offset = chunk_offset + chunk_size;
2768 3488
2769 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | 3489 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
2770 (fs_info->system_alloc_profile & 3490 fs_info->avail_system_alloc_bits;
2771 fs_info->avail_system_alloc_bits);
2772 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); 3491 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
2773 3492
2774 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, 3493 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
@@ -2955,12 +3674,8 @@ again:
2955 } 3674 }
2956 } 3675 }
2957 if (rw & REQ_DISCARD) { 3676 if (rw & REQ_DISCARD) {
2958 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | 3677 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK)
2959 BTRFS_BLOCK_GROUP_RAID1 |
2960 BTRFS_BLOCK_GROUP_DUP |
2961 BTRFS_BLOCK_GROUP_RAID10)) {
2962 stripes_required = map->num_stripes; 3678 stripes_required = map->num_stripes;
2963 }
2964 } 3679 }
2965 if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && 3680 if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
2966 stripes_allocated < stripes_required) { 3681 stripes_allocated < stripes_required) {
@@ -2984,10 +3699,7 @@ again:
2984 3699
2985 if (rw & REQ_DISCARD) 3700 if (rw & REQ_DISCARD)
2986 *length = min_t(u64, em->len - offset, *length); 3701 *length = min_t(u64, em->len - offset, *length);
2987 else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | 3702 else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
2988 BTRFS_BLOCK_GROUP_RAID1 |
2989 BTRFS_BLOCK_GROUP_RAID10 |
2990 BTRFS_BLOCK_GROUP_DUP)) {
2991 /* we limit the length of each bio to what fits in a stripe */ 3703 /* we limit the length of each bio to what fits in a stripe */
2992 *length = min_t(u64, em->len - offset, 3704 *length = min_t(u64, em->len - offset,
2993 map->stripe_len - stripe_offset); 3705 map->stripe_len - stripe_offset);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 78f2d4d4f37f..6faec9dd1f93 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -186,6 +186,51 @@ struct map_lookup {
186#define map_lookup_size(n) (sizeof(struct map_lookup) + \ 186#define map_lookup_size(n) (sizeof(struct map_lookup) + \
187 (sizeof(struct btrfs_bio_stripe) * (n))) 187 (sizeof(struct btrfs_bio_stripe) * (n)))
188 188
189/*
190 * Restriper's general type filter
191 */
192#define BTRFS_BALANCE_DATA (1ULL << 0)
193#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
194#define BTRFS_BALANCE_METADATA (1ULL << 2)
195
196#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
197 BTRFS_BALANCE_SYSTEM | \
198 BTRFS_BALANCE_METADATA)
199
200#define BTRFS_BALANCE_FORCE (1ULL << 3)
201#define BTRFS_BALANCE_RESUME (1ULL << 4)
202
203/*
204 * Balance filters
205 */
206#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
207#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
208#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
209#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
210#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
211
212/*
213 * Profile changing flags. When SOFT is set we won't relocate chunk if
214 * it already has the target profile (even though it may be
215 * half-filled).
216 */
217#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
218#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
219
220struct btrfs_balance_args;
221struct btrfs_balance_progress;
222struct btrfs_balance_control {
223 struct btrfs_fs_info *fs_info;
224
225 struct btrfs_balance_args data;
226 struct btrfs_balance_args meta;
227 struct btrfs_balance_args sys;
228
229 u64 flags;
230
231 struct btrfs_balance_progress stat;
232};
233
189int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 234int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
190 u64 end, u64 *length); 235 u64 end, u64 *length);
191 236
@@ -228,7 +273,11 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
228 u8 *uuid, u8 *fsid); 273 u8 *uuid, u8 *fsid);
229int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); 274int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
230int btrfs_init_new_device(struct btrfs_root *root, char *path); 275int btrfs_init_new_device(struct btrfs_root *root, char *path);
231int btrfs_balance(struct btrfs_root *dev_root); 276int btrfs_balance(struct btrfs_balance_control *bctl,
277 struct btrfs_ioctl_balance_args *bargs);
278int btrfs_recover_balance(struct btrfs_root *tree_root);
279int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
280int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
232int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 281int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
233int find_free_dev_extent(struct btrfs_trans_handle *trans, 282int find_free_dev_extent(struct btrfs_trans_handle *trans,
234 struct btrfs_device *device, u64 num_bytes, 283 struct btrfs_device *device, u64 num_bytes,