Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Kconfig                  |    3
-rw-r--r--  fs/btrfs/Makefile                 |    3
-rw-r--r--  fs/btrfs/backref.c                |   17
-rw-r--r--  fs/btrfs/backref.h                |    3
-rw-r--r--  fs/btrfs/block-rsv.c              |  425
-rw-r--r--  fs/btrfs/block-rsv.h              |  101
-rw-r--r--  fs/btrfs/btrfs_inode.h            |   22
-rw-r--r--  fs/btrfs/check-integrity.c        |   11
-rw-r--r--  fs/btrfs/compression.c            |   65
-rw-r--r--  fs/btrfs/compression.h            |    3
-rw-r--r--  fs/btrfs/ctree.h                  |  282
-rw-r--r--  fs/btrfs/delalloc-space.c         |  494
-rw-r--r--  fs/btrfs/delalloc-space.h         |   23
-rw-r--r--  fs/btrfs/delayed-ref.c            |  181
-rw-r--r--  fs/btrfs/delayed-ref.h            |   10
-rw-r--r--  fs/btrfs/dev-replace.c            |   31
-rw-r--r--  fs/btrfs/disk-io.c                |  166
-rw-r--r--  fs/btrfs/disk-io.h                |    2
-rw-r--r--  fs/btrfs/extent-tree.c            | 2503
-rw-r--r--  fs/btrfs/extent_io.c              |  149
-rw-r--r--  fs/btrfs/extent_io.h              |   10
-rw-r--r--  fs/btrfs/file-item.c              |   43
-rw-r--r--  fs/btrfs/file.c                   |   28
-rw-r--r--  fs/btrfs/free-space-cache.c       |   16
-rw-r--r--  fs/btrfs/inode-map.c              |    1
-rw-r--r--  fs/btrfs/inode.c                  |  109
-rw-r--r--  fs/btrfs/ioctl.c                  |   23
-rw-r--r--  fs/btrfs/locking.c                |   62
-rw-r--r--  fs/btrfs/ordered-data.c           |   56
-rw-r--r--  fs/btrfs/ordered-data.h           |    8
-rw-r--r--  fs/btrfs/print-tree.c             |    6
-rw-r--r--  fs/btrfs/props.c                  |    8
-rw-r--r--  fs/btrfs/qgroup.c                 |   24
-rw-r--r--  fs/btrfs/raid56.h                 |    4
-rw-r--r--  fs/btrfs/relocation.c             |    1
-rw-r--r--  fs/btrfs/root-tree.c              |   56
-rw-r--r--  fs/btrfs/scrub.c                  |   50
-rw-r--r--  fs/btrfs/send.c                   |   16
-rw-r--r--  fs/btrfs/space-info.c             | 1094
-rw-r--r--  fs/btrfs/space-info.h             |  133
-rw-r--r--  fs/btrfs/super.c                  |   30
-rw-r--r--  fs/btrfs/sysfs.c                  |    1
-rw-r--r--  fs/btrfs/tests/extent-io-tests.c  |  117
-rw-r--r--  fs/btrfs/tests/extent-map-tests.c |   22
-rw-r--r--  fs/btrfs/transaction.c            |   18
-rw-r--r--  fs/btrfs/transaction.h            |    1
-rw-r--r--  fs/btrfs/tree-checker.c           |   11
-rw-r--r--  fs/btrfs/tree-log.c               |   40
-rw-r--r--  fs/btrfs/volumes.c                |  376
-rw-r--r--  fs/btrfs/volumes.h                |   52
50 files changed, 3786 insertions(+), 3124 deletions(-)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 23537bc8c827..212b4a854f2c 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -2,7 +2,8 @@
2 2
3config BTRFS_FS 3config BTRFS_FS
4 tristate "Btrfs filesystem support" 4 tristate "Btrfs filesystem support"
5 select LIBCRC32C 5 select CRYPTO
6 select CRYPTO_CRC32C
6 select ZLIB_INFLATE 7 select ZLIB_INFLATE
7 select ZLIB_DEFLATE 8 select ZLIB_DEFLATE
8 select LZO_COMPRESS 9 select LZO_COMPRESS
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index ca693dd554e9..76a843198bcb 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,7 +10,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
10 export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ 10 export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
11 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ 11 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
12 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ 12 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
13 uuid-tree.o props.o free-space-tree.o tree-checker.o 13 uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
14 block-rsv.o delalloc-space.o
14 15
15btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o 16btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
16btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o 17btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 982152d3f920..89116afda7a2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1465,12 +1465,11 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1465 * 1465 *
1466 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. 1466 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
1467 */ 1467 */
1468int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) 1468int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
1469 struct ulist *roots, struct ulist *tmp)
1469{ 1470{
1470 struct btrfs_fs_info *fs_info = root->fs_info; 1471 struct btrfs_fs_info *fs_info = root->fs_info;
1471 struct btrfs_trans_handle *trans; 1472 struct btrfs_trans_handle *trans;
1472 struct ulist *tmp = NULL;
1473 struct ulist *roots = NULL;
1474 struct ulist_iterator uiter; 1473 struct ulist_iterator uiter;
1475 struct ulist_node *node; 1474 struct ulist_node *node;
1476 struct seq_list elem = SEQ_LIST_INIT(elem); 1475 struct seq_list elem = SEQ_LIST_INIT(elem);
@@ -1481,12 +1480,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
1481 .share_count = 0, 1480 .share_count = 0,
1482 }; 1481 };
1483 1482
1484 tmp = ulist_alloc(GFP_NOFS); 1483 ulist_init(roots);
1485 roots = ulist_alloc(GFP_NOFS); 1484 ulist_init(tmp);
1486 if (!tmp || !roots) {
1487 ret = -ENOMEM;
1488 goto out;
1489 }
1490 1485
1491 trans = btrfs_attach_transaction(root); 1486 trans = btrfs_attach_transaction(root);
1492 if (IS_ERR(trans)) { 1487 if (IS_ERR(trans)) {
@@ -1527,8 +1522,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
1527 up_read(&fs_info->commit_root_sem); 1522 up_read(&fs_info->commit_root_sem);
1528 } 1523 }
1529out: 1524out:
1530 ulist_free(tmp); 1525 ulist_release(roots);
1531 ulist_free(roots); 1526 ulist_release(tmp);
1532 return ret; 1527 return ret;
1533} 1528}
1534 1529
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 54d58988483a..777f61dc081e 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -57,7 +57,8 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
57 u64 start_off, struct btrfs_path *path, 57 u64 start_off, struct btrfs_path *path,
58 struct btrfs_inode_extref **ret_extref, 58 struct btrfs_inode_extref **ret_extref,
59 u64 *found_off); 59 u64 *found_off);
60int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr); 60int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
61 struct ulist *roots, struct ulist *tmp_ulist);
61 62
62int __init btrfs_prelim_ref_init(void); 63int __init btrfs_prelim_ref_init(void);
63void __cold btrfs_prelim_ref_exit(void); 64void __cold btrfs_prelim_ref_exit(void);
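Note: the btrfs_check_shared() change above moves ulist ownership to the caller. Instead of allocating and freeing a pair of ulists on every call, the function now only ulist_init()s and ulist_release()s storage that the caller provides, so the same ulists can be reused across many extents (for example while walking a file during fiemap). A minimal caller-side sketch; the variable names are illustrative and not taken from this patch:

        struct ulist roots;
        struct ulist tmp;
        int shared;

        ulist_init(&roots);
        ulist_init(&tmp);

        /*
         * Reusable across calls: btrfs_check_shared() re-inits and
         * releases the ulists itself, so no per-call allocation is
         * needed anymore.
         */
        shared = btrfs_check_shared(root, ino, bytenr, &roots, &tmp);
        if (shared < 0)
                ret = shared;           /* error */
        else if (shared)
                flags |= EXTENT_SHARED; /* extent is shared */

        ulist_release(&roots);
        ulist_release(&tmp);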
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
new file mode 100644
index 000000000000..698470b9f32d
--- /dev/null
+++ b/fs/btrfs/block-rsv.c
@@ -0,0 +1,425 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include "ctree.h"
4#include "block-rsv.h"
5#include "space-info.h"
6#include "math.h"
7#include "transaction.h"
8
9static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
10 struct btrfs_block_rsv *block_rsv,
11 struct btrfs_block_rsv *dest, u64 num_bytes,
12 u64 *qgroup_to_release_ret)
13{
14 struct btrfs_space_info *space_info = block_rsv->space_info;
15 u64 qgroup_to_release = 0;
16 u64 ret;
17
18 spin_lock(&block_rsv->lock);
19 if (num_bytes == (u64)-1) {
20 num_bytes = block_rsv->size;
21 qgroup_to_release = block_rsv->qgroup_rsv_size;
22 }
23 block_rsv->size -= num_bytes;
24 if (block_rsv->reserved >= block_rsv->size) {
25 num_bytes = block_rsv->reserved - block_rsv->size;
26 block_rsv->reserved = block_rsv->size;
27 block_rsv->full = 1;
28 } else {
29 num_bytes = 0;
30 }
31 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
32 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
33 block_rsv->qgroup_rsv_size;
34 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
35 } else {
36 qgroup_to_release = 0;
37 }
38 spin_unlock(&block_rsv->lock);
39
40 ret = num_bytes;
41 if (num_bytes > 0) {
42 if (dest) {
43 spin_lock(&dest->lock);
44 if (!dest->full) {
45 u64 bytes_to_add;
46
47 bytes_to_add = dest->size - dest->reserved;
48 bytes_to_add = min(num_bytes, bytes_to_add);
49 dest->reserved += bytes_to_add;
50 if (dest->reserved >= dest->size)
51 dest->full = 1;
52 num_bytes -= bytes_to_add;
53 }
54 spin_unlock(&dest->lock);
55 }
56 if (num_bytes)
57 btrfs_space_info_add_old_bytes(fs_info, space_info,
58 num_bytes);
59 }
60 if (qgroup_to_release_ret)
61 *qgroup_to_release_ret = qgroup_to_release;
62 return ret;
63}
64
65int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
66 struct btrfs_block_rsv *dst, u64 num_bytes,
67 bool update_size)
68{
69 int ret;
70
71 ret = btrfs_block_rsv_use_bytes(src, num_bytes);
72 if (ret)
73 return ret;
74
75 btrfs_block_rsv_add_bytes(dst, num_bytes, update_size);
76 return 0;
77}
78
79void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
80{
81 memset(rsv, 0, sizeof(*rsv));
82 spin_lock_init(&rsv->lock);
83 rsv->type = type;
84}
85
86void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
87 struct btrfs_block_rsv *rsv,
88 unsigned short type)
89{
90 btrfs_init_block_rsv(rsv, type);
91 rsv->space_info = btrfs_find_space_info(fs_info,
92 BTRFS_BLOCK_GROUP_METADATA);
93}
94
95struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
96 unsigned short type)
97{
98 struct btrfs_block_rsv *block_rsv;
99
100 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
101 if (!block_rsv)
102 return NULL;
103
104 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
105 return block_rsv;
106}
107
108void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
109 struct btrfs_block_rsv *rsv)
110{
111 if (!rsv)
112 return;
113 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
114 kfree(rsv);
115}
116
117int btrfs_block_rsv_add(struct btrfs_root *root,
118 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
119 enum btrfs_reserve_flush_enum flush)
120{
121 int ret;
122
123 if (num_bytes == 0)
124 return 0;
125
126 ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
127 if (!ret)
128 btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true);
129
130 return ret;
131}
132
133int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
134{
135 u64 num_bytes = 0;
136 int ret = -ENOSPC;
137
138 if (!block_rsv)
139 return 0;
140
141 spin_lock(&block_rsv->lock);
142 num_bytes = div_factor(block_rsv->size, min_factor);
143 if (block_rsv->reserved >= num_bytes)
144 ret = 0;
145 spin_unlock(&block_rsv->lock);
146
147 return ret;
148}
149
150int btrfs_block_rsv_refill(struct btrfs_root *root,
151 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
152 enum btrfs_reserve_flush_enum flush)
153{
154 u64 num_bytes = 0;
155 int ret = -ENOSPC;
156
157 if (!block_rsv)
158 return 0;
159
160 spin_lock(&block_rsv->lock);
161 num_bytes = min_reserved;
162 if (block_rsv->reserved >= num_bytes)
163 ret = 0;
164 else
165 num_bytes -= block_rsv->reserved;
166 spin_unlock(&block_rsv->lock);
167
168 if (!ret)
169 return 0;
170
171 ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
172 if (!ret) {
173 btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
174 return 0;
175 }
176
177 return ret;
178}
179
180u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
181 struct btrfs_block_rsv *block_rsv,
182 u64 num_bytes, u64 *qgroup_to_release)
183{
184 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
185 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
186 struct btrfs_block_rsv *target = NULL;
187
188 /*
189 * If we are the delayed_rsv then push to the global rsv, otherwise dump
190 * into the delayed rsv if it is not full.
191 */
192 if (block_rsv == delayed_rsv)
193 target = global_rsv;
194 else if (block_rsv != global_rsv && !delayed_rsv->full)
195 target = delayed_rsv;
196
197 if (target && block_rsv->space_info != target->space_info)
198 target = NULL;
199
200 return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
201 qgroup_to_release);
202}
203
204int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes)
205{
206 int ret = -ENOSPC;
207
208 spin_lock(&block_rsv->lock);
209 if (block_rsv->reserved >= num_bytes) {
210 block_rsv->reserved -= num_bytes;
211 if (block_rsv->reserved < block_rsv->size)
212 block_rsv->full = 0;
213 ret = 0;
214 }
215 spin_unlock(&block_rsv->lock);
216 return ret;
217}
218
219void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
220 u64 num_bytes, bool update_size)
221{
222 spin_lock(&block_rsv->lock);
223 block_rsv->reserved += num_bytes;
224 if (update_size)
225 block_rsv->size += num_bytes;
226 else if (block_rsv->reserved >= block_rsv->size)
227 block_rsv->full = 1;
228 spin_unlock(&block_rsv->lock);
229}
230
231int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
232 struct btrfs_block_rsv *dest, u64 num_bytes,
233 int min_factor)
234{
235 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
236 u64 min_bytes;
237
238 if (global_rsv->space_info != dest->space_info)
239 return -ENOSPC;
240
241 spin_lock(&global_rsv->lock);
242 min_bytes = div_factor(global_rsv->size, min_factor);
243 if (global_rsv->reserved < min_bytes + num_bytes) {
244 spin_unlock(&global_rsv->lock);
245 return -ENOSPC;
246 }
247 global_rsv->reserved -= num_bytes;
248 if (global_rsv->reserved < global_rsv->size)
249 global_rsv->full = 0;
250 spin_unlock(&global_rsv->lock);
251
252 btrfs_block_rsv_add_bytes(dest, num_bytes, true);
253 return 0;
254}
255
256void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
257{
258 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
259 struct btrfs_space_info *sinfo = block_rsv->space_info;
260 u64 num_bytes;
261
262 /*
263 * The global block rsv is based on the size of the extent tree, the
264 * checksum tree and the root tree. If the fs is empty we want to set
265 * it to a minimal amount for safety.
266 */
267 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
268 btrfs_root_used(&fs_info->csum_root->root_item) +
269 btrfs_root_used(&fs_info->tree_root->root_item);
270 num_bytes = max_t(u64, num_bytes, SZ_16M);
271
272 spin_lock(&sinfo->lock);
273 spin_lock(&block_rsv->lock);
274
275 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
276
277 if (block_rsv->reserved < block_rsv->size) {
278 num_bytes = btrfs_space_info_used(sinfo, true);
279 if (sinfo->total_bytes > num_bytes) {
280 num_bytes = sinfo->total_bytes - num_bytes;
281 num_bytes = min(num_bytes,
282 block_rsv->size - block_rsv->reserved);
283 block_rsv->reserved += num_bytes;
284 btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
285 num_bytes);
286 trace_btrfs_space_reservation(fs_info, "space_info",
287 sinfo->flags, num_bytes,
288 1);
289 }
290 } else if (block_rsv->reserved > block_rsv->size) {
291 num_bytes = block_rsv->reserved - block_rsv->size;
292 btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
293 -num_bytes);
294 trace_btrfs_space_reservation(fs_info, "space_info",
295 sinfo->flags, num_bytes, 0);
296 block_rsv->reserved = block_rsv->size;
297 }
298
299 if (block_rsv->reserved == block_rsv->size)
300 block_rsv->full = 1;
301 else
302 block_rsv->full = 0;
303
304 spin_unlock(&block_rsv->lock);
305 spin_unlock(&sinfo->lock);
306}
307
308void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
309{
310 struct btrfs_space_info *space_info;
311
312 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
313 fs_info->chunk_block_rsv.space_info = space_info;
314
315 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
316 fs_info->global_block_rsv.space_info = space_info;
317 fs_info->trans_block_rsv.space_info = space_info;
318 fs_info->empty_block_rsv.space_info = space_info;
319 fs_info->delayed_block_rsv.space_info = space_info;
320 fs_info->delayed_refs_rsv.space_info = space_info;
321
322 fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
323 fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
324 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
325 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
326 if (fs_info->quota_root)
327 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
328 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
329
330 btrfs_update_global_block_rsv(fs_info);
331}
332
333void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
334{
335 btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1);
336 WARN_ON(fs_info->trans_block_rsv.size > 0);
337 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
338 WARN_ON(fs_info->chunk_block_rsv.size > 0);
339 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
340 WARN_ON(fs_info->delayed_block_rsv.size > 0);
341 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
342 WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
343 WARN_ON(fs_info->delayed_refs_rsv.size > 0);
344}
345
346static struct btrfs_block_rsv *get_block_rsv(
347 const struct btrfs_trans_handle *trans,
348 const struct btrfs_root *root)
349{
350 struct btrfs_fs_info *fs_info = root->fs_info;
351 struct btrfs_block_rsv *block_rsv = NULL;
352
353 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
354 (root == fs_info->csum_root && trans->adding_csums) ||
355 (root == fs_info->uuid_root))
356 block_rsv = trans->block_rsv;
357
358 if (!block_rsv)
359 block_rsv = root->block_rsv;
360
361 if (!block_rsv)
362 block_rsv = &fs_info->empty_block_rsv;
363
364 return block_rsv;
365}
366
367struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
368 struct btrfs_root *root,
369 u32 blocksize)
370{
371 struct btrfs_fs_info *fs_info = root->fs_info;
372 struct btrfs_block_rsv *block_rsv;
373 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
374 int ret;
375 bool global_updated = false;
376
377 block_rsv = get_block_rsv(trans, root);
378
379 if (unlikely(block_rsv->size == 0))
380 goto try_reserve;
381again:
382 ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
383 if (!ret)
384 return block_rsv;
385
386 if (block_rsv->failfast)
387 return ERR_PTR(ret);
388
389 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
390 global_updated = true;
391 btrfs_update_global_block_rsv(fs_info);
392 goto again;
393 }
394
395 /*
396 * The global reserve still exists to save us from ourselves, so don't
397 * warn_on if we are short on our delayed refs reserve.
398 */
399 if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
400 btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
401 static DEFINE_RATELIMIT_STATE(_rs,
402 DEFAULT_RATELIMIT_INTERVAL * 10,
403 /*DEFAULT_RATELIMIT_BURST*/ 1);
404 if (__ratelimit(&_rs))
405 WARN(1, KERN_DEBUG
406 "BTRFS: block rsv returned %d\n", ret);
407 }
408try_reserve:
409 ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize,
410 BTRFS_RESERVE_NO_FLUSH);
411 if (!ret)
412 return block_rsv;
413 /*
414 * If we couldn't reserve metadata bytes try and use some from
415 * the global reserve if its space type is the same as the global
416 * reservation.
417 */
418 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
419 block_rsv->space_info == global_rsv->space_info) {
420 ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize);
421 if (!ret)
422 return global_rsv;
423 }
424 return ERR_PTR(ret);
425}
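Note: block-rsv.c is a new file, but the code is moved out of extent-tree.c rather than written from scratch; it collects the block reserve helpers behind one interface. Pieced together from the functions above, the typical life cycle of a temporary reserve looks roughly like the sketch below. This is a hedged illustration only: the byte counts and flush mode are made up and error handling is trimmed.

        struct btrfs_block_rsv *rsv;
        int ret;

        rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
        if (!rsv)
                return -ENOMEM;

        /* Reserve metadata space; may flush depending on the enum passed. */
        ret = btrfs_block_rsv_add(root, rsv, SZ_1M, BTRFS_RESERVE_FLUSH_ALL);
        if (ret)
                goto out;

        /* Consumers then take bytes out of the reservation. */
        ret = btrfs_block_rsv_use_bytes(rsv, SZ_64K);

out:
        /* Releases whatever is still reserved ((u64)-1) and frees the struct. */
        btrfs_free_block_rsv(fs_info, rsv);
        return ret;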
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
new file mode 100644
index 000000000000..d1428bb73fc5
--- /dev/null
+++ b/fs/btrfs/block-rsv.h
@@ -0,0 +1,101 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#ifndef BTRFS_BLOCK_RSV_H
4#define BTRFS_BLOCK_RSV_H
5
6struct btrfs_trans_handle;
7enum btrfs_reserve_flush_enum;
8
9/*
10 * Types of block reserves
11 */
12enum {
13 BTRFS_BLOCK_RSV_GLOBAL,
14 BTRFS_BLOCK_RSV_DELALLOC,
15 BTRFS_BLOCK_RSV_TRANS,
16 BTRFS_BLOCK_RSV_CHUNK,
17 BTRFS_BLOCK_RSV_DELOPS,
18 BTRFS_BLOCK_RSV_DELREFS,
19 BTRFS_BLOCK_RSV_EMPTY,
20 BTRFS_BLOCK_RSV_TEMP,
21};
22
23struct btrfs_block_rsv {
24 u64 size;
25 u64 reserved;
26 struct btrfs_space_info *space_info;
27 spinlock_t lock;
28 unsigned short full;
29 unsigned short type;
30 unsigned short failfast;
31
32 /*
33 * Qgroup equivalent for @size @reserved
34 *
35 * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
36 * about things like csum size nor how many tree blocks it will need to
37 * reserve.
38 *
39 * Qgroup cares more about net change of the extent usage.
40 *
41 * So for one newly inserted file extent, in worst case it will cause
42 * leaf split and level increase, nodesize for each file extent is
43 * already too much.
44 *
45 * In short, qgroup_size/reserved is the upper limit of possible needed
46 * qgroup metadata reservation.
47 */
48 u64 qgroup_rsv_size;
49 u64 qgroup_rsv_reserved;
50};
51
52void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
53struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
54 unsigned short type);
55void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
56 struct btrfs_block_rsv *rsv,
57 unsigned short type);
58void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
59 struct btrfs_block_rsv *rsv);
60int btrfs_block_rsv_add(struct btrfs_root *root,
61 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
62 enum btrfs_reserve_flush_enum flush);
63int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
64int btrfs_block_rsv_refill(struct btrfs_root *root,
65 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
66 enum btrfs_reserve_flush_enum flush);
67int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
68 struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
69 bool update_size);
70int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes);
71int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
72 struct btrfs_block_rsv *dest, u64 num_bytes,
73 int min_factor);
74void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
75 u64 num_bytes, bool update_size);
76u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
77 struct btrfs_block_rsv *block_rsv,
78 u64 num_bytes, u64 *qgroup_to_release);
79void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info);
80void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info);
81void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
82struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
83 struct btrfs_root *root,
84 u32 blocksize);
85
86static inline void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
87 struct btrfs_block_rsv *block_rsv,
88 u64 num_bytes)
89{
90 __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
91}
92
93static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
94 struct btrfs_block_rsv *block_rsv,
95 u32 blocksize)
96{
97 btrfs_block_rsv_add_bytes(block_rsv, blocksize, false);
98 btrfs_block_rsv_release(fs_info, block_rsv, 0);
99}
100
101#endif /* BTRFS_BLOCK_RSV_H */
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d5b438706b77..f853835c409c 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -337,22 +337,34 @@ static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
337 clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); 337 clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
338} 338}
339 339
340/* Array of bytes with variable length, hexadecimal format 0x1234 */
341#define CSUM_FMT "0x%*phN"
342#define CSUM_FMT_VALUE(size, bytes) size, bytes
343
340static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, 344static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
341 u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) 345 u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
342{ 346{
343 struct btrfs_root *root = inode->root; 347 struct btrfs_root *root = inode->root;
348 struct btrfs_super_block *sb = root->fs_info->super_copy;
349 const u16 csum_size = btrfs_super_csum_size(sb);
344 350
345 /* Output minus objectid, which is more meaningful */ 351 /* Output minus objectid, which is more meaningful */
346 if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) 352 if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
347 btrfs_warn_rl(root->fs_info, 353 btrfs_warn_rl(root->fs_info,
348 "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", 354"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
349 root->root_key.objectid, btrfs_ino(inode), 355 root->root_key.objectid, btrfs_ino(inode),
350 logical_start, csum, csum_expected, mirror_num); 356 logical_start,
357 CSUM_FMT_VALUE(csum_size, csum),
358 CSUM_FMT_VALUE(csum_size, csum_expected),
359 mirror_num);
351 else 360 else
352 btrfs_warn_rl(root->fs_info, 361 btrfs_warn_rl(root->fs_info,
353 "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", 362"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
354 root->root_key.objectid, btrfs_ino(inode), 363 root->root_key.objectid, btrfs_ino(inode),
355 logical_start, csum, csum_expected, mirror_num); 364 logical_start,
365 CSUM_FMT_VALUE(csum_size, csum),
366 CSUM_FMT_VALUE(csum_size, csum_expected),
367 mirror_num);
356} 368}
357 369
358#endif 370#endif
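Note: the CSUM_FMT/CSUM_FMT_VALUE pair switches the csum error message from a fixed "0x%08x" u32 to the %*phN specifier, which prints a variable-length byte array in hex sized by the superblock's csum size. A hypothetical use, only to show how the format expands (the bytes are invented):

        u8 csum[BTRFS_CSUM_SIZE] = { 0x12, 0x34, 0x56, 0x78 };

        pr_info("csum " CSUM_FMT "\n", CSUM_FMT_VALUE(4, csum));
        /*
         * Expands to pr_info("csum 0x%*phN\n", 4, csum)
         * and prints: csum 0x12345678 for a 4-byte crc32c checksum.
         */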
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b0c8094528d1..81a9731959a9 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -83,7 +83,7 @@
83#include <linux/blkdev.h> 83#include <linux/blkdev.h>
84#include <linux/mm.h> 84#include <linux/mm.h>
85#include <linux/string.h> 85#include <linux/string.h>
86#include <linux/crc32c.h> 86#include <crypto/hash.h>
87#include "ctree.h" 87#include "ctree.h"
88#include "disk-io.h" 88#include "disk-io.h"
89#include "transaction.h" 89#include "transaction.h"
@@ -1710,9 +1710,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1710 char **datav, unsigned int num_pages) 1710 char **datav, unsigned int num_pages)
1711{ 1711{
1712 struct btrfs_fs_info *fs_info = state->fs_info; 1712 struct btrfs_fs_info *fs_info = state->fs_info;
1713 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1713 struct btrfs_header *h; 1714 struct btrfs_header *h;
1714 u8 csum[BTRFS_CSUM_SIZE]; 1715 u8 csum[BTRFS_CSUM_SIZE];
1715 u32 crc = ~(u32)0;
1716 unsigned int i; 1716 unsigned int i;
1717 1717
1718 if (num_pages * PAGE_SIZE < state->metablock_size) 1718 if (num_pages * PAGE_SIZE < state->metablock_size)
@@ -1723,14 +1723,17 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1723 if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)) 1723 if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE))
1724 return 1; 1724 return 1;
1725 1725
1726 shash->tfm = fs_info->csum_shash;
1727 crypto_shash_init(shash);
1728
1726 for (i = 0; i < num_pages; i++) { 1729 for (i = 0; i < num_pages; i++) {
1727 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); 1730 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1728 size_t sublen = i ? PAGE_SIZE : 1731 size_t sublen = i ? PAGE_SIZE :
1729 (PAGE_SIZE - BTRFS_CSUM_SIZE); 1732 (PAGE_SIZE - BTRFS_CSUM_SIZE);
1730 1733
1731 crc = crc32c(crc, data, sublen); 1734 crypto_shash_update(shash, data, sublen);
1732 } 1735 }
1733 btrfs_csum_final(crc, csum); 1736 crypto_shash_final(shash, csum);
1734 if (memcmp(csum, h->csum, state->csum_size)) 1737 if (memcmp(csum, h->csum, state->csum_size))
1735 return 1; 1738 return 1;
1736 1739
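Note: the conversion above replaces the open-coded crc32c()/btrfs_csum_final() pair with the generic crypto shash API, using a transform that (per the disk-io.c changes elsewhere in this series) is allocated once at mount and cached in fs_info->csum_shash. The recurring pattern, reduced to its bones and assuming the tfm is already set up:

        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        u8 result[BTRFS_CSUM_SIZE];

        shash->tfm = fs_info->csum_shash;
        crypto_shash_init(shash);
        crypto_shash_update(shash, data, len);  /* may be called repeatedly */
        crypto_shash_final(shash, result);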
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 84dd4a8980c5..60c47b417a4b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -17,6 +17,7 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/sched/mm.h> 18#include <linux/sched/mm.h>
19#include <linux/log2.h> 19#include <linux/log2.h>
20#include <crypto/hash.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "disk-io.h" 22#include "disk-io.h"
22#include "transaction.h" 23#include "transaction.h"
@@ -42,6 +43,22 @@ const char* btrfs_compress_type2str(enum btrfs_compression_type type)
42 return NULL; 43 return NULL;
43} 44}
44 45
46bool btrfs_compress_is_valid_type(const char *str, size_t len)
47{
48 int i;
49
50 for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
51 size_t comp_len = strlen(btrfs_compress_types[i]);
52
53 if (len < comp_len)
54 continue;
55
56 if (!strncmp(btrfs_compress_types[i], str, comp_len))
57 return true;
58 }
59 return false;
60}
61
45static int btrfs_decompress_bio(struct compressed_bio *cb); 62static int btrfs_decompress_bio(struct compressed_bio *cb);
46 63
47static inline int compressed_bio_size(struct btrfs_fs_info *fs_info, 64static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -57,32 +74,37 @@ static int check_compressed_csum(struct btrfs_inode *inode,
57 struct compressed_bio *cb, 74 struct compressed_bio *cb,
58 u64 disk_start) 75 u64 disk_start)
59{ 76{
77 struct btrfs_fs_info *fs_info = inode->root->fs_info;
78 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
79 const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
60 int ret; 80 int ret;
61 struct page *page; 81 struct page *page;
62 unsigned long i; 82 unsigned long i;
63 char *kaddr; 83 char *kaddr;
64 u32 csum; 84 u8 csum[BTRFS_CSUM_SIZE];
65 u32 *cb_sum = &cb->sums; 85 u8 *cb_sum = cb->sums;
66 86
67 if (inode->flags & BTRFS_INODE_NODATASUM) 87 if (inode->flags & BTRFS_INODE_NODATASUM)
68 return 0; 88 return 0;
69 89
90 shash->tfm = fs_info->csum_shash;
91
70 for (i = 0; i < cb->nr_pages; i++) { 92 for (i = 0; i < cb->nr_pages; i++) {
71 page = cb->compressed_pages[i]; 93 page = cb->compressed_pages[i];
72 csum = ~(u32)0;
73 94
95 crypto_shash_init(shash);
74 kaddr = kmap_atomic(page); 96 kaddr = kmap_atomic(page);
75 csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE); 97 crypto_shash_update(shash, kaddr, PAGE_SIZE);
76 btrfs_csum_final(csum, (u8 *)&csum);
77 kunmap_atomic(kaddr); 98 kunmap_atomic(kaddr);
99 crypto_shash_final(shash, (u8 *)&csum);
78 100
79 if (csum != *cb_sum) { 101 if (memcmp(&csum, cb_sum, csum_size)) {
80 btrfs_print_data_csum_error(inode, disk_start, csum, 102 btrfs_print_data_csum_error(inode, disk_start,
81 *cb_sum, cb->mirror_num); 103 csum, cb_sum, cb->mirror_num);
82 ret = -EIO; 104 ret = -EIO;
83 goto fail; 105 goto fail;
84 } 106 }
85 cb_sum++; 107 cb_sum += csum_size;
86 108
87 } 109 }
88 ret = 0; 110 ret = 0;
@@ -318,7 +340,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
318 340
319 bdev = fs_info->fs_devices->latest_bdev; 341 bdev = fs_info->fs_devices->latest_bdev;
320 342
321 bio = btrfs_bio_alloc(bdev, first_byte); 343 bio = btrfs_bio_alloc(first_byte);
344 bio_set_dev(bio, bdev);
322 bio->bi_opf = REQ_OP_WRITE | write_flags; 345 bio->bi_opf = REQ_OP_WRITE | write_flags;
323 bio->bi_private = cb; 346 bio->bi_private = cb;
324 bio->bi_end_io = end_compressed_bio_write; 347 bio->bi_end_io = end_compressed_bio_write;
@@ -360,7 +383,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
360 bio_endio(bio); 383 bio_endio(bio);
361 } 384 }
362 385
363 bio = btrfs_bio_alloc(bdev, first_byte); 386 bio = btrfs_bio_alloc(first_byte);
387 bio_set_dev(bio, bdev);
364 bio->bi_opf = REQ_OP_WRITE | write_flags; 388 bio->bi_opf = REQ_OP_WRITE | write_flags;
365 bio->bi_private = cb; 389 bio->bi_private = cb;
366 bio->bi_end_io = end_compressed_bio_write; 390 bio->bi_end_io = end_compressed_bio_write;
@@ -536,7 +560,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
536 struct extent_map *em; 560 struct extent_map *em;
537 blk_status_t ret = BLK_STS_RESOURCE; 561 blk_status_t ret = BLK_STS_RESOURCE;
538 int faili = 0; 562 int faili = 0;
539 u32 *sums; 563 const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
564 u8 *sums;
540 565
541 em_tree = &BTRFS_I(inode)->extent_tree; 566 em_tree = &BTRFS_I(inode)->extent_tree;
542 567
@@ -558,7 +583,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
558 cb->errors = 0; 583 cb->errors = 0;
559 cb->inode = inode; 584 cb->inode = inode;
560 cb->mirror_num = mirror_num; 585 cb->mirror_num = mirror_num;
561 sums = &cb->sums; 586 sums = cb->sums;
562 587
563 cb->start = em->orig_start; 588 cb->start = em->orig_start;
564 em_len = em->len; 589 em_len = em->len;
@@ -597,7 +622,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
597 /* include any pages we added in add_ra-bio_pages */ 622 /* include any pages we added in add_ra-bio_pages */
598 cb->len = bio->bi_iter.bi_size; 623 cb->len = bio->bi_iter.bi_size;
599 624
600 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); 625 comp_bio = btrfs_bio_alloc(cur_disk_byte);
626 bio_set_dev(comp_bio, bdev);
601 comp_bio->bi_opf = REQ_OP_READ; 627 comp_bio->bi_opf = REQ_OP_READ;
602 comp_bio->bi_private = cb; 628 comp_bio->bi_private = cb;
603 comp_bio->bi_end_io = end_compressed_bio_read; 629 comp_bio->bi_end_io = end_compressed_bio_read;
@@ -617,6 +643,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
617 page->mapping = NULL; 643 page->mapping = NULL;
618 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < 644 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
619 PAGE_SIZE) { 645 PAGE_SIZE) {
646 unsigned int nr_sectors;
647
620 ret = btrfs_bio_wq_end_io(fs_info, comp_bio, 648 ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
621 BTRFS_WQ_ENDIO_DATA); 649 BTRFS_WQ_ENDIO_DATA);
622 BUG_ON(ret); /* -ENOMEM */ 650 BUG_ON(ret); /* -ENOMEM */
@@ -634,8 +662,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
634 sums); 662 sums);
635 BUG_ON(ret); /* -ENOMEM */ 663 BUG_ON(ret); /* -ENOMEM */
636 } 664 }
637 sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size, 665
638 fs_info->sectorsize); 666 nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
667 fs_info->sectorsize);
668 sums += csum_size * nr_sectors;
639 669
640 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); 670 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
641 if (ret) { 671 if (ret) {
@@ -643,7 +673,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
643 bio_endio(comp_bio); 673 bio_endio(comp_bio);
644 } 674 }
645 675
646 comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); 676 comp_bio = btrfs_bio_alloc(cur_disk_byte);
677 bio_set_dev(comp_bio, bdev);
647 comp_bio->bi_opf = REQ_OP_READ; 678 comp_bio->bi_opf = REQ_OP_READ;
648 comp_bio->bi_private = cb; 679 comp_bio->bi_private = cb;
649 comp_bio->bi_end_io = end_compressed_bio_read; 680 comp_bio->bi_end_io = end_compressed_bio_read;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9976fe0f7526..2035b8eb1290 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -61,7 +61,7 @@ struct compressed_bio {
61 * the start of a variable length array of checksums only 61 * the start of a variable length array of checksums only
62 * used by reads 62 * used by reads
63 */ 63 */
64 u32 sums; 64 u8 sums[];
65}; 65};
66 66
67static inline unsigned int btrfs_compress_type(unsigned int type_level) 67static inline unsigned int btrfs_compress_type(unsigned int type_level)
@@ -173,6 +173,7 @@ extern const struct btrfs_compress_op btrfs_lzo_compress;
173extern const struct btrfs_compress_op btrfs_zstd_compress; 173extern const struct btrfs_compress_op btrfs_zstd_compress;
174 174
175const char* btrfs_compress_type2str(enum btrfs_compression_type type); 175const char* btrfs_compress_type2str(enum btrfs_compression_type type);
176bool btrfs_compress_is_valid_type(const char *str, size_t len);
176 177
177int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); 178int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
178 179
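Note: btrfs_compress_is_valid_type() gives the property code a single place to validate a compression name. It compares only the leading strlen(type) bytes, so a value carrying a trailing level suffix still matches. A few hedged example calls; the callers are hypothetical and the expected results follow from the loop above, assuming the type table lists zlib/lzo/zstd as in mainline:

        btrfs_compress_is_valid_type("zlib", 4);        /* true */
        btrfs_compress_is_valid_type("zstd", 4);        /* true */
        btrfs_compress_is_valid_type("zlib:9", 6);      /* true, prefix match */
        btrfs_compress_is_valid_type("gzip", 4);        /* false */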
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0a61dff27f57..299e11e6c554 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -19,6 +19,7 @@
19#include <linux/kobject.h> 19#include <linux/kobject.h>
20#include <trace/events/btrfs.h> 20#include <trace/events/btrfs.h>
21#include <asm/kmap_types.h> 21#include <asm/kmap_types.h>
22#include <asm/unaligned.h>
22#include <linux/pagemap.h> 23#include <linux/pagemap.h>
23#include <linux/btrfs.h> 24#include <linux/btrfs.h>
24#include <linux/btrfs_tree.h> 25#include <linux/btrfs_tree.h>
@@ -31,11 +32,13 @@
31#include "extent_io.h" 32#include "extent_io.h"
32#include "extent_map.h" 33#include "extent_map.h"
33#include "async-thread.h" 34#include "async-thread.h"
35#include "block-rsv.h"
34 36
35struct btrfs_trans_handle; 37struct btrfs_trans_handle;
36struct btrfs_transaction; 38struct btrfs_transaction;
37struct btrfs_pending_snapshot; 39struct btrfs_pending_snapshot;
38struct btrfs_delayed_ref_root; 40struct btrfs_delayed_ref_root;
41struct btrfs_space_info;
39extern struct kmem_cache *btrfs_trans_handle_cachep; 42extern struct kmem_cache *btrfs_trans_handle_cachep;
40extern struct kmem_cache *btrfs_bit_radix_cachep; 43extern struct kmem_cache *btrfs_bit_radix_cachep;
41extern struct kmem_cache *btrfs_path_cachep; 44extern struct kmem_cache *btrfs_path_cachep;
@@ -45,7 +48,16 @@ struct btrfs_ref;
45 48
46#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ 49#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
47 50
48#define BTRFS_MAX_MIRRORS 3 51/*
52 * Maximum number of mirrors that can be available for all profiles counting
53 * the target device of dev-replace as one. During an active device replace
54 * procedure, the target device of the copy operation is a mirror for the
55 * filesystem data as well that can be used to read data in order to repair
56 * read errors on other disks.
57 *
58 * Current value is derived from RAID1 with 2 copies.
59 */
60#define BTRFS_MAX_MIRRORS (2 + 1)
49 61
50#define BTRFS_MAX_LEVEL 8 62#define BTRFS_MAX_LEVEL 8
51 63
@@ -72,6 +84,7 @@ struct btrfs_ref;
72 84
73/* four bytes for CRC32 */ 85/* four bytes for CRC32 */
74static const int btrfs_csum_sizes[] = { 4 }; 86static const int btrfs_csum_sizes[] = { 4 };
87static const char *btrfs_csum_names[] = { "crc32c" };
75 88
76#define BTRFS_EMPTY_DIR_SIZE 0 89#define BTRFS_EMPTY_DIR_SIZE 0
77 90
@@ -99,10 +112,6 @@ static inline u32 count_max_extents(u64 size)
99 return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); 112 return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
100} 113}
101 114
102struct btrfs_mapping_tree {
103 struct extent_map_tree map_tree;
104};
105
106static inline unsigned long btrfs_chunk_item_size(int num_stripes) 115static inline unsigned long btrfs_chunk_item_size(int num_stripes)
107{ 116{
108 BUG_ON(num_stripes == 0); 117 BUG_ON(num_stripes == 0);
@@ -395,115 +404,6 @@ struct raid_kobject {
395 struct list_head list; 404 struct list_head list;
396}; 405};
397 406
398struct btrfs_space_info {
399 spinlock_t lock;
400
401 u64 total_bytes; /* total bytes in the space,
402 this doesn't take mirrors into account */
403 u64 bytes_used; /* total bytes used,
404 this doesn't take mirrors into account */
405 u64 bytes_pinned; /* total bytes pinned, will be freed when the
406 transaction finishes */
407 u64 bytes_reserved; /* total bytes the allocator has reserved for
408 current allocations */
409 u64 bytes_may_use; /* number of bytes that may be used for
410 delalloc/allocations */
411 u64 bytes_readonly; /* total bytes that are read only */
412
413 u64 max_extent_size; /* This will hold the maximum extent size of
414 the space info if we had an ENOSPC in the
415 allocator. */
416
417 unsigned int full:1; /* indicates that we cannot allocate any more
418 chunks for this space */
419 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
420
421 unsigned int flush:1; /* set if we are trying to make space */
422
423 unsigned int force_alloc; /* set if we need to force a chunk
424 alloc for this space */
425
426 u64 disk_used; /* total bytes used on disk */
427 u64 disk_total; /* total bytes on disk, takes mirrors into
428 account */
429
430 u64 flags;
431
432 /*
433 * bytes_pinned is kept in line with what is actually pinned, as in
434 * we've called update_block_group and dropped the bytes_used counter
435 * and increased the bytes_pinned counter. However this means that
436 * bytes_pinned does not reflect the bytes that will be pinned once the
437 * delayed refs are flushed, so this counter is inc'ed every time we
438 * call btrfs_free_extent so it is a realtime count of what will be
439 * freed once the transaction is committed. It will be zeroed every
440 * time the transaction commits.
441 */
442 struct percpu_counter total_bytes_pinned;
443
444 struct list_head list;
445 /* Protected by the spinlock 'lock'. */
446 struct list_head ro_bgs;
447 struct list_head priority_tickets;
448 struct list_head tickets;
449 /*
450 * tickets_id just indicates the next ticket will be handled, so note
451 * it's not stored per ticket.
452 */
453 u64 tickets_id;
454
455 struct rw_semaphore groups_sem;
456 /* for block groups in our same type */
457 struct list_head block_groups[BTRFS_NR_RAID_TYPES];
458 wait_queue_head_t wait;
459
460 struct kobject kobj;
461 struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
462};
463
464/*
465 * Types of block reserves
466 */
467enum {
468 BTRFS_BLOCK_RSV_GLOBAL,
469 BTRFS_BLOCK_RSV_DELALLOC,
470 BTRFS_BLOCK_RSV_TRANS,
471 BTRFS_BLOCK_RSV_CHUNK,
472 BTRFS_BLOCK_RSV_DELOPS,
473 BTRFS_BLOCK_RSV_DELREFS,
474 BTRFS_BLOCK_RSV_EMPTY,
475 BTRFS_BLOCK_RSV_TEMP,
476};
477
478struct btrfs_block_rsv {
479 u64 size;
480 u64 reserved;
481 struct btrfs_space_info *space_info;
482 spinlock_t lock;
483 unsigned short full;
484 unsigned short type;
485 unsigned short failfast;
486
487 /*
488 * Qgroup equivalent for @size @reserved
489 *
490 * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
491 * about things like csum size nor how many tree blocks it will need to
492 * reserve.
493 *
494 * Qgroup cares more about net change of the extent usage.
495 *
496 * So for one newly inserted file extent, in worst case it will cause
497 * leaf split and level increase, nodesize for each file extent is
498 * already too much.
499 *
500 * In short, qgroup_size/reserved is the upper limit of possible needed
501 * qgroup metadata reservation.
502 */
503 u64 qgroup_rsv_size;
504 u64 qgroup_rsv_reserved;
505};
506
507/* 407/*
508 * free clusters are used to claim free space in relatively large chunks, 408 * free clusters are used to claim free space in relatively large chunks,
509 * allowing us to do less seeky writes. They are used for all metadata 409 * allowing us to do less seeky writes. They are used for all metadata
@@ -786,11 +686,18 @@ enum {
786 /* 686 /*
787 * Indicate that balance has been set up from the ioctl and is in the 687 * Indicate that balance has been set up from the ioctl and is in the
788 * main phase. The fs_info::balance_ctl is initialized. 688 * main phase. The fs_info::balance_ctl is initialized.
689 * Set and cleared while holding fs_info::balance_mutex.
789 */ 690 */
790 BTRFS_FS_BALANCE_RUNNING, 691 BTRFS_FS_BALANCE_RUNNING,
791 692
792 /* Indicate that the cleaner thread is awake and doing something. */ 693 /* Indicate that the cleaner thread is awake and doing something. */
793 BTRFS_FS_CLEANER_RUNNING, 694 BTRFS_FS_CLEANER_RUNNING,
695
696 /*
697 * The checksumming has an optimized version and is considered fast,
698 * so we don't need to offload checksums to workqueues.
699 */
700 BTRFS_FS_CSUM_IMPL_FAST,
794}; 701};
795 702
796struct btrfs_fs_info { 703struct btrfs_fs_info {
@@ -824,7 +731,7 @@ struct btrfs_fs_info {
824 struct extent_io_tree *pinned_extents; 731 struct extent_io_tree *pinned_extents;
825 732
826 /* logical->physical extent mapping */ 733 /* logical->physical extent mapping */
827 struct btrfs_mapping_tree mapping_tree; 734 struct extent_map_tree mapping_tree;
828 735
829 /* 736 /*
830 * block reservation for extent, checksum, root tree and 737 * block reservation for extent, checksum, root tree and
@@ -1160,6 +1067,14 @@ struct btrfs_fs_info {
1160 spinlock_t swapfile_pins_lock; 1067 spinlock_t swapfile_pins_lock;
1161 struct rb_root swapfile_pins; 1068 struct rb_root swapfile_pins;
1162 1069
1070 struct crypto_shash *csum_shash;
1071
1072 /*
1073 * Number of send operations in progress.
1074 * Updated while holding fs_info::balance_mutex.
1075 */
1076 int send_in_progress;
1077
1163#ifdef CONFIG_BTRFS_FS_REF_VERIFY 1078#ifdef CONFIG_BTRFS_FS_REF_VERIFY
1164 spinlock_t ref_verify_lock; 1079 spinlock_t ref_verify_lock;
1165 struct rb_root block_tree; 1080 struct rb_root block_tree;
@@ -2451,6 +2366,11 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
2451 return btrfs_csum_sizes[t]; 2366 return btrfs_csum_sizes[t];
2452} 2367}
2453 2368
2369static inline const char *btrfs_super_csum_name(u16 csum_type)
2370{
2371 /* csum type is validated at mount time */
2372 return btrfs_csum_names[csum_type];
2373}
2454 2374
2455/* 2375/*
2456 * The leaf data grows from end-to-front in the node. 2376 * The leaf data grows from end-to-front in the node.
@@ -2642,6 +2562,16 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
2642 ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \ 2562 ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
2643 btrfs_item_offset_nr(leaf, slot))) 2563 btrfs_item_offset_nr(leaf, slot)))
2644 2564
2565static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length)
2566{
2567 return crc32c(crc, address, length);
2568}
2569
2570static inline void btrfs_crc32c_final(u32 crc, u8 *result)
2571{
2572 put_unaligned_le32(~crc, result);
2573}
2574
2645static inline u64 btrfs_name_hash(const char *name, int len) 2575static inline u64 btrfs_name_hash(const char *name, int len)
2646{ 2576{
2647 return crc32c((u32)~1, name, len); 2577 return crc32c((u32)~1, name, len);
@@ -2656,12 +2586,6 @@ static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
2656 return (u64) crc32c(parent_objectid, name, len); 2586 return (u64) crc32c(parent_objectid, name, len);
2657} 2587}
2658 2588
2659static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2660{
2661 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
2662 (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
2663}
2664
2665static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) 2589static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
2666{ 2590{
2667 return mapping_gfp_constraint(mapping, ~__GFP_FS); 2591 return mapping_gfp_constraint(mapping, ~__GFP_FS);
@@ -2698,8 +2622,6 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
2698 return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; 2622 return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
2699} 2623}
2700 2624
2701int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
2702bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
2703void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, 2625void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
2704 const u64 start); 2626 const u64 start);
2705void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg); 2627void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
@@ -2814,17 +2736,28 @@ enum btrfs_flush_state {
2814 COMMIT_TRANS = 9, 2736 COMMIT_TRANS = 9,
2815}; 2737};
2816 2738
2817int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes); 2739/*
2818int btrfs_check_data_free_space(struct inode *inode, 2740 * control flags for do_chunk_alloc's force field
2819 struct extent_changeset **reserved, u64 start, u64 len); 2741 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
2820void btrfs_free_reserved_data_space(struct inode *inode, 2742 * if we really need one.
2821 struct extent_changeset *reserved, u64 start, u64 len); 2743 *
2822void btrfs_delalloc_release_space(struct inode *inode, 2744 * CHUNK_ALLOC_LIMITED means to only try and allocate one
2823 struct extent_changeset *reserved, 2745 * if we have very few chunks already allocated. This is
2824 u64 start, u64 len, bool qgroup_free); 2746 * used as part of the clustering code to help make sure
2825void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, 2747 * we have a good pool of storage to cluster in, without
2826 u64 len); 2748 * filling the FS with empty chunks
2827void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); 2749 *
2750 * CHUNK_ALLOC_FORCE means it must try to allocate one
2751 *
2752 */
2753enum btrfs_chunk_alloc_enum {
2754 CHUNK_ALLOC_NO_FORCE,
2755 CHUNK_ALLOC_LIMITED,
2756 CHUNK_ALLOC_FORCE,
2757};
2758
2759int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
2760 enum btrfs_chunk_alloc_enum force);
2828int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, 2761int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
2829 struct btrfs_block_rsv *rsv, 2762 struct btrfs_block_rsv *rsv,
2830 int nitems, bool use_global_rsv); 2763 int nitems, bool use_global_rsv);
@@ -2834,41 +2767,6 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
2834 bool qgroup_free); 2767 bool qgroup_free);
2835 2768
2836int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); 2769int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
2837void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
2838 bool qgroup_free);
2839int btrfs_delalloc_reserve_space(struct inode *inode,
2840 struct extent_changeset **reserved, u64 start, u64 len);
2841void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
2842struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
2843 unsigned short type);
2844void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
2845 struct btrfs_block_rsv *rsv,
2846 unsigned short type);
2847void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
2848 struct btrfs_block_rsv *rsv);
2849int btrfs_block_rsv_add(struct btrfs_root *root,
2850 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
2851 enum btrfs_reserve_flush_enum flush);
2852int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
2853int btrfs_block_rsv_refill(struct btrfs_root *root,
2854 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
2855 enum btrfs_reserve_flush_enum flush);
2856int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
2857 struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
2858 bool update_size);
2859int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
2860 struct btrfs_block_rsv *dest, u64 num_bytes,
2861 int min_factor);
2862void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
2863 struct btrfs_block_rsv *block_rsv,
2864 u64 num_bytes);
2865void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
2866void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
2867int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
2868 enum btrfs_reserve_flush_enum flush);
2869void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
2870 struct btrfs_block_rsv *src,
2871 u64 num_bytes);
2872int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache); 2770int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
2873void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache); 2771void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
2874void btrfs_put_block_group_cache(struct btrfs_fs_info *info); 2772void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
@@ -3186,7 +3084,8 @@ int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
3186struct btrfs_dio_private; 3084struct btrfs_dio_private;
3187int btrfs_del_csums(struct btrfs_trans_handle *trans, 3085int btrfs_del_csums(struct btrfs_trans_handle *trans,
3188 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); 3086 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
3189blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); 3087blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
3088 u8 *dst);
3190blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, 3089blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
3191 u64 logical_offset); 3090 u64 logical_offset);
3192int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3091int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
@@ -3514,8 +3413,7 @@ __cold
3514static inline void assfail(const char *expr, const char *file, int line) 3413static inline void assfail(const char *expr, const char *file, int line)
3515{ 3414{
3516 if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) { 3415 if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
3517 pr_err("assertion failed: %s, file: %s, line: %d\n", 3416 pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
3518 expr, file, line);
3519 BUG(); 3417 BUG();
3520 } 3418 }
3521} 3419}
@@ -3599,10 +3497,11 @@ do { \
3599/* compatibility and incompatibility defines */ 3497/* compatibility and incompatibility defines */
3600 3498
3601#define btrfs_set_fs_incompat(__fs_info, opt) \ 3499#define btrfs_set_fs_incompat(__fs_info, opt) \
3602 __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) 3500 __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \
3501 #opt)
3603 3502
3604static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, 3503static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
3605 u64 flag) 3504 u64 flag, const char* name)
3606{ 3505{
3607 struct btrfs_super_block *disk_super; 3506 struct btrfs_super_block *disk_super;
3608 u64 features; 3507 u64 features;
@@ -3615,18 +3514,20 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
3615 if (!(features & flag)) { 3514 if (!(features & flag)) {
3616 features |= flag; 3515 features |= flag;
3617 btrfs_set_super_incompat_flags(disk_super, features); 3516 btrfs_set_super_incompat_flags(disk_super, features);
3618 btrfs_info(fs_info, "setting %llu feature flag", 3517 btrfs_info(fs_info,
3619 flag); 3518 "setting incompat feature flag for %s (0x%llx)",
3519 name, flag);
3620 } 3520 }
3621 spin_unlock(&fs_info->super_lock); 3521 spin_unlock(&fs_info->super_lock);
3622 } 3522 }
3623} 3523}
3624 3524
3625#define btrfs_clear_fs_incompat(__fs_info, opt) \ 3525#define btrfs_clear_fs_incompat(__fs_info, opt) \
3626 __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) 3526 __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \
3527 #opt)
3627 3528
3628static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, 3529static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
3629 u64 flag) 3530 u64 flag, const char* name)
3630{ 3531{
3631 struct btrfs_super_block *disk_super; 3532 struct btrfs_super_block *disk_super;
3632 u64 features; 3533 u64 features;
@@ -3639,8 +3540,9 @@ static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
3639 if (features & flag) { 3540 if (features & flag) {
3640 features &= ~flag; 3541 features &= ~flag;
3641 btrfs_set_super_incompat_flags(disk_super, features); 3542 btrfs_set_super_incompat_flags(disk_super, features);
3642 btrfs_info(fs_info, "clearing %llu feature flag", 3543 btrfs_info(fs_info,
3643 flag); 3544 "clearing incompat feature flag for %s (0x%llx)",
3545 name, flag);
3644 } 3546 }
3645 spin_unlock(&fs_info->super_lock); 3547 spin_unlock(&fs_info->super_lock);
3646 } 3548 }
@@ -3657,10 +3559,11 @@ static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
3657} 3559}
3658 3560
3659#define btrfs_set_fs_compat_ro(__fs_info, opt) \ 3561#define btrfs_set_fs_compat_ro(__fs_info, opt) \
3660 __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) 3562 __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \
3563 #opt)
3661 3564
3662static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, 3565static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
3663 u64 flag) 3566 u64 flag, const char *name)
3664{ 3567{
3665 struct btrfs_super_block *disk_super; 3568 struct btrfs_super_block *disk_super;
3666 u64 features; 3569 u64 features;
@@ -3673,18 +3576,20 @@ static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
3673 if (!(features & flag)) { 3576 if (!(features & flag)) {
3674 features |= flag; 3577 features |= flag;
3675 btrfs_set_super_compat_ro_flags(disk_super, features); 3578 btrfs_set_super_compat_ro_flags(disk_super, features);
3676 btrfs_info(fs_info, "setting %llu ro feature flag", 3579 btrfs_info(fs_info,
3677 flag); 3580 "setting compat-ro feature flag for %s (0x%llx)",
3581 name, flag);
3678 } 3582 }
3679 spin_unlock(&fs_info->super_lock); 3583 spin_unlock(&fs_info->super_lock);
3680 } 3584 }
3681} 3585}
3682 3586
3683#define btrfs_clear_fs_compat_ro(__fs_info, opt) \ 3587#define btrfs_clear_fs_compat_ro(__fs_info, opt) \
3684 __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) 3588 __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \
3589 #opt)
3685 3590
3686static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, 3591static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
3687 u64 flag) 3592 u64 flag, const char *name)
3688{ 3593{
3689 struct btrfs_super_block *disk_super; 3594 struct btrfs_super_block *disk_super;
3690 u64 features; 3595 u64 features;
@@ -3697,8 +3602,9 @@ static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
3697 if (features & flag) { 3602 if (features & flag) {
3698 features &= ~flag; 3603 features &= ~flag;
3699 btrfs_set_super_compat_ro_flags(disk_super, features); 3604 btrfs_set_super_compat_ro_flags(disk_super, features);
3700 btrfs_info(fs_info, "clearing %llu ro feature flag", 3605 btrfs_info(fs_info,
3701 flag); 3606 "clearing compat-ro feature flag for %s (0x%llx)",
3607 name, flag);
3702 } 3608 }
3703 spin_unlock(&fs_info->super_lock); 3609 spin_unlock(&fs_info->super_lock);
3704 } 3610 }
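
The hunk above threads the stringified feature name (#opt) into the helpers alongside the flag bit (##opt), so the log line can report both. As a rough standalone illustration of that preprocessor pattern (not btrfs code; the feature names and bit values below are made up for the demo):

/* stringify-and-paste demo: ##opt builds the constant name, #opt its string */
#include <stdio.h>
#include <stdint.h>

#define FEATURE_INCOMPAT_RAID56		(1ULL << 7)	/* made-up value */
#define FEATURE_INCOMPAT_SKINNY_META	(1ULL << 8)	/* made-up value */

static void __set_feature(uint64_t flag, const char *name)
{
	printf("setting incompat feature flag for %s (0x%llx)\n",
	       name, (unsigned long long)flag);
}

#define set_feature(opt) \
	__set_feature(FEATURE_INCOMPAT_##opt, #opt)

int main(void)
{
	set_feature(RAID56);		/* ... for RAID56 (0x80) */
	set_feature(SKINNY_META);	/* ... for SKINNY_META (0x100) */
	return 0;
}
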
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
new file mode 100644
index 000000000000..17f7c0d38768
--- /dev/null
+++ b/fs/btrfs/delalloc-space.c
@@ -0,0 +1,494 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include "ctree.h"
4#include "delalloc-space.h"
5#include "block-rsv.h"
6#include "btrfs_inode.h"
7#include "space-info.h"
8#include "transaction.h"
9#include "qgroup.h"
10
11int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
12{
13 struct btrfs_root *root = inode->root;
14 struct btrfs_fs_info *fs_info = root->fs_info;
15 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
16 u64 used;
17 int ret = 0;
18 int need_commit = 2;
19 int have_pinned_space;
20
21 /* Make sure bytes are sectorsize aligned */
22 bytes = ALIGN(bytes, fs_info->sectorsize);
23
24 if (btrfs_is_free_space_inode(inode)) {
25 need_commit = 0;
26 ASSERT(current->journal_info);
27 }
28
29again:
30 /* Make sure we have enough space to handle the data first */
31 spin_lock(&data_sinfo->lock);
32 used = btrfs_space_info_used(data_sinfo, true);
33
34 if (used + bytes > data_sinfo->total_bytes) {
35 struct btrfs_trans_handle *trans;
36
37 /*
38 * If we don't have enough free bytes in this space then we need
39 * to alloc a new chunk.
40 */
41 if (!data_sinfo->full) {
42 u64 alloc_target;
43
44 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
45 spin_unlock(&data_sinfo->lock);
46
47 alloc_target = btrfs_data_alloc_profile(fs_info);
48 /*
 49 * It is ugly that we don't call the nolock join
 50 * transaction for the free space inode case here.
 51 * But it is safe because we only do the data space
 52 * reservation for the free space cache in the
 53 * transaction context; the common join transaction
 54 * just increases the counter of the current transaction
 55 * handle and doesn't try to acquire the trans_lock of
 56 * the fs.
 57 */
58 trans = btrfs_join_transaction(root);
59 if (IS_ERR(trans))
60 return PTR_ERR(trans);
61
62 ret = btrfs_chunk_alloc(trans, alloc_target,
63 CHUNK_ALLOC_NO_FORCE);
64 btrfs_end_transaction(trans);
65 if (ret < 0) {
66 if (ret != -ENOSPC)
67 return ret;
68 else {
69 have_pinned_space = 1;
70 goto commit_trans;
71 }
72 }
73
74 goto again;
75 }
76
77 /*
78 * If we don't have enough pinned space to deal with this
 79 * allocation, and no chunk was removed in the current transaction,
80 * don't bother committing the transaction.
81 */
82 have_pinned_space = __percpu_counter_compare(
83 &data_sinfo->total_bytes_pinned,
84 used + bytes - data_sinfo->total_bytes,
85 BTRFS_TOTAL_BYTES_PINNED_BATCH);
86 spin_unlock(&data_sinfo->lock);
87
88 /* Commit the current transaction and try again */
89commit_trans:
90 if (need_commit) {
91 need_commit--;
92
93 if (need_commit > 0) {
94 btrfs_start_delalloc_roots(fs_info, -1);
95 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
96 (u64)-1);
97 }
98
99 trans = btrfs_join_transaction(root);
100 if (IS_ERR(trans))
101 return PTR_ERR(trans);
102 if (have_pinned_space >= 0 ||
103 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
104 &trans->transaction->flags) ||
105 need_commit > 0) {
106 ret = btrfs_commit_transaction(trans);
107 if (ret)
108 return ret;
109 /*
110 * The cleaner kthread might still be doing iput
111 * operations. Wait for it to finish so that
112 * more space is released. We don't need to
113 * explicitly run the delayed iputs here because
114 * the commit_transaction would have woken up
115 * the cleaner.
116 */
117 ret = btrfs_wait_on_delayed_iputs(fs_info);
118 if (ret)
119 return ret;
120 goto again;
121 } else {
122 btrfs_end_transaction(trans);
123 }
124 }
125
126 trace_btrfs_space_reservation(fs_info,
127 "space_info:enospc",
128 data_sinfo->flags, bytes, 1);
129 return -ENOSPC;
130 }
131 btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, bytes);
132 trace_btrfs_space_reservation(fs_info, "space_info",
133 data_sinfo->flags, bytes, 1);
134 spin_unlock(&data_sinfo->lock);
135
136 return 0;
137}
138
139int btrfs_check_data_free_space(struct inode *inode,
140 struct extent_changeset **reserved, u64 start, u64 len)
141{
142 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
143 int ret;
144
145 /* align the range */
146 len = round_up(start + len, fs_info->sectorsize) -
147 round_down(start, fs_info->sectorsize);
148 start = round_down(start, fs_info->sectorsize);
149
150 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
151 if (ret < 0)
152 return ret;
153
154 /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
155 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
156 if (ret < 0)
157 btrfs_free_reserved_data_space_noquota(inode, start, len);
158 else
159 ret = 0;
160 return ret;
161}
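
btrfs_check_data_free_space() first widens the requested byte range to whole sectors before reserving data and qgroup space. A quick standalone model of that round_up()/round_down() alignment (the 4K sectorsize is just an assumed example):

#include <stdio.h>
#include <stdint.h>

/* same idea as the kernel's round_up()/round_down() for power-of-two sizes */
#define round_down(x, y)	((x) & ~((uint64_t)(y) - 1))
#define round_up(x, y)		round_down((x) + (y) - 1, (y))

int main(void)
{
	uint64_t sectorsize = 4096;		/* assumed for the example */
	uint64_t start = 5000, len = 100;

	/* widen [start, start + len) to sector boundaries, as the function does */
	uint64_t alen   = round_up(start + len, sectorsize) -
			  round_down(start, sectorsize);
	uint64_t astart = round_down(start, sectorsize);

	/* prints: reserve 4096 bytes starting at 4096 */
	printf("reserve %llu bytes starting at %llu\n",
	       (unsigned long long)alen, (unsigned long long)astart);
	return 0;
}
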
162
163/*
164 * Called if we need to clear a data reservation for this inode,
165 * normally in an error case.
166 *
167 * This one will *NOT* use the accurate qgroup reserved space API; it is only
168 * for cases where we can't sleep and are sure it won't affect qgroup
169 * reserved space, like clear_bit_hook().
170 */
171void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
172 u64 len)
173{
174 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
175 struct btrfs_space_info *data_sinfo;
176
177 /* Make sure the range is aligned to sectorsize */
178 len = round_up(start + len, fs_info->sectorsize) -
179 round_down(start, fs_info->sectorsize);
180 start = round_down(start, fs_info->sectorsize);
181
182 data_sinfo = fs_info->data_sinfo;
183 spin_lock(&data_sinfo->lock);
184 btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, -len);
185 trace_btrfs_space_reservation(fs_info, "space_info",
186 data_sinfo->flags, len, 0);
187 spin_unlock(&data_sinfo->lock);
188}
189
190/*
191 * Called if we need to clear a data reservation for this inode,
192 * normally in an error case.
193 *
194 * This one will handle the per-inode data rsv map for the accurate reserved
195 * space framework.
196 */
197void btrfs_free_reserved_data_space(struct inode *inode,
198 struct extent_changeset *reserved, u64 start, u64 len)
199{
200 struct btrfs_root *root = BTRFS_I(inode)->root;
201
202 /* Make sure the range is aligned to sectorsize */
203 len = round_up(start + len, root->fs_info->sectorsize) -
204 round_down(start, root->fs_info->sectorsize);
205 start = round_down(start, root->fs_info->sectorsize);
206
207 btrfs_free_reserved_data_space_noquota(inode, start, len);
208 btrfs_qgroup_free_data(inode, reserved, start, len);
209}
210
211/**
212 * btrfs_inode_rsv_release - release any excessive reservation.
213 * @inode - the inode we need to release from.
214 * @qgroup_free - free or convert qgroup meta.
215 * Unlike normal operation, qgroup meta reservation needs to know if we are
216 * freeing qgroup reservation or just converting it into per-trans. Normally
217 * @qgroup_free is true for error handling, and false for normal release.
218 *
219 * This is the same as btrfs_block_rsv_release, except that it handles the
220 * tracepoint for the reservation.
221 */
222static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
223{
224 struct btrfs_fs_info *fs_info = inode->root->fs_info;
225 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
226 u64 released = 0;
227 u64 qgroup_to_release = 0;
228
229 /*
230 * Since we statically set the block_rsv->size we just want to say we
231 * are releasing 0 bytes, and then we'll just get the reservation over
232 * the size free'd.
233 */
234 released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
235 &qgroup_to_release);
236 if (released > 0)
237 trace_btrfs_space_reservation(fs_info, "delalloc",
238 btrfs_ino(inode), released, 0);
239 if (qgroup_free)
240 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
241 else
242 btrfs_qgroup_convert_reserved_meta(inode->root,
243 qgroup_to_release);
244}
245
246static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
247 struct btrfs_inode *inode)
248{
249 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
250 u64 reserve_size = 0;
251 u64 qgroup_rsv_size = 0;
252 u64 csum_leaves;
253 unsigned outstanding_extents;
254
255 lockdep_assert_held(&inode->lock);
256 outstanding_extents = inode->outstanding_extents;
257 if (outstanding_extents)
258 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
259 outstanding_extents + 1);
260 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
261 inode->csum_bytes);
262 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
263 csum_leaves);
264 /*
265 * For qgroup rsv, the calculation is very simple:
266 * account one nodesize for each outstanding extent
267 *
 268 * This overestimates in most cases.
269 */
270 qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
271
272 spin_lock(&block_rsv->lock);
273 block_rsv->size = reserve_size;
274 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
275 spin_unlock(&block_rsv->lock);
276}
277
278static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
279 u64 num_bytes, u64 *meta_reserve,
280 u64 *qgroup_reserve)
281{
282 u64 nr_extents = count_max_extents(num_bytes);
283 u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
284
285 /* We add one for the inode update at finish ordered time */
286 *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
287 nr_extents + csum_leaves + 1);
288 *qgroup_reserve = nr_extents * fs_info->nodesize;
289}
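
calc_inode_reservations() sizes the worst case up front: one metadata item per possible extent, plus the csum leaves those bytes may need, plus one item for the inode update at finish-ordered time, while the qgroup side charges one nodesize per extent. Below is a standalone model of that arithmetic; the granularities and per-item size are assumed placeholders, not the real kernel helpers:

#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Assumed stand-ins; the real values come from fs geometry and the helpers. */
static const uint64_t max_extent_size = 128ULL << 20;	/* count_max_extents() granularity */
static const uint64_t bytes_per_csum_leaf = 1ULL << 20;	/* placeholder for btrfs_csum_bytes_to_leaves() */
static const uint64_t per_item_bytes = 262144;		/* placeholder for btrfs_calc_trans_metadata_size(fs_info, 1) */
static const uint64_t nodesize = 16384;

int main(void)
{
	uint64_t num_bytes = 4ULL << 20;	/* a 4 MiB delalloc write */

	uint64_t nr_extents  = DIV_ROUND_UP(num_bytes, max_extent_size);
	uint64_t csum_leaves = DIV_ROUND_UP(num_bytes, bytes_per_csum_leaf);

	/* the extra +1 is the inode update at finish-ordered time */
	uint64_t meta_reserve   = per_item_bytes * (nr_extents + csum_leaves + 1);
	uint64_t qgroup_reserve = nr_extents * nodesize;

	/* with these placeholders: extents=1 csum_leaves=4 meta=1572864 qgroup=16384 */
	printf("extents=%llu csum_leaves=%llu meta=%llu qgroup=%llu\n",
	       (unsigned long long)nr_extents, (unsigned long long)csum_leaves,
	       (unsigned long long)meta_reserve, (unsigned long long)qgroup_reserve);
	return 0;
}
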
290
291int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
292{
293 struct btrfs_root *root = inode->root;
294 struct btrfs_fs_info *fs_info = root->fs_info;
295 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
296 u64 meta_reserve, qgroup_reserve;
297 unsigned nr_extents;
298 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
299 int ret = 0;
300 bool delalloc_lock = true;
301
302 /*
303 * If we are a free space inode we need to not flush since we will be in
304 * the middle of a transaction commit. We also don't need the delalloc
305 * mutex since we won't race with anybody. We need this mostly to make
306 * lockdep shut its filthy mouth.
307 *
308 * If we have a transaction open (can happen if we call truncate_block
309 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
310 */
311 if (btrfs_is_free_space_inode(inode)) {
312 flush = BTRFS_RESERVE_NO_FLUSH;
313 delalloc_lock = false;
314 } else {
315 if (current->journal_info)
316 flush = BTRFS_RESERVE_FLUSH_LIMIT;
317
318 if (btrfs_transaction_in_commit(fs_info))
319 schedule_timeout(1);
320 }
321
322 if (delalloc_lock)
323 mutex_lock(&inode->delalloc_mutex);
324
325 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
326
327 /*
328 * We always want to do it this way, every other way is wrong and ends
329 * in tears. Pre-reserving the amount we are going to add will always
330 * be the right way, because otherwise if we have enough parallelism we
331 * could end up with thousands of inodes all holding little bits of
332 * reservations they were able to make previously and the only way to
333 * reclaim that space is to ENOSPC out the operations and clear
334 * everything out and try again, which is bad. This way we just
335 * over-reserve slightly, and clean up the mess when we are done.
336 */
337 calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
338 &qgroup_reserve);
339 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
340 if (ret)
341 goto out_fail;
342 ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
343 if (ret)
344 goto out_qgroup;
345
346 /*
347 * Now we need to update our outstanding extents and csum bytes _first_
348 * and then add the reservation to the block_rsv. This keeps us from
349 * racing with an ordered completion or some such that would think it
350 * needs to free the reservation we just made.
351 */
352 spin_lock(&inode->lock);
353 nr_extents = count_max_extents(num_bytes);
354 btrfs_mod_outstanding_extents(inode, nr_extents);
355 inode->csum_bytes += num_bytes;
356 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
357 spin_unlock(&inode->lock);
358
359 /* Now we can safely add our space to our block rsv */
360 btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false);
361 trace_btrfs_space_reservation(root->fs_info, "delalloc",
362 btrfs_ino(inode), meta_reserve, 1);
363
364 spin_lock(&block_rsv->lock);
365 block_rsv->qgroup_rsv_reserved += qgroup_reserve;
366 spin_unlock(&block_rsv->lock);
367
368 if (delalloc_lock)
369 mutex_unlock(&inode->delalloc_mutex);
370 return 0;
371out_qgroup:
372 btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
373out_fail:
374 btrfs_inode_rsv_release(inode, true);
375 if (delalloc_lock)
376 mutex_unlock(&inode->delalloc_mutex);
377 return ret;
378}
379
380/**
381 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
382 * @inode: the inode to release the reservation for.
383 * @num_bytes: the number of bytes we are releasing.
384 * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
385 *
386 * This will release the metadata reservation for an inode. This can be called
387 * once we complete IO for a given set of bytes to release their metadata
388 * reservations, or on error for the same reason.
389 */
390void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
391 bool qgroup_free)
392{
393 struct btrfs_fs_info *fs_info = inode->root->fs_info;
394
395 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
396 spin_lock(&inode->lock);
397 inode->csum_bytes -= num_bytes;
398 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
399 spin_unlock(&inode->lock);
400
401 if (btrfs_is_testing(fs_info))
402 return;
403
404 btrfs_inode_rsv_release(inode, qgroup_free);
405}
406
407/**
408 * btrfs_delalloc_release_extents - release our outstanding_extents
409 * @inode: the inode to balance the reservation for.
410 * @num_bytes: the number of bytes we originally reserved with
411 * @qgroup_free: do we need to free qgroup meta reservation or convert them.
412 *
413 * When we reserve space we increase outstanding_extents for the extents we may
414 * add. Once we've set the range as delalloc or created our ordered extents we
415 * have outstanding_extents to track the real usage, so we use this to free our
416 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
417 * with btrfs_delalloc_reserve_metadata.
418 */
419void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
420 bool qgroup_free)
421{
422 struct btrfs_fs_info *fs_info = inode->root->fs_info;
423 unsigned num_extents;
424
425 spin_lock(&inode->lock);
426 num_extents = count_max_extents(num_bytes);
427 btrfs_mod_outstanding_extents(inode, -num_extents);
428 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
429 spin_unlock(&inode->lock);
430
431 if (btrfs_is_testing(fs_info))
432 return;
433
434 btrfs_inode_rsv_release(inode, qgroup_free);
435}
436
437/**
438 * btrfs_delalloc_reserve_space - reserve data and metadata space for
439 * delalloc
440 * @inode: inode we're writing to
441 * @start: start range we are writing to
 442 * @len: length of the range we are writing to
 443 * @reserved: mandatory parameter, records the actually reserved qgroup ranges
 444 * of the current reservation.
445 *
446 * This will do the following things
447 *
448 * - reserve space in data space info for num bytes
449 * and reserve precious corresponding qgroup space
450 * (Done in check_data_free_space)
451 *
452 * - reserve space for metadata space, based on the number of outstanding
453 * extents and how much csums will be needed
454 * also reserve metadata space in a per root over-reserve method.
455 * - add to the inodes->delalloc_bytes
456 * - add it to the fs_info's delalloc inodes list.
457 * (Above 3 all done in delalloc_reserve_metadata)
458 *
459 * Return 0 for success
 460 * Return <0 for error (-ENOSPC or -EDQUOT)
461 */
462int btrfs_delalloc_reserve_space(struct inode *inode,
463 struct extent_changeset **reserved, u64 start, u64 len)
464{
465 int ret;
466
467 ret = btrfs_check_data_free_space(inode, reserved, start, len);
468 if (ret < 0)
469 return ret;
470 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
471 if (ret < 0)
472 btrfs_free_reserved_data_space(inode, *reserved, start, len);
473 return ret;
474}
475
476/**
477 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 478 * @inode: inode we're releasing space for
 479 * @start: start position of the space already reserved
 480 * @len: the length of the space already reserved
 481 * @qgroup_free: free the qgroup reservation or convert it to per-trans
482 *
483 * This function will release the metadata space that was not used and will
484 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
485 * list if there are no delalloc bytes left.
486 * Also it will handle the qgroup reserved space.
487 */
488void btrfs_delalloc_release_space(struct inode *inode,
489 struct extent_changeset *reserved,
490 u64 start, u64 len, bool qgroup_free)
491{
492 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
493 btrfs_free_reserved_data_space(inode, reserved, start, len);
494}
diff --git a/fs/btrfs/delalloc-space.h b/fs/btrfs/delalloc-space.h
new file mode 100644
index 000000000000..54466fbd7075
--- /dev/null
+++ b/fs/btrfs/delalloc-space.h
@@ -0,0 +1,23 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#ifndef BTRFS_DELALLOC_SPACE_H
4#define BTRFS_DELALLOC_SPACE_H
5
6struct extent_changeset;
7
8int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
9int btrfs_check_data_free_space(struct inode *inode,
10 struct extent_changeset **reserved, u64 start, u64 len);
11void btrfs_free_reserved_data_space(struct inode *inode,
12 struct extent_changeset *reserved, u64 start, u64 len);
13void btrfs_delalloc_release_space(struct inode *inode,
14 struct extent_changeset *reserved,
15 u64 start, u64 len, bool qgroup_free);
16void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
17 u64 len);
18void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
19 bool qgroup_free);
20int btrfs_delalloc_reserve_space(struct inode *inode,
21 struct extent_changeset **reserved, u64 start, u64 len);
22
23#endif /* BTRFS_DELALLOC_SPACE_H */
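
Taken together, the prototypes above give the expected pairing for a write path: reserve data plus metadata in one shot, hand the whole reservation back on failure, and in either case drop the temporary outstanding_extents bump once the range is (or is not) tracked as delalloc. A sketch of that pairing, inferred only from the declarations in this patch; do_the_actual_write() is a placeholder, ordering and error handling are simplified, and this is illustrative rather than compilable outside the btrfs tree:

static int example_buffered_write(struct inode *inode, u64 pos, u64 len)
{
	struct extent_changeset *reserved = NULL;
	int ret;

	/* data space, qgroup data space and delalloc metadata in one call */
	ret = btrfs_delalloc_reserve_space(inode, &reserved, pos, len);
	if (ret < 0)
		return ret;

	ret = do_the_actual_write(inode, pos, len);	/* placeholder */
	if (ret < 0) {
		/* nothing was consumed: hand the data/metadata reservation back */
		btrfs_delalloc_release_space(inode, reserved, pos, len, true);
	}

	/*
	 * Drop the temporary outstanding_extents bump in both cases; on success
	 * the delalloc/ordered extents now track the real usage, on error the
	 * qgroup meta reservation is freed rather than converted.
	 */
	btrfs_delalloc_release_extents(BTRFS_I(inode), len, ret < 0);

	extent_changeset_free(reserved);
	return ret;
}
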
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index a73fc23e2961..9a91d1eb0af4 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -10,6 +10,7 @@
10#include "delayed-ref.h" 10#include "delayed-ref.h"
11#include "transaction.h" 11#include "transaction.h"
12#include "qgroup.h" 12#include "qgroup.h"
13#include "space-info.h"
13 14
14struct kmem_cache *btrfs_delayed_ref_head_cachep; 15struct kmem_cache *btrfs_delayed_ref_head_cachep;
15struct kmem_cache *btrfs_delayed_tree_ref_cachep; 16struct kmem_cache *btrfs_delayed_tree_ref_cachep;
@@ -24,6 +25,179 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
24 * of hammering updates on the extent allocation tree. 25 * of hammering updates on the extent allocation tree.
25 */ 26 */
26 27
28bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
29{
30 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
31 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
32 bool ret = false;
33 u64 reserved;
34
35 spin_lock(&global_rsv->lock);
36 reserved = global_rsv->reserved;
37 spin_unlock(&global_rsv->lock);
38
39 /*
40 * Since the global reserve is just kind of magic we don't really want
41 * to rely on it to save our bacon, so if our size is more than the
42 * delayed_refs_rsv and the global rsv then it's time to think about
43 * bailing.
44 */
45 spin_lock(&delayed_refs_rsv->lock);
46 reserved += delayed_refs_rsv->reserved;
47 if (delayed_refs_rsv->size >= reserved)
48 ret = true;
49 spin_unlock(&delayed_refs_rsv->lock);
50 return ret;
51}
52
53int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
54{
55 u64 num_entries =
56 atomic_read(&trans->transaction->delayed_refs.num_entries);
57 u64 avg_runtime;
58 u64 val;
59
60 smp_mb();
61 avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
62 val = num_entries * avg_runtime;
63 if (val >= NSEC_PER_SEC)
64 return 1;
65 if (val >= NSEC_PER_SEC / 2)
66 return 2;
67
68 return btrfs_check_space_for_delayed_refs(trans->fs_info);
69}
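
btrfs_should_throttle_delayed_refs() turns the queue depth into an estimated drain time and compares it against one second worth of work. A standalone model of those thresholds (the average per-ref runtime is an assumed example value):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL

/* returns 1 (throttle hard), 2 (throttle), or 0 (no time-based throttling) */
static int should_throttle(uint64_t num_entries, uint64_t avg_runtime_ns)
{
	uint64_t val = num_entries * avg_runtime_ns;

	if (val >= NSEC_PER_SEC)
		return 1;
	if (val >= NSEC_PER_SEC / 2)
		return 2;
	return 0;	/* the kernel falls back to the space check here */
}

int main(void)
{
	uint64_t avg = 100000;	/* assume 100us per delayed ref */

	printf("%d\n", should_throttle(20000, avg));	/* 2.0s of work -> 1 */
	printf("%d\n", should_throttle(6000, avg));	/* 0.6s of work -> 2 */
	printf("%d\n", should_throttle(1000, avg));	/* 0.1s of work -> 0 */
	return 0;
}
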
70
71/**
72 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
73 * @fs_info - the fs_info for our fs.
74 * @nr - the number of items to drop.
75 *
76 * This drops the delayed ref head's count from the delayed refs rsv and frees
77 * any excess reservation we had.
78 */
79void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
80{
81 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
82 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
83 u64 released = 0;
84
85 released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
86 NULL);
87 if (released)
88 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
89 0, released, 0);
90}
91
92/*
93 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
94 * @trans - the trans that may have generated delayed refs
95 *
 96 * This is to be called any time we may have adjusted trans->delayed_ref_updates;
 97 * it'll calculate the additional size and add it to the delayed_refs_rsv.
98 */
99void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
100{
101 struct btrfs_fs_info *fs_info = trans->fs_info;
102 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
103 u64 num_bytes;
104
105 if (!trans->delayed_ref_updates)
106 return;
107
108 num_bytes = btrfs_calc_trans_metadata_size(fs_info,
109 trans->delayed_ref_updates);
110 spin_lock(&delayed_rsv->lock);
111 delayed_rsv->size += num_bytes;
112 delayed_rsv->full = 0;
113 spin_unlock(&delayed_rsv->lock);
114 trans->delayed_ref_updates = 0;
115}
116
117/**
118 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
119 * @fs_info - the fs info for our fs.
120 * @src - the source block rsv to transfer from.
121 * @num_bytes - the number of bytes to transfer.
122 *
123 * This transfers up to the num_bytes amount from the src rsv to the
124 * delayed_refs_rsv. Any extra bytes are returned to the space info.
125 */
126void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
127 struct btrfs_block_rsv *src,
128 u64 num_bytes)
129{
130 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
131 u64 to_free = 0;
132
133 spin_lock(&src->lock);
134 src->reserved -= num_bytes;
135 src->size -= num_bytes;
136 spin_unlock(&src->lock);
137
138 spin_lock(&delayed_refs_rsv->lock);
139 if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
140 u64 delta = delayed_refs_rsv->size -
141 delayed_refs_rsv->reserved;
142 if (num_bytes > delta) {
143 to_free = num_bytes - delta;
144 num_bytes = delta;
145 }
146 } else {
147 to_free = num_bytes;
148 num_bytes = 0;
149 }
150
151 if (num_bytes)
152 delayed_refs_rsv->reserved += num_bytes;
153 if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
154 delayed_refs_rsv->full = 1;
155 spin_unlock(&delayed_refs_rsv->lock);
156
157 if (num_bytes)
158 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
159 0, num_bytes, 1);
160 if (to_free)
161 btrfs_space_info_add_old_bytes(fs_info,
162 delayed_refs_rsv->space_info, to_free);
163}
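
The transfer above only keeps as many bytes as the delayed refs rsv is actually short by and routes the remainder back to the space info. A small standalone model of that split (the rsv state and transfer size are arbitrary example numbers):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* example delayed refs rsv state */
	uint64_t size = 100, reserved = 70;
	uint64_t num_bytes = 50;	/* bytes migrated in from the source rsv */
	uint64_t to_free = 0;

	if (size > reserved) {
		uint64_t delta = size - reserved;	/* shortfall: 30 */
		if (num_bytes > delta) {
			to_free = num_bytes - delta;	/* excess goes back to the space info */
			num_bytes = delta;		/* only the shortfall is kept */
		}
	} else {
		to_free = num_bytes;
		num_bytes = 0;
	}
	reserved += num_bytes;

	/* prints: kept=30 refunded=20 reserved=100 full=1 */
	printf("kept=%llu refunded=%llu reserved=%llu full=%d\n",
	       (unsigned long long)num_bytes, (unsigned long long)to_free,
	       (unsigned long long)reserved, reserved >= size);
	return 0;
}
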
164
165/**
166 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
167 * @fs_info - the fs_info for our fs.
168 * @flush - control how we can flush for this reservation.
169 *
 170 * This will refill the delayed block_rsv up to 1 item's worth of space and
171 * will return -ENOSPC if we can't make the reservation.
172 */
173int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
174 enum btrfs_reserve_flush_enum flush)
175{
176 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
177 u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
178 u64 num_bytes = 0;
179 int ret = -ENOSPC;
180
181 spin_lock(&block_rsv->lock);
182 if (block_rsv->reserved < block_rsv->size) {
183 num_bytes = block_rsv->size - block_rsv->reserved;
184 num_bytes = min(num_bytes, limit);
185 }
186 spin_unlock(&block_rsv->lock);
187
188 if (!num_bytes)
189 return 0;
190
191 ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
192 num_bytes, flush);
193 if (ret)
194 return ret;
195 btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
196 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
197 0, num_bytes, 1);
198 return 0;
199}
200
27/* 201/*
28 * compare two delayed tree backrefs with same bytenr and type 202 * compare two delayed tree backrefs with same bytenr and type
29 */ 203 */
@@ -957,13 +1131,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
957} 1131}
958 1132
959/* 1133/*
960 * this does a simple search for the head node for a given extent. 1134 * This does a simple search for the head node for a given extent. Returns the
961 * It must be called with the delayed ref spinlock held, and it returns 1135 * head node if found, or NULL if not.
962 * the head node if any where found, or NULL if not.
963 */ 1136 */
964struct btrfs_delayed_ref_head * 1137struct btrfs_delayed_ref_head *
965btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr) 1138btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
966{ 1139{
1140 lockdep_assert_held(&delayed_refs->lock);
1141
967 return find_ref_head(delayed_refs, bytenr, false); 1142 return find_ref_head(delayed_refs, bytenr, false);
968} 1143}
969 1144
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c18f93ea88ed..1c977e6d45dc 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -364,6 +364,16 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
364 364
365int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); 365int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
366 366
367void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
368void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
369int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
370 enum btrfs_reserve_flush_enum flush);
371void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
372 struct btrfs_block_rsv *src,
373 u64 num_bytes);
374int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
375bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
376
367/* 377/*
368 * helper functions to cast a node into its container 378 * helper functions to cast a node into its container
369 */ 379 */
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index ee0989c7e3a9..6b2e9aa83ffa 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -201,7 +201,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
201 return PTR_ERR(bdev); 201 return PTR_ERR(bdev);
202 } 202 }
203 203
204 filemap_write_and_wait(bdev->bd_inode->i_mapping); 204 sync_blockdev(bdev);
205 205
206 devices = &fs_info->fs_devices->devices; 206 devices = &fs_info->fs_devices->devices;
207 list_for_each_entry(device, devices, dev_list) { 207 list_for_each_entry(device, devices, dev_list) {
@@ -237,7 +237,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
237 } 237 }
238 rcu_assign_pointer(device->name, name); 238 rcu_assign_pointer(device->name, name);
239 239
240 mutex_lock(&fs_info->fs_devices->device_list_mutex);
241 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); 240 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
242 device->generation = 0; 241 device->generation = 0;
243 device->io_width = fs_info->sectorsize; 242 device->io_width = fs_info->sectorsize;
@@ -256,6 +255,8 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
256 device->dev_stats_valid = 1; 255 device->dev_stats_valid = 1;
257 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); 256 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
258 device->fs_devices = fs_info->fs_devices; 257 device->fs_devices = fs_info->fs_devices;
258
259 mutex_lock(&fs_info->fs_devices->device_list_mutex);
259 list_add(&device->dev_list, &fs_info->fs_devices->devices); 260 list_add(&device->dev_list, &fs_info->fs_devices->devices);
260 fs_info->fs_devices->num_devices++; 261 fs_info->fs_devices->num_devices++;
261 fs_info->fs_devices->open_devices++; 262 fs_info->fs_devices->open_devices++;
@@ -399,7 +400,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
399 int ret; 400 int ret;
400 struct btrfs_device *tgt_device = NULL; 401 struct btrfs_device *tgt_device = NULL;
401 struct btrfs_device *src_device = NULL; 402 struct btrfs_device *src_device = NULL;
402 bool need_unlock;
403 403
404 src_device = btrfs_find_device_by_devspec(fs_info, srcdevid, 404 src_device = btrfs_find_device_by_devspec(fs_info, srcdevid,
405 srcdev_name); 405 srcdev_name);
@@ -413,11 +413,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
413 return -ETXTBSY; 413 return -ETXTBSY;
414 } 414 }
415 415
416 ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
417 src_device, &tgt_device);
418 if (ret)
419 return ret;
420
421 /* 416 /*
422 * Here we commit the transaction to make sure commit_total_bytes 417 * Here we commit the transaction to make sure commit_total_bytes
423 * of all the devices are updated. 418 * of all the devices are updated.
@@ -431,7 +426,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
431 return PTR_ERR(trans); 426 return PTR_ERR(trans);
432 } 427 }
433 428
434 need_unlock = true; 429 ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
430 src_device, &tgt_device);
431 if (ret)
432 return ret;
433
435 down_write(&dev_replace->rwsem); 434 down_write(&dev_replace->rwsem);
436 switch (dev_replace->replace_state) { 435 switch (dev_replace->replace_state) {
437 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 436 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
@@ -442,11 +441,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
442 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 441 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
443 ASSERT(0); 442 ASSERT(0);
444 ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; 443 ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
444 up_write(&dev_replace->rwsem);
445 goto leave; 445 goto leave;
446 } 446 }
447 447
448 dev_replace->cont_reading_from_srcdev_mode = read_src; 448 dev_replace->cont_reading_from_srcdev_mode = read_src;
449 WARN_ON(!src_device);
450 dev_replace->srcdev = src_device; 449 dev_replace->srcdev = src_device;
451 dev_replace->tgtdev = tgt_device; 450 dev_replace->tgtdev = tgt_device;
452 451
@@ -471,7 +470,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
471 atomic64_set(&dev_replace->num_write_errors, 0); 470 atomic64_set(&dev_replace->num_write_errors, 0);
472 atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); 471 atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
473 up_write(&dev_replace->rwsem); 472 up_write(&dev_replace->rwsem);
474 need_unlock = false;
475 473
476 ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); 474 ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
477 if (ret) 475 if (ret)
@@ -479,16 +477,16 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
479 477
480 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); 478 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
481 479
482 /* force writing the updated state information to disk */ 480 /* Commit dev_replace state and reserve 1 item for it. */
483 trans = btrfs_start_transaction(root, 0); 481 trans = btrfs_start_transaction(root, 1);
484 if (IS_ERR(trans)) { 482 if (IS_ERR(trans)) {
485 ret = PTR_ERR(trans); 483 ret = PTR_ERR(trans);
486 need_unlock = true;
487 down_write(&dev_replace->rwsem); 484 down_write(&dev_replace->rwsem);
488 dev_replace->replace_state = 485 dev_replace->replace_state =
489 BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; 486 BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
490 dev_replace->srcdev = NULL; 487 dev_replace->srcdev = NULL;
491 dev_replace->tgtdev = NULL; 488 dev_replace->tgtdev = NULL;
489 up_write(&dev_replace->rwsem);
492 goto leave; 490 goto leave;
493 } 491 }
494 492
@@ -510,8 +508,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
510 return ret; 508 return ret;
511 509
512leave: 510leave:
513 if (need_unlock)
514 up_write(&dev_replace->rwsem);
515 btrfs_destroy_dev_replace_tgtdev(tgt_device); 511 btrfs_destroy_dev_replace_tgtdev(tgt_device);
516 return ret; 512 return ret;
517} 513}
@@ -678,7 +674,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
678 btrfs_device_set_disk_total_bytes(tgt_device, 674 btrfs_device_set_disk_total_bytes(tgt_device,
679 src_device->disk_total_bytes); 675 src_device->disk_total_bytes);
680 btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); 676 btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
681 tgt_device->commit_total_bytes = src_device->commit_total_bytes;
682 tgt_device->commit_bytes_used = src_device->bytes_used; 677 tgt_device->commit_bytes_used = src_device->bytes_used;
683 678
684 btrfs_assign_next_active_device(src_device, tgt_device); 679 btrfs_assign_next_active_device(src_device, tgt_device);
@@ -728,7 +723,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
728 struct btrfs_device *srcdev, 723 struct btrfs_device *srcdev,
729 struct btrfs_device *tgtdev) 724 struct btrfs_device *tgtdev)
730{ 725{
731 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 726 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
732 struct extent_map *em; 727 struct extent_map *em;
733 struct map_lookup *map; 728 struct map_lookup *map;
734 u64 start = 0; 729 u64 start = 0;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index deb74a8c191a..41a2bd2e0c56 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -19,6 +19,7 @@
19#include <linux/crc32c.h> 19#include <linux/crc32c.h>
20#include <linux/sched/mm.h> 20#include <linux/sched/mm.h>
21#include <asm/unaligned.h> 21#include <asm/unaligned.h>
22#include <crypto/hash.h>
22#include "ctree.h" 23#include "ctree.h"
23#include "disk-io.h" 24#include "disk-io.h"
24#include "transaction.h" 25#include "transaction.h"
@@ -40,10 +41,6 @@
40#include "tree-checker.h" 41#include "tree-checker.h"
41#include "ref-verify.h" 42#include "ref-verify.h"
42 43
43#ifdef CONFIG_X86
44#include <asm/cpufeature.h>
45#endif
46
47#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ 44#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
48 BTRFS_HEADER_FLAG_RELOC |\ 45 BTRFS_HEADER_FLAG_RELOC |\
49 BTRFS_SUPER_FLAG_ERROR |\ 46 BTRFS_SUPER_FLAG_ERROR |\
@@ -249,16 +246,6 @@ out:
249 return em; 246 return em;
250} 247}
251 248
252u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
253{
254 return crc32c(seed, data, len);
255}
256
257void btrfs_csum_final(u32 crc, u8 *result)
258{
259 put_unaligned_le32(~crc, result);
260}
261
262/* 249/*
263 * Compute the csum of a btree block and store the result to provided buffer. 250 * Compute the csum of a btree block and store the result to provided buffer.
264 * 251 *
@@ -266,6 +253,8 @@ void btrfs_csum_final(u32 crc, u8 *result)
266 */ 253 */
267static int csum_tree_block(struct extent_buffer *buf, u8 *result) 254static int csum_tree_block(struct extent_buffer *buf, u8 *result)
268{ 255{
256 struct btrfs_fs_info *fs_info = buf->fs_info;
257 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
269 unsigned long len; 258 unsigned long len;
270 unsigned long cur_len; 259 unsigned long cur_len;
271 unsigned long offset = BTRFS_CSUM_SIZE; 260 unsigned long offset = BTRFS_CSUM_SIZE;
@@ -273,9 +262,12 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result)
273 unsigned long map_start; 262 unsigned long map_start;
274 unsigned long map_len; 263 unsigned long map_len;
275 int err; 264 int err;
276 u32 crc = ~(u32)0; 265
266 shash->tfm = fs_info->csum_shash;
267 crypto_shash_init(shash);
277 268
278 len = buf->len - offset; 269 len = buf->len - offset;
270
279 while (len > 0) { 271 while (len > 0) {
280 /* 272 /*
281 * Note: we don't need to check for the err == 1 case here, as 273 * Note: we don't need to check for the err == 1 case here, as
@@ -288,14 +280,13 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result)
288 if (WARN_ON(err)) 280 if (WARN_ON(err))
289 return err; 281 return err;
290 cur_len = min(len, map_len - (offset - map_start)); 282 cur_len = min(len, map_len - (offset - map_start));
291 crc = btrfs_csum_data(kaddr + offset - map_start, 283 crypto_shash_update(shash, kaddr + offset - map_start, cur_len);
292 crc, cur_len);
293 len -= cur_len; 284 len -= cur_len;
294 offset += cur_len; 285 offset += cur_len;
295 } 286 }
296 memset(result, 0, BTRFS_CSUM_SIZE); 287 memset(result, 0, BTRFS_CSUM_SIZE);
297 288
298 btrfs_csum_final(crc, result); 289 crypto_shash_final(shash, result);
299 290
300 return 0; 291 return 0;
301} 292}
@@ -356,6 +347,16 @@ out:
356 return ret; 347 return ret;
357} 348}
358 349
350static bool btrfs_supported_super_csum(u16 csum_type)
351{
352 switch (csum_type) {
353 case BTRFS_CSUM_TYPE_CRC32:
354 return true;
355 default:
356 return false;
357 }
358}
359
359/* 360/*
360 * Return 0 if the superblock checksum type matches the checksum value of that 361 * Return 0 if the superblock checksum type matches the checksum value of that
361 * algorithm. Pass the raw disk superblock data. 362 * algorithm. Pass the raw disk superblock data.
@@ -365,33 +366,25 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
365{ 366{
366 struct btrfs_super_block *disk_sb = 367 struct btrfs_super_block *disk_sb =
367 (struct btrfs_super_block *)raw_disk_sb; 368 (struct btrfs_super_block *)raw_disk_sb;
368 u16 csum_type = btrfs_super_csum_type(disk_sb); 369 char result[BTRFS_CSUM_SIZE];
369 int ret = 0; 370 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
370 371
371 if (csum_type == BTRFS_CSUM_TYPE_CRC32) { 372 shash->tfm = fs_info->csum_shash;
372 u32 crc = ~(u32)0; 373 crypto_shash_init(shash);
373 char result[sizeof(crc)];
374 374
375 /* 375 /*
376 * The super_block structure does not span the whole 376 * The super_block structure does not span the whole
377 * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space 377 * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
378 * is filled with zeros and is included in the checksum. 378 * filled with zeros and is included in the checksum.
379 */ 379 */
380 crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, 380 crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
381 crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); 381 BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
382 btrfs_csum_final(crc, result); 382 crypto_shash_final(shash, result);
383 383
384 if (memcmp(raw_disk_sb, result, sizeof(result))) 384 if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb)))
385 ret = 1; 385 return 1;
386 }
387 386
388 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { 387 return 0;
389 btrfs_err(fs_info, "unsupported checksum algorithm %u",
390 csum_type);
391 ret = 1;
392 }
393
394 return ret;
395} 388}
396 389
397int btrfs_verify_level_key(struct extent_buffer *eb, int level, 390int btrfs_verify_level_key(struct extent_buffer *eb, int level,
@@ -873,14 +866,13 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
873 return btree_csum_one_bio(bio); 866 return btree_csum_one_bio(bio);
874} 867}
875 868
876static int check_async_write(struct btrfs_inode *bi) 869static int check_async_write(struct btrfs_fs_info *fs_info,
870 struct btrfs_inode *bi)
877{ 871{
878 if (atomic_read(&bi->sync_writers)) 872 if (atomic_read(&bi->sync_writers))
879 return 0; 873 return 0;
880#ifdef CONFIG_X86 874 if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
881 if (static_cpu_has(X86_FEATURE_XMM4_2))
882 return 0; 875 return 0;
883#endif
884 return 1; 876 return 1;
885} 877}
886 878
@@ -889,7 +881,7 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
889 unsigned long bio_flags) 881 unsigned long bio_flags)
890{ 882{
891 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 883 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
892 int async = check_async_write(BTRFS_I(inode)); 884 int async = check_async_write(fs_info, BTRFS_I(inode));
893 blk_status_t ret; 885 blk_status_t ret;
894 886
895 if (bio_op(bio) != REQ_OP_WRITE) { 887 if (bio_op(bio) != REQ_OP_WRITE) {
@@ -2262,6 +2254,29 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
2262 return 0; 2254 return 0;
2263} 2255}
2264 2256
2257static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
2258{
2259 struct crypto_shash *csum_shash;
2260 const char *csum_name = btrfs_super_csum_name(csum_type);
2261
2262 csum_shash = crypto_alloc_shash(csum_name, 0, 0);
2263
2264 if (IS_ERR(csum_shash)) {
2265 btrfs_err(fs_info, "error allocating %s hash for checksum",
2266 csum_name);
2267 return PTR_ERR(csum_shash);
2268 }
2269
2270 fs_info->csum_shash = csum_shash;
2271
2272 return 0;
2273}
2274
2275static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
2276{
2277 crypto_free_shash(fs_info->csum_shash);
2278}
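
The checksum rework in this file funnels everything through the kernel crypto shash API: allocate a tfm once, then init/update/final per checksum. A minimal sketch of that API in isolation (a toy module, not btrfs code; error handling from the update/final calls is trimmed):

#include <linux/module.h>
#include <crypto/hash.h>

static int __init shash_demo_init(void)
{
	struct crypto_shash *tfm;
	u8 digest[4];			/* crc32c digest is 4 bytes */
	static const u8 data[] = "hello";

	tfm = crypto_alloc_shash("crc32c", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(shash, tfm);

		shash->tfm = tfm;
		crypto_shash_init(shash);
		crypto_shash_update(shash, data, sizeof(data) - 1);
		crypto_shash_final(shash, digest);
	}

	pr_info("crc32c(\"hello\") = %*phN\n", 4, digest);
	crypto_free_shash(tfm);
	return 0;
}

static void __exit shash_demo_exit(void) { }

module_init(shash_demo_init);
module_exit(shash_demo_exit);
MODULE_LICENSE("GPL");
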
2279
2265static int btrfs_replay_log(struct btrfs_fs_info *fs_info, 2280static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2266 struct btrfs_fs_devices *fs_devices) 2281 struct btrfs_fs_devices *fs_devices)
2267{ 2282{
@@ -2577,7 +2592,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
2577 ret = validate_super(fs_info, sb, -1); 2592 ret = validate_super(fs_info, sb, -1);
2578 if (ret < 0) 2593 if (ret < 0)
2579 goto out; 2594 goto out;
2580 if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) { 2595 if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
2581 ret = -EUCLEAN; 2596 ret = -EUCLEAN;
2582 btrfs_err(fs_info, "invalid csum type, has %u want %u", 2597 btrfs_err(fs_info, "invalid csum type, has %u want %u",
2583 btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32); 2598 btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
@@ -2607,6 +2622,7 @@ int open_ctree(struct super_block *sb,
2607 u32 stripesize; 2622 u32 stripesize;
2608 u64 generation; 2623 u64 generation;
2609 u64 features; 2624 u64 features;
2625 u16 csum_type;
2610 struct btrfs_key location; 2626 struct btrfs_key location;
2611 struct buffer_head *bh; 2627 struct buffer_head *bh;
2612 struct btrfs_super_block *disk_super; 2628 struct btrfs_super_block *disk_super;
@@ -2689,7 +2705,7 @@ int open_ctree(struct super_block *sb,
2689 INIT_LIST_HEAD(&fs_info->space_info); 2705 INIT_LIST_HEAD(&fs_info->space_info);
2690 INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); 2706 INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
2691 INIT_LIST_HEAD(&fs_info->unused_bgs); 2707 INIT_LIST_HEAD(&fs_info->unused_bgs);
2692 btrfs_mapping_init(&fs_info->mapping_tree); 2708 extent_map_tree_init(&fs_info->mapping_tree);
2693 btrfs_init_block_rsv(&fs_info->global_block_rsv, 2709 btrfs_init_block_rsv(&fs_info->global_block_rsv,
2694 BTRFS_BLOCK_RSV_GLOBAL); 2710 BTRFS_BLOCK_RSV_GLOBAL);
2695 btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); 2711 btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
@@ -2793,6 +2809,8 @@ int open_ctree(struct super_block *sb,
2793 spin_lock_init(&fs_info->swapfile_pins_lock); 2809 spin_lock_init(&fs_info->swapfile_pins_lock);
2794 fs_info->swapfile_pins = RB_ROOT; 2810 fs_info->swapfile_pins = RB_ROOT;
2795 2811
2812 fs_info->send_in_progress = 0;
2813
2796 ret = btrfs_alloc_stripe_hash_table(fs_info); 2814 ret = btrfs_alloc_stripe_hash_table(fs_info);
2797 if (ret) { 2815 if (ret) {
2798 err = ret; 2816 err = ret;
@@ -2813,6 +2831,25 @@ int open_ctree(struct super_block *sb,
2813 } 2831 }
2814 2832
2815 /* 2833 /*
 2834 * Verify the type first; if that or the checksum value is
 2835 * corrupted, we'll find out
2836 */
2837 csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
2838 if (!btrfs_supported_super_csum(csum_type)) {
2839 btrfs_err(fs_info, "unsupported checksum algorithm: %u",
2840 csum_type);
2841 err = -EINVAL;
2842 brelse(bh);
2843 goto fail_alloc;
2844 }
2845
2846 ret = btrfs_init_csum_hash(fs_info, csum_type);
2847 if (ret) {
2848 err = ret;
2849 goto fail_alloc;
2850 }
2851
2852 /*
2816 * We want to check superblock checksum, the type is stored inside. 2853 * We want to check superblock checksum, the type is stored inside.
2817 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 2854 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
2818 */ 2855 */
@@ -2820,7 +2857,7 @@ int open_ctree(struct super_block *sb,
2820 btrfs_err(fs_info, "superblock checksum mismatch"); 2857 btrfs_err(fs_info, "superblock checksum mismatch");
2821 err = -EINVAL; 2858 err = -EINVAL;
2822 brelse(bh); 2859 brelse(bh);
2823 goto fail_alloc; 2860 goto fail_csum;
2824 } 2861 }
2825 2862
2826 /* 2863 /*
@@ -2857,11 +2894,11 @@ int open_ctree(struct super_block *sb,
2857 if (ret) { 2894 if (ret) {
2858 btrfs_err(fs_info, "superblock contains fatal errors"); 2895 btrfs_err(fs_info, "superblock contains fatal errors");
2859 err = -EINVAL; 2896 err = -EINVAL;
2860 goto fail_alloc; 2897 goto fail_csum;
2861 } 2898 }
2862 2899
2863 if (!btrfs_super_root(disk_super)) 2900 if (!btrfs_super_root(disk_super))
2864 goto fail_alloc; 2901 goto fail_csum;
2865 2902
2866 /* check FS state, whether FS is broken. */ 2903 /* check FS state, whether FS is broken. */
2867 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) 2904 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
@@ -2883,7 +2920,7 @@ int open_ctree(struct super_block *sb,
2883 ret = btrfs_parse_options(fs_info, options, sb->s_flags); 2920 ret = btrfs_parse_options(fs_info, options, sb->s_flags);
2884 if (ret) { 2921 if (ret) {
2885 err = ret; 2922 err = ret;
2886 goto fail_alloc; 2923 goto fail_csum;
2887 } 2924 }
2888 2925
2889 features = btrfs_super_incompat_flags(disk_super) & 2926 features = btrfs_super_incompat_flags(disk_super) &
@@ -2893,7 +2930,7 @@ int open_ctree(struct super_block *sb,
2893 "cannot mount because of unsupported optional features (%llx)", 2930 "cannot mount because of unsupported optional features (%llx)",
2894 features); 2931 features);
2895 err = -EINVAL; 2932 err = -EINVAL;
2896 goto fail_alloc; 2933 goto fail_csum;
2897 } 2934 }
2898 2935
2899 features = btrfs_super_incompat_flags(disk_super); 2936 features = btrfs_super_incompat_flags(disk_super);
@@ -2937,7 +2974,7 @@ int open_ctree(struct super_block *sb,
2937 btrfs_err(fs_info, 2974 btrfs_err(fs_info,
2938"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", 2975"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
2939 nodesize, sectorsize); 2976 nodesize, sectorsize);
2940 goto fail_alloc; 2977 goto fail_csum;
2941 } 2978 }
2942 2979
2943 /* 2980 /*
@@ -2953,7 +2990,7 @@ int open_ctree(struct super_block *sb,
2953 "cannot mount read-write because of unsupported optional features (%llx)", 2990 "cannot mount read-write because of unsupported optional features (%llx)",
2954 features); 2991 features);
2955 err = -EINVAL; 2992 err = -EINVAL;
2956 goto fail_alloc; 2993 goto fail_csum;
2957 } 2994 }
2958 2995
2959 ret = btrfs_init_workqueues(fs_info, fs_devices); 2996 ret = btrfs_init_workqueues(fs_info, fs_devices);
@@ -3331,6 +3368,8 @@ fail_tree_roots:
3331fail_sb_buffer: 3368fail_sb_buffer:
3332 btrfs_stop_all_workers(fs_info); 3369 btrfs_stop_all_workers(fs_info);
3333 btrfs_free_block_groups(fs_info); 3370 btrfs_free_block_groups(fs_info);
3371fail_csum:
3372 btrfs_free_csum_hash(fs_info);
3334fail_alloc: 3373fail_alloc:
3335fail_iput: 3374fail_iput:
3336 btrfs_mapping_tree_free(&fs_info->mapping_tree); 3375 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3472,17 +3511,20 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
3472static int write_dev_supers(struct btrfs_device *device, 3511static int write_dev_supers(struct btrfs_device *device,
3473 struct btrfs_super_block *sb, int max_mirrors) 3512 struct btrfs_super_block *sb, int max_mirrors)
3474{ 3513{
3514 struct btrfs_fs_info *fs_info = device->fs_info;
3515 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
3475 struct buffer_head *bh; 3516 struct buffer_head *bh;
3476 int i; 3517 int i;
3477 int ret; 3518 int ret;
3478 int errors = 0; 3519 int errors = 0;
3479 u32 crc;
3480 u64 bytenr; 3520 u64 bytenr;
3481 int op_flags; 3521 int op_flags;
3482 3522
3483 if (max_mirrors == 0) 3523 if (max_mirrors == 0)
3484 max_mirrors = BTRFS_SUPER_MIRROR_MAX; 3524 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
3485 3525
3526 shash->tfm = fs_info->csum_shash;
3527
3486 for (i = 0; i < max_mirrors; i++) { 3528 for (i = 0; i < max_mirrors; i++) {
3487 bytenr = btrfs_sb_offset(i); 3529 bytenr = btrfs_sb_offset(i);
3488 if (bytenr + BTRFS_SUPER_INFO_SIZE >= 3530 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3491,10 +3533,10 @@ static int write_dev_supers(struct btrfs_device *device,
3491 3533
3492 btrfs_set_super_bytenr(sb, bytenr); 3534 btrfs_set_super_bytenr(sb, bytenr);
3493 3535
3494 crc = ~(u32)0; 3536 crypto_shash_init(shash);
3495 crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, 3537 crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE,
3496 BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); 3538 BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
3497 btrfs_csum_final(crc, sb->csum); 3539 crypto_shash_final(shash, sb->csum);
3498 3540
3499 /* One reference for us, and we leave it for the caller */ 3541 /* One reference for us, and we leave it for the caller */
3500 bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, 3542 bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
@@ -3709,7 +3751,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
3709 3751
3710 if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 || 3752 if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
3711 (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE)) 3753 (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
3712 min_tolerated = min(min_tolerated, 3754 min_tolerated = min_t(int, min_tolerated,
3713 btrfs_raid_array[BTRFS_RAID_SINGLE]. 3755 btrfs_raid_array[BTRFS_RAID_SINGLE].
3714 tolerated_failures); 3756 tolerated_failures);
3715 3757
@@ -3718,7 +3760,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
3718 continue; 3760 continue;
3719 if (!(flags & btrfs_raid_array[raid_type].bg_flag)) 3761 if (!(flags & btrfs_raid_array[raid_type].bg_flag))
3720 continue; 3762 continue;
3721 min_tolerated = min(min_tolerated, 3763 min_tolerated = min_t(int, min_tolerated,
3722 btrfs_raid_array[raid_type]. 3764 btrfs_raid_array[raid_type].
3723 tolerated_failures); 3765 tolerated_failures);
3724 } 3766 }
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a0161aa1ea0b..e80f7c45a307 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -115,8 +115,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
115 int atomic); 115 int atomic);
116int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, 116int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
117 struct btrfs_key *first_key); 117 struct btrfs_key *first_key);
118u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
119void btrfs_csum_final(u32 crc, u8 *result);
120blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 118blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
121 enum btrfs_wq_endio_type metadata); 119 enum btrfs_wq_endio_type metadata);
122blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 120blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5faf057f6f37..d3b58e388535 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -28,46 +28,12 @@
28#include "sysfs.h" 28#include "sysfs.h"
29#include "qgroup.h" 29#include "qgroup.h"
30#include "ref-verify.h" 30#include "ref-verify.h"
31#include "space-info.h"
32#include "block-rsv.h"
33#include "delalloc-space.h"
31 34
32#undef SCRAMBLE_DELAYED_REFS 35#undef SCRAMBLE_DELAYED_REFS
33 36
34/*
35 * control flags for do_chunk_alloc's force field
36 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
37 * if we really need one.
38 *
39 * CHUNK_ALLOC_LIMITED means to only try and allocate one
40 * if we have very few chunks already allocated. This is
41 * used as part of the clustering code to help make sure
42 * we have a good pool of storage to cluster in, without
43 * filling the FS with empty chunks
44 *
45 * CHUNK_ALLOC_FORCE means it must try to allocate one
46 *
47 */
48enum {
49 CHUNK_ALLOC_NO_FORCE = 0,
50 CHUNK_ALLOC_LIMITED = 1,
51 CHUNK_ALLOC_FORCE = 2,
52};
53
54/*
55 * Declare a helper function to detect underflow of various space info members
56 */
57#define DECLARE_SPACE_INFO_UPDATE(name) \
58static inline void update_##name(struct btrfs_space_info *sinfo, \
59 s64 bytes) \
60{ \
61 if (bytes < 0 && sinfo->name < -bytes) { \
62 WARN_ON(1); \
63 sinfo->name = 0; \
64 return; \
65 } \
66 sinfo->name += bytes; \
67}
68
69DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
70DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
71 37
72static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 38static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
73 struct btrfs_delayed_ref_node *node, u64 parent, 39 struct btrfs_delayed_ref_node *node, u64 parent,
@@ -84,21 +50,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
84static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 50static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
85 struct btrfs_delayed_ref_node *node, 51 struct btrfs_delayed_ref_node *node,
86 struct btrfs_delayed_extent_op *extent_op); 52 struct btrfs_delayed_extent_op *extent_op);
87static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
88 int force);
89static int find_next_key(struct btrfs_path *path, int level, 53static int find_next_key(struct btrfs_path *path, int level,
90 struct btrfs_key *key); 54 struct btrfs_key *key);
91static void dump_space_info(struct btrfs_fs_info *fs_info,
92 struct btrfs_space_info *info, u64 bytes,
93 int dump_block_groups);
94static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
95 u64 num_bytes);
96static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
97 struct btrfs_space_info *space_info,
98 u64 num_bytes);
99static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
100 struct btrfs_space_info *space_info,
101 u64 num_bytes);
102 55
103static noinline int 56static noinline int
104block_group_cache_done(struct btrfs_block_group_cache *cache) 57block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -737,62 +690,39 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
737 return block_group_cache_tree_search(info, bytenr, 1); 690 return block_group_cache_tree_search(info, bytenr, 1);
738} 691}
739 692
740static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 693static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
741 u64 flags)
742{ 694{
743 struct list_head *head = &info->space_info; 695 if (ref->type == BTRFS_REF_METADATA) {
744 struct btrfs_space_info *found; 696 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
745 697 return BTRFS_BLOCK_GROUP_SYSTEM;
746 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; 698 else
747 699 return BTRFS_BLOCK_GROUP_METADATA;
748 rcu_read_lock();
749 list_for_each_entry_rcu(found, head, list) {
750 if (found->flags & flags) {
751 rcu_read_unlock();
752 return found;
753 }
754 } 700 }
755 rcu_read_unlock(); 701 return BTRFS_BLOCK_GROUP_DATA;
756 return NULL;
757} 702}
758 703
759static void add_pinned_bytes(struct btrfs_fs_info *fs_info, 704static void add_pinned_bytes(struct btrfs_fs_info *fs_info,
760 struct btrfs_ref *ref, int sign) 705 struct btrfs_ref *ref)
761{ 706{
762 struct btrfs_space_info *space_info; 707 struct btrfs_space_info *space_info;
763 s64 num_bytes; 708 u64 flags = generic_ref_to_space_flags(ref);
764 u64 flags;
765
766 ASSERT(sign == 1 || sign == -1);
767 num_bytes = sign * ref->len;
768 if (ref->type == BTRFS_REF_METADATA) {
769 if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID)
770 flags = BTRFS_BLOCK_GROUP_SYSTEM;
771 else
772 flags = BTRFS_BLOCK_GROUP_METADATA;
773 } else {
774 flags = BTRFS_BLOCK_GROUP_DATA;
775 }
776 709
777 space_info = __find_space_info(fs_info, flags); 710 space_info = btrfs_find_space_info(fs_info, flags);
778 ASSERT(space_info); 711 ASSERT(space_info);
779 percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, 712 percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len,
780 BTRFS_TOTAL_BYTES_PINNED_BATCH); 713 BTRFS_TOTAL_BYTES_PINNED_BATCH);
781} 714}
782 715
783/* 716static void sub_pinned_bytes(struct btrfs_fs_info *fs_info,
784 * after adding space to the filesystem, we need to clear the full flags 717 struct btrfs_ref *ref)
785 * on all the space infos.
786 */
787void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
788{ 718{
789 struct list_head *head = &info->space_info; 719 struct btrfs_space_info *space_info;
790 struct btrfs_space_info *found; 720 u64 flags = generic_ref_to_space_flags(ref);
791 721
792 rcu_read_lock(); 722 space_info = btrfs_find_space_info(fs_info, flags);
793 list_for_each_entry_rcu(found, head, list) 723 ASSERT(space_info);
794 found->full = 0; 724 percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len,
795 rcu_read_unlock(); 725 BTRFS_TOTAL_BYTES_PINNED_BATCH);
796} 726}
797 727
798/* simple helper to search for an existing data extent at a given offset */ 728/* simple helper to search for an existing data extent at a given offset */
@@ -1121,11 +1051,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1121 __le64 lenum; 1051 __le64 lenum;
1122 1052
1123 lenum = cpu_to_le64(root_objectid); 1053 lenum = cpu_to_le64(root_objectid);
1124 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 1054 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
1125 lenum = cpu_to_le64(owner); 1055 lenum = cpu_to_le64(owner);
1126 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1056 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1127 lenum = cpu_to_le64(offset); 1057 lenum = cpu_to_le64(offset);
1128 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1058 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1129 1059
1130 return ((u64)high_crc << 31) ^ (u64)low_crc; 1060 return ((u64)high_crc << 31) ^ (u64)low_crc;
1131} 1061}
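The switch to the btrfs_crc32c() wrapper does not change the resulting hash. A self-contained sketch of the same mixing scheme, using a generic crc32c() routine as a stand-in for the kernel helper, assuming the usual ~0 seed values (the seeds sit outside the hunk shown above) and ignoring the little-endian conversion that only matters on big-endian hosts:

	#include <stddef.h>
	#include <stdint.h>

	/* Assume crc32c(seed, buf, len) is any standard CRC32C implementation. */
	extern uint32_t crc32c(uint32_t seed, const void *buf, size_t len);

	static uint64_t hash_extent_data_ref(uint64_t root_objectid, uint64_t owner,
					     uint64_t offset)
	{
		uint32_t high_crc = ~(uint32_t)0;
		uint32_t low_crc = ~(uint32_t)0;
		uint64_t lenum;

		lenum = root_objectid;			/* cpu_to_le64() in the kernel code */
		high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
		lenum = owner;
		low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
		lenum = offset;
		low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

		/* Root goes into the high half, owner/offset into the low half. */
		return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
	}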
@@ -2065,7 +1995,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2065 btrfs_ref_tree_mod(fs_info, generic_ref); 1995 btrfs_ref_tree_mod(fs_info, generic_ref);
2066 1996
2067 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) 1997 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
2068 add_pinned_bytes(fs_info, generic_ref, -1); 1998 sub_pinned_bytes(fs_info, generic_ref);
2069 1999
2070 return ret; 2000 return ret;
2071} 2001}
@@ -2462,7 +2392,7 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
2462 flags = BTRFS_BLOCK_GROUP_SYSTEM; 2392 flags = BTRFS_BLOCK_GROUP_SYSTEM;
2463 else 2393 else
2464 flags = BTRFS_BLOCK_GROUP_METADATA; 2394 flags = BTRFS_BLOCK_GROUP_METADATA;
2465 space_info = __find_space_info(fs_info, flags); 2395 space_info = btrfs_find_space_info(fs_info, flags);
2466 ASSERT(space_info); 2396 ASSERT(space_info);
2467 percpu_counter_add_batch(&space_info->total_bytes_pinned, 2397 percpu_counter_add_batch(&space_info->total_bytes_pinned,
2468 -head->num_bytes, 2398 -head->num_bytes,
@@ -2824,49 +2754,6 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
2824 return num_csums; 2754 return num_csums;
2825} 2755}
2826 2756
2827bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
2828{
2829 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
2830 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
2831 bool ret = false;
2832 u64 reserved;
2833
2834 spin_lock(&global_rsv->lock);
2835 reserved = global_rsv->reserved;
2836 spin_unlock(&global_rsv->lock);
2837
2838 /*
2839 * Since the global reserve is just kind of magic we don't really want
2840 * to rely on it to save our bacon, so if our size is more than the
2841 * delayed_refs_rsv and the global rsv then it's time to think about
2842 * bailing.
2843 */
2844 spin_lock(&delayed_refs_rsv->lock);
2845 reserved += delayed_refs_rsv->reserved;
2846 if (delayed_refs_rsv->size >= reserved)
2847 ret = true;
2848 spin_unlock(&delayed_refs_rsv->lock);
2849 return ret;
2850}
2851
2852int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
2853{
2854 u64 num_entries =
2855 atomic_read(&trans->transaction->delayed_refs.num_entries);
2856 u64 avg_runtime;
2857 u64 val;
2858
2859 smp_mb();
2860 avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
2861 val = num_entries * avg_runtime;
2862 if (val >= NSEC_PER_SEC)
2863 return 1;
2864 if (val >= NSEC_PER_SEC / 2)
2865 return 2;
2866
2867 return btrfs_check_space_for_delayed_refs(trans->fs_info);
2868}
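To make the throttling thresholds above concrete: with, for example, an average delayed-ref run time of 100,000 ns and 12,000 queued entries, val = 12,000 * 100,000 ns = 1.2e9 ns >= NSEC_PER_SEC, so the caller is told to throttle hard (return 1); with 6,000 entries val = 6e8 ns >= NSEC_PER_SEC / 2 and the milder value 2 is returned; below that the decision falls through to the reservation check in btrfs_check_space_for_delayed_refs() above.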
2869
2870/* 2757/*
2871 * this starts processing the delayed reference count updates and 2758 * this starts processing the delayed reference count updates and
2872 * extent insertions we have queued up so far. count can be 2759 * extent insertions we have queued up so far. count can be
@@ -3834,93 +3721,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
3834 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); 3721 wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
3835} 3722}
3836 3723
3837static const char *alloc_name(u64 flags)
3838{
3839 switch (flags) {
3840 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3841 return "mixed";
3842 case BTRFS_BLOCK_GROUP_METADATA:
3843 return "metadata";
3844 case BTRFS_BLOCK_GROUP_DATA:
3845 return "data";
3846 case BTRFS_BLOCK_GROUP_SYSTEM:
3847 return "system";
3848 default:
3849 WARN_ON(1);
3850 return "invalid-combination";
3851 };
3852}
3853
3854static int create_space_info(struct btrfs_fs_info *info, u64 flags)
3855{
3856
3857 struct btrfs_space_info *space_info;
3858 int i;
3859 int ret;
3860
3861 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
3862 if (!space_info)
3863 return -ENOMEM;
3864
3865 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
3866 GFP_KERNEL);
3867 if (ret) {
3868 kfree(space_info);
3869 return ret;
3870 }
3871
3872 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3873 INIT_LIST_HEAD(&space_info->block_groups[i]);
3874 init_rwsem(&space_info->groups_sem);
3875 spin_lock_init(&space_info->lock);
3876 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
3877 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3878 init_waitqueue_head(&space_info->wait);
3879 INIT_LIST_HEAD(&space_info->ro_bgs);
3880 INIT_LIST_HEAD(&space_info->tickets);
3881 INIT_LIST_HEAD(&space_info->priority_tickets);
3882
3883 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
3884 info->space_info_kobj, "%s",
3885 alloc_name(space_info->flags));
3886 if (ret) {
3887 kobject_put(&space_info->kobj);
3888 return ret;
3889 }
3890
3891 list_add_rcu(&space_info->list, &info->space_info);
3892 if (flags & BTRFS_BLOCK_GROUP_DATA)
3893 info->data_sinfo = space_info;
3894
3895 return ret;
3896}
3897
3898static void update_space_info(struct btrfs_fs_info *info, u64 flags,
3899 u64 total_bytes, u64 bytes_used,
3900 u64 bytes_readonly,
3901 struct btrfs_space_info **space_info)
3902{
3903 struct btrfs_space_info *found;
3904 int factor;
3905
3906 factor = btrfs_bg_type_to_factor(flags);
3907
3908 found = __find_space_info(info, flags);
3909 ASSERT(found);
3910 spin_lock(&found->lock);
3911 found->total_bytes += total_bytes;
3912 found->disk_total += total_bytes * factor;
3913 found->bytes_used += bytes_used;
3914 found->disk_used += bytes_used * factor;
3915 found->bytes_readonly += bytes_readonly;
3916 if (total_bytes > 0)
3917 found->full = 0;
3918 space_info_add_new_bytes(info, found, total_bytes -
3919 bytes_used - bytes_readonly);
3920 spin_unlock(&found->lock);
3921 *space_info = found;
3922}
3923
3924static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3724static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3925{ 3725{
3926 u64 extra_flags = chunk_to_extended(flags) & 3726 u64 extra_flags = chunk_to_extended(flags) &
@@ -4068,215 +3868,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
4068 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3868 return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4069} 3869}
4070 3870
4071static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
4072 bool may_use_included)
4073{
4074 ASSERT(s_info);
4075 return s_info->bytes_used + s_info->bytes_reserved +
4076 s_info->bytes_pinned + s_info->bytes_readonly +
4077 (may_use_included ? s_info->bytes_may_use : 0);
4078}
4079
4080int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
4081{
4082 struct btrfs_root *root = inode->root;
4083 struct btrfs_fs_info *fs_info = root->fs_info;
4084 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
4085 u64 used;
4086 int ret = 0;
4087 int need_commit = 2;
4088 int have_pinned_space;
4089
4090 /* make sure bytes are sectorsize aligned */
4091 bytes = ALIGN(bytes, fs_info->sectorsize);
4092
4093 if (btrfs_is_free_space_inode(inode)) {
4094 need_commit = 0;
4095 ASSERT(current->journal_info);
4096 }
4097
4098again:
4099 /* make sure we have enough space to handle the data first */
4100 spin_lock(&data_sinfo->lock);
4101 used = btrfs_space_info_used(data_sinfo, true);
4102
4103 if (used + bytes > data_sinfo->total_bytes) {
4104 struct btrfs_trans_handle *trans;
4105
4106 /*
4107 * if we don't have enough free bytes in this space then we need
4108 * to alloc a new chunk.
4109 */
4110 if (!data_sinfo->full) {
4111 u64 alloc_target;
4112
4113 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
4114 spin_unlock(&data_sinfo->lock);
4115
4116 alloc_target = btrfs_data_alloc_profile(fs_info);
4117 /*
4118 * It is ugly that we don't call nolock join
4119 * transaction for the free space inode case here.
4120 * But it is safe because we only do the data space
4121 * reservation for the free space cache in the
 4122 * transaction context; the common join transaction
 4123 * just increases the counter of the current transaction
 4124 * handle and doesn't try to acquire the trans_lock of
4125 * the fs.
4126 */
4127 trans = btrfs_join_transaction(root);
4128 if (IS_ERR(trans))
4129 return PTR_ERR(trans);
4130
4131 ret = do_chunk_alloc(trans, alloc_target,
4132 CHUNK_ALLOC_NO_FORCE);
4133 btrfs_end_transaction(trans);
4134 if (ret < 0) {
4135 if (ret != -ENOSPC)
4136 return ret;
4137 else {
4138 have_pinned_space = 1;
4139 goto commit_trans;
4140 }
4141 }
4142
4143 goto again;
4144 }
4145
4146 /*
4147 * If we don't have enough pinned space to deal with this
 4148 * allocation, and no chunk was removed in the current transaction,
4149 * don't bother committing the transaction.
4150 */
4151 have_pinned_space = __percpu_counter_compare(
4152 &data_sinfo->total_bytes_pinned,
4153 used + bytes - data_sinfo->total_bytes,
4154 BTRFS_TOTAL_BYTES_PINNED_BATCH);
4155 spin_unlock(&data_sinfo->lock);
4156
4157 /* commit the current transaction and try again */
4158commit_trans:
4159 if (need_commit) {
4160 need_commit--;
4161
4162 if (need_commit > 0) {
4163 btrfs_start_delalloc_roots(fs_info, -1);
4164 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
4165 (u64)-1);
4166 }
4167
4168 trans = btrfs_join_transaction(root);
4169 if (IS_ERR(trans))
4170 return PTR_ERR(trans);
4171 if (have_pinned_space >= 0 ||
4172 test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
4173 &trans->transaction->flags) ||
4174 need_commit > 0) {
4175 ret = btrfs_commit_transaction(trans);
4176 if (ret)
4177 return ret;
4178 /*
4179 * The cleaner kthread might still be doing iput
4180 * operations. Wait for it to finish so that
4181 * more space is released. We don't need to
4182 * explicitly run the delayed iputs here because
4183 * the commit_transaction would have woken up
4184 * the cleaner.
4185 */
4186 ret = btrfs_wait_on_delayed_iputs(fs_info);
4187 if (ret)
4188 return ret;
4189 goto again;
4190 } else {
4191 btrfs_end_transaction(trans);
4192 }
4193 }
4194
4195 trace_btrfs_space_reservation(fs_info,
4196 "space_info:enospc",
4197 data_sinfo->flags, bytes, 1);
4198 return -ENOSPC;
4199 }
4200 update_bytes_may_use(data_sinfo, bytes);
4201 trace_btrfs_space_reservation(fs_info, "space_info",
4202 data_sinfo->flags, bytes, 1);
4203 spin_unlock(&data_sinfo->lock);
4204
4205 return 0;
4206}
4207
4208int btrfs_check_data_free_space(struct inode *inode,
4209 struct extent_changeset **reserved, u64 start, u64 len)
4210{
4211 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4212 int ret;
4213
4214 /* align the range */
4215 len = round_up(start + len, fs_info->sectorsize) -
4216 round_down(start, fs_info->sectorsize);
4217 start = round_down(start, fs_info->sectorsize);
4218
4219 ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
4220 if (ret < 0)
4221 return ret;
4222
4223 /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
4224 ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
4225 if (ret < 0)
4226 btrfs_free_reserved_data_space_noquota(inode, start, len);
4227 else
4228 ret = 0;
4229 return ret;
4230}
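A small self-contained illustration of the alignment above, assuming a 4 KiB sectorsize: a 100-byte reservation starting at offset 1000 is widened to cover every sector it touches.

	#include <assert.h>
	#include <stdint.h>

	#define SECTORSIZE 4096ULL

	static uint64_t rdown(uint64_t x, uint64_t a) { return x - (x % a); }
	static uint64_t rup(uint64_t x, uint64_t a) { return rdown(x + a - 1, a); }

	int main(void)
	{
		uint64_t start = 1000, len = 100;

		/* Same arithmetic as btrfs_check_data_free_space() above. */
		len = rup(start + len, SECTORSIZE) - rdown(start, SECTORSIZE);
		start = rdown(start, SECTORSIZE);

		assert(start == 0 && len == SECTORSIZE);	/* one full sector reserved */
		return 0;
	}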
4231
4232/*
 4233 * Called if we need to clear a data reservation for this inode,
 4234 * normally in an error case.
 4235 *
 4236 * This one will *NOT* use the accurate qgroup reserved space API, just for
 4237 * the case where we can't sleep and are sure it won't affect qgroup reserved space.
4238 * Like clear_bit_hook().
4239 */
4240void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
4241 u64 len)
4242{
4243 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4244 struct btrfs_space_info *data_sinfo;
4245
4246 /* Make sure the range is aligned to sectorsize */
4247 len = round_up(start + len, fs_info->sectorsize) -
4248 round_down(start, fs_info->sectorsize);
4249 start = round_down(start, fs_info->sectorsize);
4250
4251 data_sinfo = fs_info->data_sinfo;
4252 spin_lock(&data_sinfo->lock);
4253 update_bytes_may_use(data_sinfo, -len);
4254 trace_btrfs_space_reservation(fs_info, "space_info",
4255 data_sinfo->flags, len, 0);
4256 spin_unlock(&data_sinfo->lock);
4257}
4258
4259/*
 4260 * Called if we need to clear a data reservation for this inode,
 4261 * normally in an error case.
4262 *
4263 * This one will handle the per-inode data rsv map for accurate reserved
4264 * space framework.
4265 */
4266void btrfs_free_reserved_data_space(struct inode *inode,
4267 struct extent_changeset *reserved, u64 start, u64 len)
4268{
4269 struct btrfs_root *root = BTRFS_I(inode)->root;
4270
4271 /* Make sure the range is aligned to sectorsize */
4272 len = round_up(start + len, root->fs_info->sectorsize) -
4273 round_down(start, root->fs_info->sectorsize);
4274 start = round_down(start, root->fs_info->sectorsize);
4275
4276 btrfs_free_reserved_data_space_noquota(inode, start, len);
4277 btrfs_qgroup_free_data(inode, reserved, start, len);
4278}
4279
4280static void force_metadata_allocation(struct btrfs_fs_info *info) 3871static void force_metadata_allocation(struct btrfs_fs_info *info)
4281{ 3872{
4282 struct list_head *head = &info->space_info; 3873 struct list_head *head = &info->space_info;
@@ -4290,11 +3881,6 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
4290 rcu_read_unlock(); 3881 rcu_read_unlock();
4291} 3882}
4292 3883
4293static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
4294{
4295 return (global->size << 1);
4296}
4297
4298static int should_alloc_chunk(struct btrfs_fs_info *fs_info, 3884static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
4299 struct btrfs_space_info *sinfo, int force) 3885 struct btrfs_space_info *sinfo, int force)
4300{ 3886{
@@ -4325,15 +3911,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
4325{ 3911{
4326 u64 num_dev; 3912 u64 num_dev;
4327 3913
4328 if (type & (BTRFS_BLOCK_GROUP_RAID10 | 3914 num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
4329 BTRFS_BLOCK_GROUP_RAID0 | 3915 if (!num_dev)
4330 BTRFS_BLOCK_GROUP_RAID5 |
4331 BTRFS_BLOCK_GROUP_RAID6))
4332 num_dev = fs_info->fs_devices->rw_devices; 3916 num_dev = fs_info->fs_devices->rw_devices;
4333 else if (type & BTRFS_BLOCK_GROUP_RAID1)
4334 num_dev = 2;
4335 else
4336 num_dev = 1; /* DUP or single */
4337 3917
4338 return num_dev; 3918 return num_dev;
4339} 3919}
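The table lookup that replaces the open-coded branches above relies on devs_max in btrfs_raid_array: assuming the usual table contents, profiles with a fixed device count (RAID1: 2, DUP and SINGLE: 1) report it directly, while the striped profiles (RAID0/10/5/6) report devs_max == 0 and fall back to fs_devices->rw_devices, e.g. 4 on a four-device filesystem, which matches what the deleted if/else chain computed.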
@@ -4358,7 +3938,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4358 */ 3938 */
4359 lockdep_assert_held(&fs_info->chunk_mutex); 3939 lockdep_assert_held(&fs_info->chunk_mutex);
4360 3940
4361 info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3941 info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
4362 spin_lock(&info->lock); 3942 spin_lock(&info->lock);
4363 left = info->total_bytes - btrfs_space_info_used(info, true); 3943 left = info->total_bytes - btrfs_space_info_used(info, true);
4364 spin_unlock(&info->lock); 3944 spin_unlock(&info->lock);
@@ -4372,7 +3952,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4372 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 3952 if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
4373 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", 3953 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
4374 left, thresh, type); 3954 left, thresh, type);
4375 dump_space_info(fs_info, info, 0, 0); 3955 btrfs_dump_space_info(fs_info, info, 0, 0);
4376 } 3956 }
4377 3957
4378 if (left < thresh) { 3958 if (left < thresh) {
@@ -4405,8 +3985,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
4405 * - return 1 if it successfully allocates a chunk, 3985 * - return 1 if it successfully allocates a chunk,
4406 * - return errors including -ENOSPC otherwise. 3986 * - return errors including -ENOSPC otherwise.
4407 */ 3987 */
4408static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, 3988int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4409 int force) 3989 enum btrfs_chunk_alloc_enum force)
4410{ 3990{
4411 struct btrfs_fs_info *fs_info = trans->fs_info; 3991 struct btrfs_fs_info *fs_info = trans->fs_info;
4412 struct btrfs_space_info *space_info; 3992 struct btrfs_space_info *space_info;
@@ -4418,7 +3998,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
4418 if (trans->allocating_chunk) 3998 if (trans->allocating_chunk)
4419 return -ENOSPC; 3999 return -ENOSPC;
4420 4000
4421 space_info = __find_space_info(fs_info, flags); 4001 space_info = btrfs_find_space_info(fs_info, flags);
4422 ASSERT(space_info); 4002 ASSERT(space_info);
4423 4003
4424 do { 4004 do {
@@ -4525,1714 +4105,6 @@ out:
4525 return ret; 4105 return ret;
4526} 4106}
4527 4107
4528static int can_overcommit(struct btrfs_fs_info *fs_info,
4529 struct btrfs_space_info *space_info, u64 bytes,
4530 enum btrfs_reserve_flush_enum flush,
4531 bool system_chunk)
4532{
4533 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
4534 u64 profile;
4535 u64 space_size;
4536 u64 avail;
4537 u64 used;
4538 int factor;
4539
4540 /* Don't overcommit when in mixed mode. */
4541 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
4542 return 0;
4543
4544 if (system_chunk)
4545 profile = btrfs_system_alloc_profile(fs_info);
4546 else
4547 profile = btrfs_metadata_alloc_profile(fs_info);
4548
4549 used = btrfs_space_info_used(space_info, false);
4550
4551 /*
4552 * We only want to allow over committing if we have lots of actual space
4553 * free, but if we don't have enough space to handle the global reserve
4554 * space then we could end up having a real enospc problem when trying
4555 * to allocate a chunk or some other such important allocation.
4556 */
4557 spin_lock(&global_rsv->lock);
4558 space_size = calc_global_rsv_need_space(global_rsv);
4559 spin_unlock(&global_rsv->lock);
4560 if (used + space_size >= space_info->total_bytes)
4561 return 0;
4562
4563 used += space_info->bytes_may_use;
4564
4565 avail = atomic64_read(&fs_info->free_chunk_space);
4566
4567 /*
4568 * If we have dup, raid1 or raid10 then only half of the free
4569 * space is actually usable. For raid56, the space info used
4570 * doesn't include the parity drive, so we don't have to
4571 * change the math
4572 */
4573 factor = btrfs_bg_type_to_factor(profile);
4574 avail = div_u64(avail, factor);
4575
4576 /*
4577 * If we aren't flushing all things, let us overcommit up to
4578 * 1/2th of the space. If we can flush, don't let us overcommit
 4579 * 1/2 of the space. If we can flush, don't let us overcommit
 4580 * too much; let it overcommit up to 1/8 of the space.
4581 if (flush == BTRFS_RESERVE_FLUSH_ALL)
4582 avail >>= 3;
4583 else
4584 avail >>= 1;
4585
4586 if (used + bytes < space_info->total_bytes + avail)
4587 return 1;
4588 return 0;
4589}
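A worked example of the tail of this check: with 8 GiB of unallocated device space (fs_info->free_chunk_space) and a RAID1 metadata profile (factor 2), avail = 8 GiB / 2 = 4 GiB; under BTRFS_RESERVE_FLUSH_ALL the allowance is avail >> 3 = 512 MiB, while the other flush modes allow avail >> 1 = 2 GiB, so the reservation succeeds as long as used + bytes stays below total_bytes plus that allowance.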
4590
4591static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
4592 unsigned long nr_pages, int nr_items)
4593{
4594 struct super_block *sb = fs_info->sb;
4595
4596 if (down_read_trylock(&sb->s_umount)) {
4597 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
4598 up_read(&sb->s_umount);
4599 } else {
4600 /*
 4601 * We needn't worry about the filesystem going from r/w to r/o even
 4602 * though we don't acquire the ->s_umount mutex, because the filesystem
 4603 * should guarantee that the delalloc inode list is empty after the
 4604 * filesystem becomes read-only (all dirty pages are written to
 4605 * the disk).
4606 */
4607 btrfs_start_delalloc_roots(fs_info, nr_items);
4608 if (!current->journal_info)
4609 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
4610 }
4611}
4612
4613static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
4614 u64 to_reclaim)
4615{
4616 u64 bytes;
4617 u64 nr;
4618
4619 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
4620 nr = div64_u64(to_reclaim, bytes);
4621 if (!nr)
4622 nr = 1;
4623 return nr;
4624}
4625
4626#define EXTENT_SIZE_PER_ITEM SZ_256K
4627
4628/*
4629 * shrink metadata reservation for delalloc
4630 */
4631static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
4632 u64 orig, bool wait_ordered)
4633{
4634 struct btrfs_space_info *space_info;
4635 struct btrfs_trans_handle *trans;
4636 u64 delalloc_bytes;
4637 u64 dio_bytes;
4638 u64 async_pages;
4639 u64 items;
4640 long time_left;
4641 unsigned long nr_pages;
4642 int loops;
4643
 4644	/* Calculate the number of pages we need to flush for this space reservation */
4645 items = calc_reclaim_items_nr(fs_info, to_reclaim);
4646 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
4647
4648 trans = (struct btrfs_trans_handle *)current->journal_info;
4649 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4650
4651 delalloc_bytes = percpu_counter_sum_positive(
4652 &fs_info->delalloc_bytes);
4653 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4654 if (delalloc_bytes == 0 && dio_bytes == 0) {
4655 if (trans)
4656 return;
4657 if (wait_ordered)
4658 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4659 return;
4660 }
4661
4662 /*
4663 * If we are doing more ordered than delalloc we need to just wait on
4664 * ordered extents, otherwise we'll waste time trying to flush delalloc
4665 * that likely won't give us the space back we need.
4666 */
4667 if (dio_bytes > delalloc_bytes)
4668 wait_ordered = true;
4669
4670 loops = 0;
4671 while ((delalloc_bytes || dio_bytes) && loops < 3) {
4672 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
4673
4674 /*
4675 * Triggers inode writeback for up to nr_pages. This will invoke
4676 * ->writepages callback and trigger delalloc filling
4677 * (btrfs_run_delalloc_range()).
4678 */
4679 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
4680
4681 /*
4682 * We need to wait for the compressed pages to start before
4683 * we continue.
4684 */
4685 async_pages = atomic_read(&fs_info->async_delalloc_pages);
4686 if (!async_pages)
4687 goto skip_async;
4688
4689 /*
4690 * Calculate how many compressed pages we want to be written
 4691 * before we continue. I.e. if there are more async pages than we
 4692 * require, wait_event will wait until nr_pages are written.
4693 */
4694 if (async_pages <= nr_pages)
4695 async_pages = 0;
4696 else
4697 async_pages -= nr_pages;
4698
4699 wait_event(fs_info->async_submit_wait,
4700 atomic_read(&fs_info->async_delalloc_pages) <=
4701 (int)async_pages);
4702skip_async:
4703 spin_lock(&space_info->lock);
4704 if (list_empty(&space_info->tickets) &&
4705 list_empty(&space_info->priority_tickets)) {
4706 spin_unlock(&space_info->lock);
4707 break;
4708 }
4709 spin_unlock(&space_info->lock);
4710
4711 loops++;
4712 if (wait_ordered && !trans) {
4713 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
4714 } else {
4715 time_left = schedule_timeout_killable(1);
4716 if (time_left)
4717 break;
4718 }
4719 delalloc_bytes = percpu_counter_sum_positive(
4720 &fs_info->delalloc_bytes);
4721 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
4722 }
4723}
4724
4725struct reserve_ticket {
4726 u64 orig_bytes;
4727 u64 bytes;
4728 int error;
4729 struct list_head list;
4730 wait_queue_head_t wait;
4731};
4732
4733/**
 4734 * may_commit_transaction - possibly commit the transaction if it's ok to
 4735 * @fs_info - the fs_info for our fs
 4736 * @space_info - the space_info we are trying to make room in
 4737 *
4739 * This will check to make sure that committing the transaction will actually
4740 * get us somewhere and then commit the transaction if it does. Otherwise it
4741 * will return -ENOSPC.
4742 */
4743static int may_commit_transaction(struct btrfs_fs_info *fs_info,
4744 struct btrfs_space_info *space_info)
4745{
4746 struct reserve_ticket *ticket = NULL;
4747 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
4748 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
4749 struct btrfs_trans_handle *trans;
4750 u64 bytes_needed;
4751 u64 reclaim_bytes = 0;
4752
4753 trans = (struct btrfs_trans_handle *)current->journal_info;
4754 if (trans)
4755 return -EAGAIN;
4756
4757 spin_lock(&space_info->lock);
4758 if (!list_empty(&space_info->priority_tickets))
4759 ticket = list_first_entry(&space_info->priority_tickets,
4760 struct reserve_ticket, list);
4761 else if (!list_empty(&space_info->tickets))
4762 ticket = list_first_entry(&space_info->tickets,
4763 struct reserve_ticket, list);
4764 bytes_needed = (ticket) ? ticket->bytes : 0;
4765 spin_unlock(&space_info->lock);
4766
4767 if (!bytes_needed)
4768 return 0;
4769
4770 trans = btrfs_join_transaction(fs_info->extent_root);
4771 if (IS_ERR(trans))
4772 return PTR_ERR(trans);
4773
4774 /*
4775 * See if there is enough pinned space to make this reservation, or if
4776 * we have block groups that are going to be freed, allowing us to
4777 * possibly do a chunk allocation the next loop through.
4778 */
4779 if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
4780 __percpu_counter_compare(&space_info->total_bytes_pinned,
4781 bytes_needed,
4782 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
4783 goto commit;
4784
4785 /*
4786 * See if there is some space in the delayed insertion reservation for
4787 * this reservation.
4788 */
4789 if (space_info != delayed_rsv->space_info)
4790 goto enospc;
4791
4792 spin_lock(&delayed_rsv->lock);
4793 reclaim_bytes += delayed_rsv->reserved;
4794 spin_unlock(&delayed_rsv->lock);
4795
4796 spin_lock(&delayed_refs_rsv->lock);
4797 reclaim_bytes += delayed_refs_rsv->reserved;
4798 spin_unlock(&delayed_refs_rsv->lock);
4799 if (reclaim_bytes >= bytes_needed)
4800 goto commit;
4801 bytes_needed -= reclaim_bytes;
4802
4803 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
4804 bytes_needed,
4805 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
4806 goto enospc;
4807
4808commit:
4809 return btrfs_commit_transaction(trans);
4810enospc:
4811 btrfs_end_transaction(trans);
4812 return -ENOSPC;
4813}
4814
4815/*
4816 * Try to flush some data based on policy set by @state. This is only advisory
4817 * and may fail for various reasons. The caller is supposed to examine the
4818 * state of @space_info to detect the outcome.
4819 */
4820static void flush_space(struct btrfs_fs_info *fs_info,
4821 struct btrfs_space_info *space_info, u64 num_bytes,
4822 int state)
4823{
4824 struct btrfs_root *root = fs_info->extent_root;
4825 struct btrfs_trans_handle *trans;
4826 int nr;
4827 int ret = 0;
4828
4829 switch (state) {
4830 case FLUSH_DELAYED_ITEMS_NR:
4831 case FLUSH_DELAYED_ITEMS:
4832 if (state == FLUSH_DELAYED_ITEMS_NR)
4833 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
4834 else
4835 nr = -1;
4836
4837 trans = btrfs_join_transaction(root);
4838 if (IS_ERR(trans)) {
4839 ret = PTR_ERR(trans);
4840 break;
4841 }
4842 ret = btrfs_run_delayed_items_nr(trans, nr);
4843 btrfs_end_transaction(trans);
4844 break;
4845 case FLUSH_DELALLOC:
4846 case FLUSH_DELALLOC_WAIT:
4847 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
4848 state == FLUSH_DELALLOC_WAIT);
4849 break;
4850 case FLUSH_DELAYED_REFS_NR:
4851 case FLUSH_DELAYED_REFS:
4852 trans = btrfs_join_transaction(root);
4853 if (IS_ERR(trans)) {
4854 ret = PTR_ERR(trans);
4855 break;
4856 }
4857 if (state == FLUSH_DELAYED_REFS_NR)
4858 nr = calc_reclaim_items_nr(fs_info, num_bytes);
4859 else
4860 nr = 0;
4861 btrfs_run_delayed_refs(trans, nr);
4862 btrfs_end_transaction(trans);
4863 break;
4864 case ALLOC_CHUNK:
4865 case ALLOC_CHUNK_FORCE:
4866 trans = btrfs_join_transaction(root);
4867 if (IS_ERR(trans)) {
4868 ret = PTR_ERR(trans);
4869 break;
4870 }
4871 ret = do_chunk_alloc(trans,
4872 btrfs_metadata_alloc_profile(fs_info),
4873 (state == ALLOC_CHUNK) ?
4874 CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
4875 btrfs_end_transaction(trans);
4876 if (ret > 0 || ret == -ENOSPC)
4877 ret = 0;
4878 break;
4879 case COMMIT_TRANS:
4880 /*
4881 * If we have pending delayed iputs then we could free up a
4882 * bunch of pinned space, so make sure we run the iputs before
4883 * we do our pinned bytes check below.
4884 */
4885 btrfs_run_delayed_iputs(fs_info);
4886 btrfs_wait_on_delayed_iputs(fs_info);
4887
4888 ret = may_commit_transaction(fs_info, space_info);
4889 break;
4890 default:
4891 ret = -ENOSPC;
4892 break;
4893 }
4894
4895 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
4896 ret);
4897 return;
4898}
4899
4900static inline u64
4901btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
4902 struct btrfs_space_info *space_info,
4903 bool system_chunk)
4904{
4905 struct reserve_ticket *ticket;
4906 u64 used;
4907 u64 expected;
4908 u64 to_reclaim = 0;
4909
4910 list_for_each_entry(ticket, &space_info->tickets, list)
4911 to_reclaim += ticket->bytes;
4912 list_for_each_entry(ticket, &space_info->priority_tickets, list)
4913 to_reclaim += ticket->bytes;
4914 if (to_reclaim)
4915 return to_reclaim;
4916
4917 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
4918 if (can_overcommit(fs_info, space_info, to_reclaim,
4919 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4920 return 0;
4921
4922 used = btrfs_space_info_used(space_info, true);
4923
4924 if (can_overcommit(fs_info, space_info, SZ_1M,
4925 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
4926 expected = div_factor_fine(space_info->total_bytes, 95);
4927 else
4928 expected = div_factor_fine(space_info->total_bytes, 90);
4929
4930 if (used > expected)
4931 to_reclaim = used - expected;
4932 else
4933 to_reclaim = 0;
4934 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4935 space_info->bytes_reserved);
4936 return to_reclaim;
4937}
4938
4939static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
4940 struct btrfs_space_info *space_info,
4941 u64 used, bool system_chunk)
4942{
4943 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4944
4945 /* If we're just plain full then async reclaim just slows us down. */
4946 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
4947 return 0;
4948
4949 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
4950 system_chunk))
4951 return 0;
4952
4953 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4954 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4955}
4956
4957static bool wake_all_tickets(struct list_head *head)
4958{
4959 struct reserve_ticket *ticket;
4960
4961 while (!list_empty(head)) {
4962 ticket = list_first_entry(head, struct reserve_ticket, list);
4963 list_del_init(&ticket->list);
4964 ticket->error = -ENOSPC;
4965 wake_up(&ticket->wait);
4966 if (ticket->bytes != ticket->orig_bytes)
4967 return true;
4968 }
4969 return false;
4970}
4971
4972/*
 4973 * This is for normal flushers; we can wait all day if we want to. We
4974 * will loop and continuously try to flush as long as we are making progress.
4975 * We count progress as clearing off tickets each time we have to loop.
4976 */
4977static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4978{
4979 struct btrfs_fs_info *fs_info;
4980 struct btrfs_space_info *space_info;
4981 u64 to_reclaim;
4982 int flush_state;
4983 int commit_cycles = 0;
4984 u64 last_tickets_id;
4985
4986 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4987 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4988
4989 spin_lock(&space_info->lock);
4990 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
4991 false);
4992 if (!to_reclaim) {
4993 space_info->flush = 0;
4994 spin_unlock(&space_info->lock);
4995 return;
4996 }
4997 last_tickets_id = space_info->tickets_id;
4998 spin_unlock(&space_info->lock);
4999
5000 flush_state = FLUSH_DELAYED_ITEMS_NR;
5001 do {
5002 flush_space(fs_info, space_info, to_reclaim, flush_state);
5003 spin_lock(&space_info->lock);
5004 if (list_empty(&space_info->tickets)) {
5005 space_info->flush = 0;
5006 spin_unlock(&space_info->lock);
5007 return;
5008 }
5009 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
5010 space_info,
5011 false);
5012 if (last_tickets_id == space_info->tickets_id) {
5013 flush_state++;
5014 } else {
5015 last_tickets_id = space_info->tickets_id;
5016 flush_state = FLUSH_DELAYED_ITEMS_NR;
5017 if (commit_cycles)
5018 commit_cycles--;
5019 }
5020
5021 /*
5022 * We don't want to force a chunk allocation until we've tried
5023 * pretty hard to reclaim space. Think of the case where we
5024 * freed up a bunch of space and so have a lot of pinned space
 5025 * to reclaim. We would rather use that than possibly create an
 5026 * underutilized metadata chunk. So if this is our first run
 5027 * through the flushing state machine, skip ALLOC_CHUNK_FORCE and
5028 * commit the transaction. If nothing has changed the next go
5029 * around then we can force a chunk allocation.
5030 */
5031 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
5032 flush_state++;
5033
5034 if (flush_state > COMMIT_TRANS) {
5035 commit_cycles++;
5036 if (commit_cycles > 2) {
5037 if (wake_all_tickets(&space_info->tickets)) {
5038 flush_state = FLUSH_DELAYED_ITEMS_NR;
5039 commit_cycles--;
5040 } else {
5041 space_info->flush = 0;
5042 }
5043 } else {
5044 flush_state = FLUSH_DELAYED_ITEMS_NR;
5045 }
5046 }
5047 spin_unlock(&space_info->lock);
5048 } while (flush_state <= COMMIT_TRANS);
5049}
5050
5051void btrfs_init_async_reclaim_work(struct work_struct *work)
5052{
5053 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
5054}
5055
5056static const enum btrfs_flush_state priority_flush_states[] = {
5057 FLUSH_DELAYED_ITEMS_NR,
5058 FLUSH_DELAYED_ITEMS,
5059 ALLOC_CHUNK,
5060};
5061
5062static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
5063 struct btrfs_space_info *space_info,
5064 struct reserve_ticket *ticket)
5065{
5066 u64 to_reclaim;
5067 int flush_state;
5068
5069 spin_lock(&space_info->lock);
5070 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
5071 false);
5072 if (!to_reclaim) {
5073 spin_unlock(&space_info->lock);
5074 return;
5075 }
5076 spin_unlock(&space_info->lock);
5077
5078 flush_state = 0;
5079 do {
5080 flush_space(fs_info, space_info, to_reclaim,
5081 priority_flush_states[flush_state]);
5082 flush_state++;
5083 spin_lock(&space_info->lock);
5084 if (ticket->bytes == 0) {
5085 spin_unlock(&space_info->lock);
5086 return;
5087 }
5088 spin_unlock(&space_info->lock);
5089 } while (flush_state < ARRAY_SIZE(priority_flush_states));
5090}
5091
5092static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
5093 struct btrfs_space_info *space_info,
5094 struct reserve_ticket *ticket)
5095
5096{
5097 DEFINE_WAIT(wait);
5098 u64 reclaim_bytes = 0;
5099 int ret = 0;
5100
5101 spin_lock(&space_info->lock);
5102 while (ticket->bytes > 0 && ticket->error == 0) {
5103 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
5104 if (ret) {
5105 ret = -EINTR;
5106 break;
5107 }
5108 spin_unlock(&space_info->lock);
5109
5110 schedule();
5111
5112 finish_wait(&ticket->wait, &wait);
5113 spin_lock(&space_info->lock);
5114 }
5115 if (!ret)
5116 ret = ticket->error;
5117 if (!list_empty(&ticket->list))
5118 list_del_init(&ticket->list);
5119 if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
5120 reclaim_bytes = ticket->orig_bytes - ticket->bytes;
5121 spin_unlock(&space_info->lock);
5122
5123 if (reclaim_bytes)
5124 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
5125 return ret;
5126}
5127
5128/**
 5129 * __reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5130 * @root - the root we're allocating for
5131 * @space_info - the space info we want to allocate from
5132 * @orig_bytes - the number of bytes we want
5133 * @flush - whether or not we can flush to make our reservation
5134 *
5135 * This will reserve orig_bytes number of bytes from the space info associated
5136 * with the block_rsv. If there is not enough space it will make an attempt to
5137 * flush out space to make room. It will do this by flushing delalloc if
5138 * possible or committing the transaction. If flush is 0 then no attempts to
5139 * regain reservations will be made and this will fail if there is not enough
5140 * space already.
5141 */
5142static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
5143 struct btrfs_space_info *space_info,
5144 u64 orig_bytes,
5145 enum btrfs_reserve_flush_enum flush,
5146 bool system_chunk)
5147{
5148 struct reserve_ticket ticket;
5149 u64 used;
5150 u64 reclaim_bytes = 0;
5151 int ret = 0;
5152
5153 ASSERT(orig_bytes);
5154 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
5155
5156 spin_lock(&space_info->lock);
5157 ret = -ENOSPC;
5158 used = btrfs_space_info_used(space_info, true);
5159
5160 /*
5161 * If we have enough space then hooray, make our reservation and carry
 5162 * on. If not, see if we can overcommit, and if we can, hooray, carry on.
 5163 * If not, things get more complicated.
5164 */
5165 if (used + orig_bytes <= space_info->total_bytes) {
5166 update_bytes_may_use(space_info, orig_bytes);
5167 trace_btrfs_space_reservation(fs_info, "space_info",
5168 space_info->flags, orig_bytes, 1);
5169 ret = 0;
5170 } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
5171 system_chunk)) {
5172 update_bytes_may_use(space_info, orig_bytes);
5173 trace_btrfs_space_reservation(fs_info, "space_info",
5174 space_info->flags, orig_bytes, 1);
5175 ret = 0;
5176 }
5177
5178 /*
 5179 * If we couldn't make a reservation then set up our reservation ticket
5180 * and kick the async worker if it's not already running.
5181 *
5182 * If we are a priority flusher then we just need to add our ticket to
5183 * the list and we will do our own flushing further down.
5184 */
5185 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
5186 ticket.orig_bytes = orig_bytes;
5187 ticket.bytes = orig_bytes;
5188 ticket.error = 0;
5189 init_waitqueue_head(&ticket.wait);
5190 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
5191 list_add_tail(&ticket.list, &space_info->tickets);
5192 if (!space_info->flush) {
5193 space_info->flush = 1;
5194 trace_btrfs_trigger_flush(fs_info,
5195 space_info->flags,
5196 orig_bytes, flush,
5197 "enospc");
5198 queue_work(system_unbound_wq,
5199 &fs_info->async_reclaim_work);
5200 }
5201 } else {
5202 list_add_tail(&ticket.list,
5203 &space_info->priority_tickets);
5204 }
5205 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
5206 used += orig_bytes;
5207 /*
5208 * We will do the space reservation dance during log replay,
5209 * which means we won't have fs_info->fs_root set, so don't do
5210 * the async reclaim as we will panic.
5211 */
5212 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
5213 need_do_async_reclaim(fs_info, space_info,
5214 used, system_chunk) &&
5215 !work_busy(&fs_info->async_reclaim_work)) {
5216 trace_btrfs_trigger_flush(fs_info, space_info->flags,
5217 orig_bytes, flush, "preempt");
5218 queue_work(system_unbound_wq,
5219 &fs_info->async_reclaim_work);
5220 }
5221 }
5222 spin_unlock(&space_info->lock);
5223 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
5224 return ret;
5225
5226 if (flush == BTRFS_RESERVE_FLUSH_ALL)
5227 return wait_reserve_ticket(fs_info, space_info, &ticket);
5228
5229 ret = 0;
5230 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
5231 spin_lock(&space_info->lock);
5232 if (ticket.bytes) {
5233 if (ticket.bytes < orig_bytes)
5234 reclaim_bytes = orig_bytes - ticket.bytes;
5235 list_del_init(&ticket.list);
5236 ret = -ENOSPC;
5237 }
5238 spin_unlock(&space_info->lock);
5239
5240 if (reclaim_bytes)
5241 space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
5242 ASSERT(list_empty(&ticket.list));
5243 return ret;
5244}
5245
5246/**
5247 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
5248 * @root - the root we're allocating for
5249 * @block_rsv - the block_rsv we're allocating for
5250 * @orig_bytes - the number of bytes we want
5251 * @flush - whether or not we can flush to make our reservation
5252 *
5253 * This will reserve orig_bytes number of bytes from the space info associated
5254 * with the block_rsv. If there is not enough space it will make an attempt to
5255 * flush out space to make room. It will do this by flushing delalloc if
5256 * possible or committing the transaction. If flush is 0 then no attempts to
5257 * regain reservations will be made and this will fail if there is not enough
5258 * space already.
5259 */
5260static int reserve_metadata_bytes(struct btrfs_root *root,
5261 struct btrfs_block_rsv *block_rsv,
5262 u64 orig_bytes,
5263 enum btrfs_reserve_flush_enum flush)
5264{
5265 struct btrfs_fs_info *fs_info = root->fs_info;
5266 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5267 int ret;
5268 bool system_chunk = (root == fs_info->chunk_root);
5269
5270 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
5271 orig_bytes, flush, system_chunk);
5272 if (ret == -ENOSPC &&
5273 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
5274 if (block_rsv != global_rsv &&
5275 !block_rsv_use_bytes(global_rsv, orig_bytes))
5276 ret = 0;
5277 }
5278 if (ret == -ENOSPC) {
5279 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
5280 block_rsv->space_info->flags,
5281 orig_bytes, 1);
5282
5283 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
5284 dump_space_info(fs_info, block_rsv->space_info,
5285 orig_bytes, 0);
5286 }
5287 return ret;
5288}
5289
5290static struct btrfs_block_rsv *get_block_rsv(
5291 const struct btrfs_trans_handle *trans,
5292 const struct btrfs_root *root)
5293{
5294 struct btrfs_fs_info *fs_info = root->fs_info;
5295 struct btrfs_block_rsv *block_rsv = NULL;
5296
5297 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
5298 (root == fs_info->csum_root && trans->adding_csums) ||
5299 (root == fs_info->uuid_root))
5300 block_rsv = trans->block_rsv;
5301
5302 if (!block_rsv)
5303 block_rsv = root->block_rsv;
5304
5305 if (!block_rsv)
5306 block_rsv = &fs_info->empty_block_rsv;
5307
5308 return block_rsv;
5309}
5310
5311static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
5312 u64 num_bytes)
5313{
5314 int ret = -ENOSPC;
5315 spin_lock(&block_rsv->lock);
5316 if (block_rsv->reserved >= num_bytes) {
5317 block_rsv->reserved -= num_bytes;
5318 if (block_rsv->reserved < block_rsv->size)
5319 block_rsv->full = 0;
5320 ret = 0;
5321 }
5322 spin_unlock(&block_rsv->lock);
5323 return ret;
5324}
5325
5326static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
5327 u64 num_bytes, bool update_size)
5328{
5329 spin_lock(&block_rsv->lock);
5330 block_rsv->reserved += num_bytes;
5331 if (update_size)
5332 block_rsv->size += num_bytes;
5333 else if (block_rsv->reserved >= block_rsv->size)
5334 block_rsv->full = 1;
5335 spin_unlock(&block_rsv->lock);
5336}
5337
5338int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
5339 struct btrfs_block_rsv *dest, u64 num_bytes,
5340 int min_factor)
5341{
5342 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5343 u64 min_bytes;
5344
5345 if (global_rsv->space_info != dest->space_info)
5346 return -ENOSPC;
5347
5348 spin_lock(&global_rsv->lock);
5349 min_bytes = div_factor(global_rsv->size, min_factor);
5350 if (global_rsv->reserved < min_bytes + num_bytes) {
5351 spin_unlock(&global_rsv->lock);
5352 return -ENOSPC;
5353 }
5354 global_rsv->reserved -= num_bytes;
5355 if (global_rsv->reserved < global_rsv->size)
5356 global_rsv->full = 0;
5357 spin_unlock(&global_rsv->lock);
5358
5359 block_rsv_add_bytes(dest, num_bytes, true);
5360 return 0;
5361}
5362
5363/**
5364 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
5365 * @fs_info - the fs info for our fs.
5366 * @src - the source block rsv to transfer from.
5367 * @num_bytes - the number of bytes to transfer.
5368 *
5369 * This transfers up to the num_bytes amount from the src rsv to the
5370 * delayed_refs_rsv. Any extra bytes are returned to the space info.
5371 */
5372void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
5373 struct btrfs_block_rsv *src,
5374 u64 num_bytes)
5375{
5376 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
5377 u64 to_free = 0;
5378
5379 spin_lock(&src->lock);
5380 src->reserved -= num_bytes;
5381 src->size -= num_bytes;
5382 spin_unlock(&src->lock);
5383
5384 spin_lock(&delayed_refs_rsv->lock);
5385 if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
5386 u64 delta = delayed_refs_rsv->size -
5387 delayed_refs_rsv->reserved;
5388 if (num_bytes > delta) {
5389 to_free = num_bytes - delta;
5390 num_bytes = delta;
5391 }
5392 } else {
5393 to_free = num_bytes;
5394 num_bytes = 0;
5395 }
5396
5397 if (num_bytes)
5398 delayed_refs_rsv->reserved += num_bytes;
5399 if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
5400 delayed_refs_rsv->full = 1;
5401 spin_unlock(&delayed_refs_rsv->lock);
5402
5403 if (num_bytes)
5404 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5405 0, num_bytes, 1);
5406 if (to_free)
5407 space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info,
5408 to_free);
5409}
5410
5411/**
5412 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
5413 * @fs_info - the fs_info for our fs.
5414 * @flush - control how we can flush for this reservation.
5415 *
 5416 * This will refill the delayed block_rsv up to 1 item's size worth of space and
5417 * will return -ENOSPC if we can't make the reservation.
5418 */
5419int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
5420 enum btrfs_reserve_flush_enum flush)
5421{
5422 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5423 u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
5424 u64 num_bytes = 0;
5425 int ret = -ENOSPC;
5426
5427 spin_lock(&block_rsv->lock);
5428 if (block_rsv->reserved < block_rsv->size) {
5429 num_bytes = block_rsv->size - block_rsv->reserved;
5430 num_bytes = min(num_bytes, limit);
5431 }
5432 spin_unlock(&block_rsv->lock);
5433
5434 if (!num_bytes)
5435 return 0;
5436
5437 ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv,
5438 num_bytes, flush);
5439 if (ret)
5440 return ret;
5441 block_rsv_add_bytes(block_rsv, num_bytes, 0);
5442 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5443 0, num_bytes, 1);
5444 return 0;
5445}
5446
5447/*
5448 * This is for space we already have accounted in space_info->bytes_may_use, so
5449 * basically when we're returning space from block_rsv's.
5450 */
5451static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
5452 struct btrfs_space_info *space_info,
5453 u64 num_bytes)
5454{
5455 struct reserve_ticket *ticket;
5456 struct list_head *head;
5457 u64 used;
5458 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
5459 bool check_overcommit = false;
5460
5461 spin_lock(&space_info->lock);
5462 head = &space_info->priority_tickets;
5463
5464 /*
5465 * If we are over our limit then we need to check and see if we can
5466 * overcommit, and if we can't then we just need to free up our space
5467 * and not satisfy any requests.
5468 */
5469 used = btrfs_space_info_used(space_info, true);
5470 if (used - num_bytes >= space_info->total_bytes)
5471 check_overcommit = true;
5472again:
5473 while (!list_empty(head) && num_bytes) {
5474 ticket = list_first_entry(head, struct reserve_ticket,
5475 list);
5476 /*
5477 * We use 0 bytes because this space is already reserved, so
5478 * adding the ticket space would be a double count.
5479 */
5480 if (check_overcommit &&
5481 !can_overcommit(fs_info, space_info, 0, flush, false))
5482 break;
5483 if (num_bytes >= ticket->bytes) {
5484 list_del_init(&ticket->list);
5485 num_bytes -= ticket->bytes;
5486 ticket->bytes = 0;
5487 space_info->tickets_id++;
5488 wake_up(&ticket->wait);
5489 } else {
5490 ticket->bytes -= num_bytes;
5491 num_bytes = 0;
5492 }
5493 }
5494
5495 if (num_bytes && head == &space_info->priority_tickets) {
5496 head = &space_info->tickets;
5497 flush = BTRFS_RESERVE_FLUSH_ALL;
5498 goto again;
5499 }
5500 update_bytes_may_use(space_info, -num_bytes);
5501 trace_btrfs_space_reservation(fs_info, "space_info",
5502 space_info->flags, num_bytes, 0);
5503 spin_unlock(&space_info->lock);
5504}
5505
5506/*
5507 * This is for newly allocated space that isn't accounted in
5508 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
5509 * we use this helper.
5510 */
5511static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
5512 struct btrfs_space_info *space_info,
5513 u64 num_bytes)
5514{
5515 struct reserve_ticket *ticket;
5516 struct list_head *head = &space_info->priority_tickets;
5517
5518again:
5519 while (!list_empty(head) && num_bytes) {
5520 ticket = list_first_entry(head, struct reserve_ticket,
5521 list);
5522 if (num_bytes >= ticket->bytes) {
5523 trace_btrfs_space_reservation(fs_info, "space_info",
5524 space_info->flags,
5525 ticket->bytes, 1);
5526 list_del_init(&ticket->list);
5527 num_bytes -= ticket->bytes;
5528 update_bytes_may_use(space_info, ticket->bytes);
5529 ticket->bytes = 0;
5530 space_info->tickets_id++;
5531 wake_up(&ticket->wait);
5532 } else {
5533 trace_btrfs_space_reservation(fs_info, "space_info",
5534 space_info->flags,
5535 num_bytes, 1);
5536 update_bytes_may_use(space_info, num_bytes);
5537 ticket->bytes -= num_bytes;
5538 num_bytes = 0;
5539 }
5540 }
5541
5542 if (num_bytes && head == &space_info->priority_tickets) {
5543 head = &space_info->tickets;
5544 goto again;
5545 }
5546}
5547
5548static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
5549 struct btrfs_block_rsv *block_rsv,
5550 struct btrfs_block_rsv *dest, u64 num_bytes,
5551 u64 *qgroup_to_release_ret)
5552{
5553 struct btrfs_space_info *space_info = block_rsv->space_info;
5554 u64 qgroup_to_release = 0;
5555 u64 ret;
5556
5557 spin_lock(&block_rsv->lock);
5558 if (num_bytes == (u64)-1) {
5559 num_bytes = block_rsv->size;
5560 qgroup_to_release = block_rsv->qgroup_rsv_size;
5561 }
5562 block_rsv->size -= num_bytes;
5563 if (block_rsv->reserved >= block_rsv->size) {
5564 num_bytes = block_rsv->reserved - block_rsv->size;
5565 block_rsv->reserved = block_rsv->size;
5566 block_rsv->full = 1;
5567 } else {
5568 num_bytes = 0;
5569 }
5570 if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
5571 qgroup_to_release = block_rsv->qgroup_rsv_reserved -
5572 block_rsv->qgroup_rsv_size;
5573 block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
5574 } else {
5575 qgroup_to_release = 0;
5576 }
5577 spin_unlock(&block_rsv->lock);
5578
5579 ret = num_bytes;
5580 if (num_bytes > 0) {
5581 if (dest) {
5582 spin_lock(&dest->lock);
5583 if (!dest->full) {
5584 u64 bytes_to_add;
5585
5586 bytes_to_add = dest->size - dest->reserved;
5587 bytes_to_add = min(num_bytes, bytes_to_add);
5588 dest->reserved += bytes_to_add;
5589 if (dest->reserved >= dest->size)
5590 dest->full = 1;
5591 num_bytes -= bytes_to_add;
5592 }
5593 spin_unlock(&dest->lock);
5594 }
5595 if (num_bytes)
5596 space_info_add_old_bytes(fs_info, space_info,
5597 num_bytes);
5598 }
5599 if (qgroup_to_release_ret)
5600 *qgroup_to_release_ret = qgroup_to_release;
5601 return ret;
5602}
5603
5604int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
5605 struct btrfs_block_rsv *dst, u64 num_bytes,
5606 bool update_size)
5607{
5608 int ret;
5609
5610 ret = block_rsv_use_bytes(src, num_bytes);
5611 if (ret)
5612 return ret;
5613
5614 block_rsv_add_bytes(dst, num_bytes, update_size);
5615 return 0;
5616}
5617
5618void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
5619{
5620 memset(rsv, 0, sizeof(*rsv));
5621 spin_lock_init(&rsv->lock);
5622 rsv->type = type;
5623}
5624
5625void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
5626 struct btrfs_block_rsv *rsv,
5627 unsigned short type)
5628{
5629 btrfs_init_block_rsv(rsv, type);
5630 rsv->space_info = __find_space_info(fs_info,
5631 BTRFS_BLOCK_GROUP_METADATA);
5632}
5633
5634struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
5635 unsigned short type)
5636{
5637 struct btrfs_block_rsv *block_rsv;
5638
5639 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
5640 if (!block_rsv)
5641 return NULL;
5642
5643 btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
5644 return block_rsv;
5645}
5646
5647void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
5648 struct btrfs_block_rsv *rsv)
5649{
5650 if (!rsv)
5651 return;
5652 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5653 kfree(rsv);
5654}
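Taken together, the block_rsv helpers above support a simple lifecycle for a short-lived reservation. A sketch only: the byte count is illustrative and BTRFS_BLOCK_RSV_TEMP is assumed to be the type used for such temporary reservations.

	struct btrfs_block_rsv *rsv;
	int ret;

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;

	/* Reserve the metadata space up front, flushing if necessary. */
	ret = btrfs_block_rsv_add(root, rsv, SZ_1M, BTRFS_RESERVE_FLUSH_ALL);
	if (ret)
		goto out;

	/* ... do the metadata modifications charged against rsv ... */
out:
	/* Drops any remaining reservation and frees the rsv itself. */
	btrfs_free_block_rsv(fs_info, rsv);
	return ret;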
5655
5656int btrfs_block_rsv_add(struct btrfs_root *root,
5657 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
5658 enum btrfs_reserve_flush_enum flush)
5659{
5660 int ret;
5661
5662 if (num_bytes == 0)
5663 return 0;
5664
5665 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5666 if (!ret)
5667 block_rsv_add_bytes(block_rsv, num_bytes, true);
5668
5669 return ret;
5670}
5671
5672int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
5673{
5674 u64 num_bytes = 0;
5675 int ret = -ENOSPC;
5676
5677 if (!block_rsv)
5678 return 0;
5679
5680 spin_lock(&block_rsv->lock);
5681 num_bytes = div_factor(block_rsv->size, min_factor);
5682 if (block_rsv->reserved >= num_bytes)
5683 ret = 0;
5684 spin_unlock(&block_rsv->lock);
5685
5686 return ret;
5687}
5688
5689int btrfs_block_rsv_refill(struct btrfs_root *root,
5690 struct btrfs_block_rsv *block_rsv, u64 min_reserved,
5691 enum btrfs_reserve_flush_enum flush)
5692{
5693 u64 num_bytes = 0;
5694 int ret = -ENOSPC;
5695
5696 if (!block_rsv)
5697 return 0;
5698
5699 spin_lock(&block_rsv->lock);
5700 num_bytes = min_reserved;
5701 if (block_rsv->reserved >= num_bytes)
5702 ret = 0;
5703 else
5704 num_bytes -= block_rsv->reserved;
5705 spin_unlock(&block_rsv->lock);
5706
5707 if (!ret)
5708 return 0;
5709
5710 ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
5711 if (!ret) {
5712 block_rsv_add_bytes(block_rsv, num_bytes, false);
5713 return 0;
5714 }
5715
5716 return ret;
5717}
5718
5719static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5720 struct btrfs_block_rsv *block_rsv,
5721 u64 num_bytes, u64 *qgroup_to_release)
5722{
5723 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5724 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5725 struct btrfs_block_rsv *target = delayed_rsv;
5726
5727 if (target->full || target == block_rsv)
5728 target = global_rsv;
5729
5730 if (block_rsv->space_info != target->space_info)
5731 target = NULL;
5732
5733 return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
5734 qgroup_to_release);
5735}
5736
5737void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
5738 struct btrfs_block_rsv *block_rsv,
5739 u64 num_bytes)
5740{
5741 __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
5742}
5743
5744/**
5745 * btrfs_inode_rsv_release - release any excessive reservation.
5746 * @inode - the inode we need to release from.
5747 * @qgroup_free - free or convert qgroup meta.
5748 * Unlike normal operation, qgroup meta reservation needs to know if we are
5749 * freeing qgroup reservation or just converting it into per-trans. Normally
5750 * @qgroup_free is true for error handling, and false for normal release.
5751 *
5752 * This is the same as btrfs_block_rsv_release, except that it handles the
5753 * tracepoint for the reservation.
5754 */
5755static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
5756{
5757 struct btrfs_fs_info *fs_info = inode->root->fs_info;
5758 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5759 u64 released = 0;
5760 u64 qgroup_to_release = 0;
5761
5762 /*
5763	 * Since we statically set block_rsv->size, we just say that we are
5764	 * releasing 0 bytes, and then whatever reservation exceeds the size
5765	 * gets freed.
5766 */
5767 released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
5768 &qgroup_to_release);
5769 if (released > 0)
5770 trace_btrfs_space_reservation(fs_info, "delalloc",
5771 btrfs_ino(inode), released, 0);
5772 if (qgroup_free)
5773 btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
5774 else
5775 btrfs_qgroup_convert_reserved_meta(inode->root,
5776 qgroup_to_release);
5777}
5778
5779/**
5780 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
5781 * @fs_info - the fs_info for our fs.
5782 * @nr - the number of items to drop.
5783 *
5784 * This drops the delayed ref head's count from the delayed refs rsv and frees
5785 * any excess reservation we had.
5786 */
5787void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
5788{
5789 struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
5790 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5791 u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
5792 u64 released = 0;
5793
5794 released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv,
5795 num_bytes, NULL);
5796 if (released)
5797 trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
5798 0, released, 0);
5799}
5800
5801static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
5802{
5803 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
5804 struct btrfs_space_info *sinfo = block_rsv->space_info;
5805 u64 num_bytes;
5806
5807 /*
5808 * The global block rsv is based on the size of the extent tree, the
5809 * checksum tree and the root tree. If the fs is empty we want to set
5810 * it to a minimal amount for safety.
5811 */
5812 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
5813 btrfs_root_used(&fs_info->csum_root->root_item) +
5814 btrfs_root_used(&fs_info->tree_root->root_item);
5815 num_bytes = max_t(u64, num_bytes, SZ_16M);
5816
5817 spin_lock(&sinfo->lock);
5818 spin_lock(&block_rsv->lock);
5819
5820 block_rsv->size = min_t(u64, num_bytes, SZ_512M);
5821
5822 if (block_rsv->reserved < block_rsv->size) {
5823 num_bytes = btrfs_space_info_used(sinfo, true);
5824 if (sinfo->total_bytes > num_bytes) {
5825 num_bytes = sinfo->total_bytes - num_bytes;
5826 num_bytes = min(num_bytes,
5827 block_rsv->size - block_rsv->reserved);
5828 block_rsv->reserved += num_bytes;
5829 update_bytes_may_use(sinfo, num_bytes);
5830 trace_btrfs_space_reservation(fs_info, "space_info",
5831 sinfo->flags, num_bytes,
5832 1);
5833 }
5834 } else if (block_rsv->reserved > block_rsv->size) {
5835 num_bytes = block_rsv->reserved - block_rsv->size;
5836 update_bytes_may_use(sinfo, -num_bytes);
5837 trace_btrfs_space_reservation(fs_info, "space_info",
5838 sinfo->flags, num_bytes, 0);
5839 block_rsv->reserved = block_rsv->size;
5840 }
5841
5842 if (block_rsv->reserved == block_rsv->size)
5843 block_rsv->full = 1;
5844 else
5845 block_rsv->full = 0;
5846
5847 spin_unlock(&block_rsv->lock);
5848 spin_unlock(&sinfo->lock);
5849}
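A toy model of the sizing step in update_global_block_rsv() above: the combined usage of the three trees is given a 16M floor, and the resulting reserve size is capped at 512M. The per-tree numbers below are invented; this is not kernel code.

#include <stdio.h>

#define SZ_16M  (16ULL << 20)
#define SZ_512M (512ULL << 20)

int main(void)
{
        /* invented per-tree usage, standing in for btrfs_root_used() */
        unsigned long long extent_root = 700ULL << 20;
        unsigned long long csum_root = 90ULL << 20;
        unsigned long long tree_root = 1ULL << 20;
        unsigned long long num_bytes = extent_root + csum_root + tree_root;

        if (num_bytes < SZ_16M)         /* floor for a nearly empty fs */
                num_bytes = SZ_16M;
        if (num_bytes > SZ_512M)        /* cap applied when setting ->size */
                num_bytes = SZ_512M;

        printf("global rsv size = %llu MiB\n", num_bytes >> 20);
        return 0;
}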
5850
5851static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
5852{
5853 struct btrfs_space_info *space_info;
5854
5855 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
5856 fs_info->chunk_block_rsv.space_info = space_info;
5857
5858 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
5859 fs_info->global_block_rsv.space_info = space_info;
5860 fs_info->trans_block_rsv.space_info = space_info;
5861 fs_info->empty_block_rsv.space_info = space_info;
5862 fs_info->delayed_block_rsv.space_info = space_info;
5863 fs_info->delayed_refs_rsv.space_info = space_info;
5864
5865 fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
5866 fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
5867 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
5868 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
5869 if (fs_info->quota_root)
5870 fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
5871 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
5872
5873 update_global_block_rsv(fs_info);
5874}
5875
5876static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
5877{
5878 block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
5879 (u64)-1, NULL);
5880 WARN_ON(fs_info->trans_block_rsv.size > 0);
5881 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
5882 WARN_ON(fs_info->chunk_block_rsv.size > 0);
5883 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
5884 WARN_ON(fs_info->delayed_block_rsv.size > 0);
5885 WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
5886 WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
5887 WARN_ON(fs_info->delayed_refs_rsv.size > 0);
5888}
5889
5890/*
5891 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
5892 * @trans - the trans that may have generated delayed refs
5893 *
5894 * This is to be called any time we may have adjusted trans->delayed_ref_updates;
5895 * it will calculate the additional size and add it to the delayed_refs_rsv.
5896 */
5897void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
5898{
5899 struct btrfs_fs_info *fs_info = trans->fs_info;
5900 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
5901 u64 num_bytes;
5902
5903 if (!trans->delayed_ref_updates)
5904 return;
5905
5906 num_bytes = btrfs_calc_trans_metadata_size(fs_info,
5907 trans->delayed_ref_updates);
5908 spin_lock(&delayed_rsv->lock);
5909 delayed_rsv->size += num_bytes;
5910 delayed_rsv->full = 0;
5911 spin_unlock(&delayed_rsv->lock);
5912 trans->delayed_ref_updates = 0;
5913}
5914
5915/*
5916 * To be called after all the new block groups attached to the transaction
5917 * handle have been created (btrfs_create_pending_block_groups()).
5918 */
5919void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
5920{
5921 struct btrfs_fs_info *fs_info = trans->fs_info;
5922
5923 if (!trans->chunk_bytes_reserved)
5924 return;
5925
5926 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
5927
5928 block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
5929 trans->chunk_bytes_reserved, NULL);
5930 trans->chunk_bytes_reserved = 0;
5931}
5932
5933/*
5934 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
5935 * root: the root of the parent directory
5936 * rsv: block reservation
5937 * items: the number of items that we need to reserve space for
5938 * use_global_rsv: allow fallback to the global block reservation
5939 *
5940 * This function is used to reserve the space for snapshot/subvolume
5941 * creation and deletion. These operations differ from common
5942 * file/directory operations: they change two fs/file trees and the
5943 * root tree, and the number of items the qgroup reserves differs
5944 * from the free space reservation, so we cannot use the space
5945 * reservation mechanism in start_transaction().
5946 */
5947int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
5948 struct btrfs_block_rsv *rsv, int items,
5949 bool use_global_rsv)
5950{
5951 u64 qgroup_num_bytes = 0;
5952 u64 num_bytes;
5953 int ret;
5954 struct btrfs_fs_info *fs_info = root->fs_info;
5955 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5956
5957 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
5958 /* One for parent inode, two for dir entries */
5959 qgroup_num_bytes = 3 * fs_info->nodesize;
5960 ret = btrfs_qgroup_reserve_meta_prealloc(root,
5961 qgroup_num_bytes, true);
5962 if (ret)
5963 return ret;
5964 }
5965
5966 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
5967 rsv->space_info = __find_space_info(fs_info,
5968 BTRFS_BLOCK_GROUP_METADATA);
5969 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
5970 BTRFS_RESERVE_FLUSH_ALL);
5971
5972 if (ret == -ENOSPC && use_global_rsv)
5973 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
5974
5975 if (ret && qgroup_num_bytes)
5976 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
5977
5978 return ret;
5979}
5980
5981void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
5982 struct btrfs_block_rsv *rsv)
5983{
5984 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
5985}
5986
5987static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
5988 struct btrfs_inode *inode)
5989{
5990 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
5991 u64 reserve_size = 0;
5992 u64 qgroup_rsv_size = 0;
5993 u64 csum_leaves;
5994 unsigned outstanding_extents;
5995
5996 lockdep_assert_held(&inode->lock);
5997 outstanding_extents = inode->outstanding_extents;
5998 if (outstanding_extents)
5999 reserve_size = btrfs_calc_trans_metadata_size(fs_info,
6000 outstanding_extents + 1);
6001 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
6002 inode->csum_bytes);
6003 reserve_size += btrfs_calc_trans_metadata_size(fs_info,
6004 csum_leaves);
6005 /*
6006 * For qgroup rsv, the calculation is very simple:
6007 * account one nodesize for each outstanding extent
6008 *
6009	 * This is an overestimate in most cases.
6010 */
6011 qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
6012
6013 spin_lock(&block_rsv->lock);
6014 block_rsv->size = reserve_size;
6015 block_rsv->qgroup_rsv_size = qgroup_rsv_size;
6016 spin_unlock(&block_rsv->lock);
6017}
6018
6019static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
6020 u64 num_bytes, u64 *meta_reserve,
6021 u64 *qgroup_reserve)
6022{
6023 u64 nr_extents = count_max_extents(num_bytes);
6024 u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
6025
6026 /* We add one for the inode update at finish ordered time */
6027 *meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
6028 nr_extents + csum_leaves + 1);
6029 *qgroup_reserve = nr_extents * fs_info->nodesize;
6030}
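A worked sketch of the calc_inode_reservations() math above, under stated assumptions: a 128 MiB maximum extent size, a 16 KiB nodesize, an item cost of nodesize * 2 * BTRFS_MAX_LEVEL (an assumption about how btrfs_calc_trans_metadata_size() prices items, which may differ between versions), and checksums that happen to fit in a single leaf.

#include <stdio.h>

#define MAX_EXTENT_SIZE (128ULL << 20)  /* assumed BTRFS_MAX_EXTENT_SIZE */
#define NODESIZE        (16ULL << 10)   /* assumed nodesize              */
#define MAX_LEVEL       8

/* Assumed cost of one metadata item (see the note above). */
static unsigned long long item_cost(unsigned long long items)
{
        return items * NODESIZE * 2 * MAX_LEVEL;
}

int main(void)
{
        unsigned long long num_bytes = 4ULL << 20;      /* a 4 MiB write */
        unsigned long long nr_extents =
                (num_bytes + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
        unsigned long long csum_leaves = 1;     /* assume one leaf is enough */

        /* one extra item for the inode update at ordered-extent finish */
        unsigned long long meta_reserve = item_cost(nr_extents + csum_leaves + 1);
        unsigned long long qgroup_reserve = nr_extents * NODESIZE;

        printf("meta_reserve=%llu qgroup_reserve=%llu\n",
               meta_reserve, qgroup_reserve);
        return 0;
}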
6031
6032int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
6033{
6034 struct btrfs_root *root = inode->root;
6035 struct btrfs_fs_info *fs_info = root->fs_info;
6036 struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
6037 u64 meta_reserve, qgroup_reserve;
6038 unsigned nr_extents;
6039 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
6040 int ret = 0;
6041 bool delalloc_lock = true;
6042
6043 /* If we are a free space inode we need to not flush since we will be in
6044 * the middle of a transaction commit. We also don't need the delalloc
6045 * mutex since we won't race with anybody. We need this mostly to make
6046 * lockdep shut its filthy mouth.
6047 *
6048 * If we have a transaction open (can happen if we call truncate_block
6049 * from truncate), then we need FLUSH_LIMIT so we don't deadlock.
6050 */
6051 if (btrfs_is_free_space_inode(inode)) {
6052 flush = BTRFS_RESERVE_NO_FLUSH;
6053 delalloc_lock = false;
6054 } else {
6055 if (current->journal_info)
6056 flush = BTRFS_RESERVE_FLUSH_LIMIT;
6057
6058 if (btrfs_transaction_in_commit(fs_info))
6059 schedule_timeout(1);
6060 }
6061
6062 if (delalloc_lock)
6063 mutex_lock(&inode->delalloc_mutex);
6064
6065 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6066
6067 /*
6068 * We always want to do it this way, every other way is wrong and ends
6069 * in tears. Pre-reserving the amount we are going to add will always
6070 * be the right way, because otherwise if we have enough parallelism we
6071 * could end up with thousands of inodes all holding little bits of
6072 * reservations they were able to make previously and the only way to
6073 * reclaim that space is to ENOSPC out the operations and clear
6074 * everything out and try again, which is bad. This way we just
6075 * over-reserve slightly, and clean up the mess when we are done.
6076 */
6077 calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
6078 &qgroup_reserve);
6079 ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
6080 if (ret)
6081 goto out_fail;
6082 ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
6083 if (ret)
6084 goto out_qgroup;
6085
6086 /*
6087 * Now we need to update our outstanding extents and csum bytes _first_
6088 * and then add the reservation to the block_rsv. This keeps us from
6089 * racing with an ordered completion or some such that would think it
6090 * needs to free the reservation we just made.
6091 */
6092 spin_lock(&inode->lock);
6093 nr_extents = count_max_extents(num_bytes);
6094 btrfs_mod_outstanding_extents(inode, nr_extents);
6095 inode->csum_bytes += num_bytes;
6096 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6097 spin_unlock(&inode->lock);
6098
6099 /* Now we can safely add our space to our block rsv */
6100 block_rsv_add_bytes(block_rsv, meta_reserve, false);
6101 trace_btrfs_space_reservation(root->fs_info, "delalloc",
6102 btrfs_ino(inode), meta_reserve, 1);
6103
6104 spin_lock(&block_rsv->lock);
6105 block_rsv->qgroup_rsv_reserved += qgroup_reserve;
6106 spin_unlock(&block_rsv->lock);
6107
6108 if (delalloc_lock)
6109 mutex_unlock(&inode->delalloc_mutex);
6110 return 0;
6111out_qgroup:
6112 btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
6113out_fail:
6114 btrfs_inode_rsv_release(inode, true);
6115 if (delalloc_lock)
6116 mutex_unlock(&inode->delalloc_mutex);
6117 return ret;
6118}
6119
6120/**
6121 * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
6122 * @inode: the inode to release the reservation for.
6123 * @num_bytes: the number of bytes we are releasing.
6124 * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
6125 *
6126 * This will release the metadata reservation for an inode. This can be called
6127 * once we complete IO for a given set of bytes to release their metadata
6128 * reservations, or on error for the same reason.
6129 */
6130void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
6131 bool qgroup_free)
6132{
6133 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6134
6135 num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
6136 spin_lock(&inode->lock);
6137 inode->csum_bytes -= num_bytes;
6138 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6139 spin_unlock(&inode->lock);
6140
6141 if (btrfs_is_testing(fs_info))
6142 return;
6143
6144 btrfs_inode_rsv_release(inode, qgroup_free);
6145}
6146
6147/**
6148 * btrfs_delalloc_release_extents - release our outstanding_extents
6149 * @inode: the inode to balance the reservation for.
6150 * @num_bytes: the number of bytes we originally reserved
6151 * @qgroup_free: do we need to free the qgroup meta reservation or convert it.
6152 *
6153 * When we reserve space we increase outstanding_extents for the extents we may
6154 * add. Once we've set the range as delalloc or created our ordered extents we
6155 * have outstanding_extents to track the real usage, so we use this to free our
6156 * temporarily tracked outstanding_extents. This _must_ be used in conjunction
6157 * with btrfs_delalloc_reserve_metadata.
6158 */
6159void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
6160 bool qgroup_free)
6161{
6162 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6163 unsigned num_extents;
6164
6165 spin_lock(&inode->lock);
6166 num_extents = count_max_extents(num_bytes);
6167 btrfs_mod_outstanding_extents(inode, -num_extents);
6168 btrfs_calculate_inode_block_rsv_size(fs_info, inode);
6169 spin_unlock(&inode->lock);
6170
6171 if (btrfs_is_testing(fs_info))
6172 return;
6173
6174 btrfs_inode_rsv_release(inode, qgroup_free);
6175}
6176
6177/**
6178 * btrfs_delalloc_reserve_space - reserve data and metadata space for
6179 * delalloc
6180 * @inode: inode we're writing to
6181 * @start: start of the range we are writing to
6182 * @len: the length of the range we are writing to
6183 * @reserved: mandatory parameter, records the qgroup ranges actually
6184 * reserved by the current reservation.
6185 *
6186 * This will do the following things
6187 *
6188 * o reserve space in the data space info for num bytes
6189 * and reserve the corresponding qgroup space
6190 * (Done in check_data_free_space)
6191 *
6192 * o reserve space for metadata space, based on the number of outstanding
6193 * extents and how many csums will be needed
6194 * also reserve metadata space in a per-root over-reserve method.
6195 * o add to the inode's delalloc_bytes
6196 * o add it to the fs_info's delalloc inodes list.
6197 * (The above three are all done in delalloc_reserve_metadata)
6198 *
6199 * Return 0 for success
6200 * Return <0 for error (-ENOSPC or -EDQUOT)
6201 */
6202int btrfs_delalloc_reserve_space(struct inode *inode,
6203 struct extent_changeset **reserved, u64 start, u64 len)
6204{
6205 int ret;
6206
6207 ret = btrfs_check_data_free_space(inode, reserved, start, len);
6208 if (ret < 0)
6209 return ret;
6210 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
6211 if (ret < 0)
6212 btrfs_free_reserved_data_space(inode, *reserved, start, len);
6213 return ret;
6214}
6215
6216/**
6217 * btrfs_delalloc_release_space - release data and metadata space for delalloc
6218 * @inode: inode we're releasing space for
6219 * @start: start position of the space already reserved
6220 * @len: the length of the space already reserved
6221 * @qgroup_free: free the qgroup reservation or convert it to a per-trans one
6222 *
6223 * This function will release the metadata space that was not used and will
6224 * decrement ->delalloc_bytes and remove the inode from the fs_info
6225 * delalloc_inodes list if there are no delalloc bytes left.
6226 * It also handles the qgroup reserved space.
6227 */
6228void btrfs_delalloc_release_space(struct inode *inode,
6229 struct extent_changeset *reserved,
6230 u64 start, u64 len, bool qgroup_free)
6231{
6232 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
6233 btrfs_free_reserved_data_space(inode, reserved, start, len);
6234}
6235
6236static int update_block_group(struct btrfs_trans_handle *trans, 4108static int update_block_group(struct btrfs_trans_handle *trans,
6237 u64 bytenr, u64 num_bytes, int alloc) 4109 u64 bytenr, u64 num_bytes, int alloc)
6238{ 4110{
@@ -6296,7 +4168,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6296 old_val -= num_bytes; 4168 old_val -= num_bytes;
6297 btrfs_set_block_group_used(&cache->item, old_val); 4169 btrfs_set_block_group_used(&cache->item, old_val);
6298 cache->pinned += num_bytes; 4170 cache->pinned += num_bytes;
6299 update_bytes_pinned(cache->space_info, num_bytes); 4171 btrfs_space_info_update_bytes_pinned(info,
4172 cache->space_info, num_bytes);
6300 cache->space_info->bytes_used -= num_bytes; 4173 cache->space_info->bytes_used -= num_bytes;
6301 cache->space_info->disk_used -= num_bytes * factor; 4174 cache->space_info->disk_used -= num_bytes * factor;
6302 spin_unlock(&cache->lock); 4175 spin_unlock(&cache->lock);
@@ -6371,7 +4244,8 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache,
6371 spin_lock(&cache->space_info->lock); 4244 spin_lock(&cache->space_info->lock);
6372 spin_lock(&cache->lock); 4245 spin_lock(&cache->lock);
6373 cache->pinned += num_bytes; 4246 cache->pinned += num_bytes;
6374 update_bytes_pinned(cache->space_info, num_bytes); 4247 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
4248 num_bytes);
6375 if (reserved) { 4249 if (reserved) {
6376 cache->reserved -= num_bytes; 4250 cache->reserved -= num_bytes;
6377 cache->space_info->bytes_reserved -= num_bytes; 4251 cache->space_info->bytes_reserved -= num_bytes;
@@ -6580,7 +4454,8 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
6580 } else { 4454 } else {
6581 cache->reserved += num_bytes; 4455 cache->reserved += num_bytes;
6582 space_info->bytes_reserved += num_bytes; 4456 space_info->bytes_reserved += num_bytes;
6583 update_bytes_may_use(space_info, -ram_bytes); 4457 btrfs_space_info_update_bytes_may_use(cache->fs_info,
4458 space_info, -ram_bytes);
6584 if (delalloc) 4459 if (delalloc)
6585 cache->delalloc_bytes += num_bytes; 4460 cache->delalloc_bytes += num_bytes;
6586 } 4461 }
@@ -6646,7 +4521,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
6646 4521
6647 up_write(&fs_info->commit_root_sem); 4522 up_write(&fs_info->commit_root_sem);
6648 4523
6649 update_global_block_rsv(fs_info); 4524 btrfs_update_global_block_rsv(fs_info);
6650} 4525}
6651 4526
6652/* 4527/*
@@ -6736,7 +4611,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6736 spin_lock(&space_info->lock); 4611 spin_lock(&space_info->lock);
6737 spin_lock(&cache->lock); 4612 spin_lock(&cache->lock);
6738 cache->pinned -= len; 4613 cache->pinned -= len;
6739 update_bytes_pinned(space_info, -len); 4614 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
6740 4615
6741 trace_btrfs_space_reservation(fs_info, "pinned", 4616 trace_btrfs_space_reservation(fs_info, "pinned",
6742 space_info->flags, len, 0); 4617 space_info->flags, len, 0);
@@ -6757,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6757 to_add = min(len, global_rsv->size - 4632 to_add = min(len, global_rsv->size -
6758 global_rsv->reserved); 4633 global_rsv->reserved);
6759 global_rsv->reserved += to_add; 4634 global_rsv->reserved += to_add;
6760 update_bytes_may_use(space_info, to_add); 4635 btrfs_space_info_update_bytes_may_use(fs_info,
4636 space_info, to_add);
6761 if (global_rsv->reserved >= global_rsv->size) 4637 if (global_rsv->reserved >= global_rsv->size)
6762 global_rsv->full = 1; 4638 global_rsv->full = 1;
6763 trace_btrfs_space_reservation(fs_info, 4639 trace_btrfs_space_reservation(fs_info,
@@ -6769,8 +4645,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
6769 spin_unlock(&global_rsv->lock); 4645 spin_unlock(&global_rsv->lock);
6770 /* Add to any tickets we may have */ 4646 /* Add to any tickets we may have */
6771 if (len) 4647 if (len)
6772 space_info_add_new_bytes(fs_info, space_info, 4648 btrfs_space_info_add_new_bytes(fs_info,
6773 len); 4649 space_info, len);
6774 } 4650 }
6775 spin_unlock(&space_info->lock); 4651 spin_unlock(&space_info->lock);
6776 } 4652 }
@@ -7191,7 +5067,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7191 } 5067 }
7192out: 5068out:
7193 if (pin) 5069 if (pin)
7194 add_pinned_bytes(fs_info, &generic_ref, 1); 5070 add_pinned_bytes(fs_info, &generic_ref);
7195 5071
7196 if (last_ref) { 5072 if (last_ref) {
7197 /* 5073 /*
@@ -7239,7 +5115,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
7239 btrfs_ref_tree_mod(fs_info, ref); 5115 btrfs_ref_tree_mod(fs_info, ref);
7240 5116
7241 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) 5117 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
7242 add_pinned_bytes(fs_info, ref, 1); 5118 add_pinned_bytes(fs_info, ref);
7243 5119
7244 return ret; 5120 return ret;
7245} 5121}
@@ -7292,10 +5168,10 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
7292} 5168}
7293 5169
7294enum btrfs_loop_type { 5170enum btrfs_loop_type {
7295 LOOP_CACHING_NOWAIT = 0, 5171 LOOP_CACHING_NOWAIT,
7296 LOOP_CACHING_WAIT = 1, 5172 LOOP_CACHING_WAIT,
7297 LOOP_ALLOC_CHUNK = 2, 5173 LOOP_ALLOC_CHUNK,
7298 LOOP_NO_EMPTY_SIZE = 3, 5174 LOOP_NO_EMPTY_SIZE,
7299}; 5175};
7300 5176
7301static inline void 5177static inline void
@@ -7661,8 +5537,8 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
7661 return ret; 5537 return ret;
7662 } 5538 }
7663 5539
7664 ret = do_chunk_alloc(trans, ffe_ctl->flags, 5540 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
7665 CHUNK_ALLOC_FORCE); 5541 CHUNK_ALLOC_FORCE);
7666 5542
7667 /* 5543 /*
7668 * If we can't allocate a new chunk we've already looped 5544 * If we can't allocate a new chunk we've already looped
@@ -7758,7 +5634,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
7758 5634
7759 trace_find_free_extent(fs_info, num_bytes, empty_size, flags); 5635 trace_find_free_extent(fs_info, num_bytes, empty_size, flags);
7760 5636
7761 space_info = __find_space_info(fs_info, flags); 5637 space_info = btrfs_find_space_info(fs_info, flags);
7762 if (!space_info) { 5638 if (!space_info) {
7763 btrfs_err(fs_info, "No space info for %llu", flags); 5639 btrfs_err(fs_info, "No space info for %llu", flags);
7764 return -ENOSPC; 5640 return -ENOSPC;
@@ -7863,9 +5739,8 @@ search:
7863 */ 5739 */
7864 if (!block_group_bits(block_group, flags)) { 5740 if (!block_group_bits(block_group, flags)) {
7865 u64 extra = BTRFS_BLOCK_GROUP_DUP | 5741 u64 extra = BTRFS_BLOCK_GROUP_DUP |
7866 BTRFS_BLOCK_GROUP_RAID1 | 5742 BTRFS_BLOCK_GROUP_RAID1_MASK |
7867 BTRFS_BLOCK_GROUP_RAID5 | 5743 BTRFS_BLOCK_GROUP_RAID56_MASK |
7868 BTRFS_BLOCK_GROUP_RAID6 |
7869 BTRFS_BLOCK_GROUP_RAID10; 5744 BTRFS_BLOCK_GROUP_RAID10;
7870 5745
7871 /* 5746 /*
@@ -7984,60 +5859,6 @@ loop:
7984 return ret; 5859 return ret;
7985} 5860}
7986 5861
7987#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
7988do { \
7989 struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
7990 spin_lock(&__rsv->lock); \
7991 btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
7992 __rsv->size, __rsv->reserved); \
7993 spin_unlock(&__rsv->lock); \
7994} while (0)
7995
7996static void dump_space_info(struct btrfs_fs_info *fs_info,
7997 struct btrfs_space_info *info, u64 bytes,
7998 int dump_block_groups)
7999{
8000 struct btrfs_block_group_cache *cache;
8001 int index = 0;
8002
8003 spin_lock(&info->lock);
8004 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
8005 info->flags,
8006 info->total_bytes - btrfs_space_info_used(info, true),
8007 info->full ? "" : "not ");
8008 btrfs_info(fs_info,
8009 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
8010 info->total_bytes, info->bytes_used, info->bytes_pinned,
8011 info->bytes_reserved, info->bytes_may_use,
8012 info->bytes_readonly);
8013 spin_unlock(&info->lock);
8014
8015 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
8016 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
8017 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
8018 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
8019 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
8020
8021 if (!dump_block_groups)
8022 return;
8023
8024 down_read(&info->groups_sem);
8025again:
8026 list_for_each_entry(cache, &info->block_groups[index], list) {
8027 spin_lock(&cache->lock);
8028 btrfs_info(fs_info,
8029 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
8030 cache->key.objectid, cache->key.offset,
8031 btrfs_block_group_used(&cache->item), cache->pinned,
8032 cache->reserved, cache->ro ? "[readonly]" : "");
8033 btrfs_dump_free_space(cache, bytes);
8034 spin_unlock(&cache->lock);
8035 }
8036 if (++index < BTRFS_NR_RAID_TYPES)
8037 goto again;
8038 up_read(&info->groups_sem);
8039}
8040
8041/* 5862/*
8042 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a 5863 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
8043 * hole that is at least as big as @num_bytes. 5864 * hole that is at least as big as @num_bytes.
@@ -8113,12 +5934,13 @@ again:
8113 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 5934 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8114 struct btrfs_space_info *sinfo; 5935 struct btrfs_space_info *sinfo;
8115 5936
8116 sinfo = __find_space_info(fs_info, flags); 5937 sinfo = btrfs_find_space_info(fs_info, flags);
8117 btrfs_err(fs_info, 5938 btrfs_err(fs_info,
8118 "allocation failed flags %llu, wanted %llu", 5939 "allocation failed flags %llu, wanted %llu",
8119 flags, num_bytes); 5940 flags, num_bytes);
8120 if (sinfo) 5941 if (sinfo)
8121 dump_space_info(fs_info, sinfo, num_bytes, 1); 5942 btrfs_dump_space_info(fs_info, sinfo,
5943 num_bytes, 1);
8122 } 5944 }
8123 } 5945 }
8124 5946
@@ -8456,73 +6278,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
8456 return buf; 6278 return buf;
8457} 6279}
8458 6280
8459static struct btrfs_block_rsv *
8460use_block_rsv(struct btrfs_trans_handle *trans,
8461 struct btrfs_root *root, u32 blocksize)
8462{
8463 struct btrfs_fs_info *fs_info = root->fs_info;
8464 struct btrfs_block_rsv *block_rsv;
8465 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
8466 int ret;
8467 bool global_updated = false;
8468
8469 block_rsv = get_block_rsv(trans, root);
8470
8471 if (unlikely(block_rsv->size == 0))
8472 goto try_reserve;
8473again:
8474 ret = block_rsv_use_bytes(block_rsv, blocksize);
8475 if (!ret)
8476 return block_rsv;
8477
8478 if (block_rsv->failfast)
8479 return ERR_PTR(ret);
8480
8481 if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
8482 global_updated = true;
8483 update_global_block_rsv(fs_info);
8484 goto again;
8485 }
8486
8487 /*
8488 * The global reserve still exists to save us from ourselves, so don't
8489 * warn_on if we are short on our delayed refs reserve.
8490 */
8491 if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
8492 btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
8493 static DEFINE_RATELIMIT_STATE(_rs,
8494 DEFAULT_RATELIMIT_INTERVAL * 10,
8495 /*DEFAULT_RATELIMIT_BURST*/ 1);
8496 if (__ratelimit(&_rs))
8497 WARN(1, KERN_DEBUG
8498 "BTRFS: block rsv returned %d\n", ret);
8499 }
8500try_reserve:
8501 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
8502 BTRFS_RESERVE_NO_FLUSH);
8503 if (!ret)
8504 return block_rsv;
8505 /*
8506 * If we couldn't reserve metadata bytes try and use some from
8507 * the global reserve if its space type is the same as the global
8508 * reservation.
8509 */
8510 if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
8511 block_rsv->space_info == global_rsv->space_info) {
8512 ret = block_rsv_use_bytes(global_rsv, blocksize);
8513 if (!ret)
8514 return global_rsv;
8515 }
8516 return ERR_PTR(ret);
8517}
8518
8519static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
8520 struct btrfs_block_rsv *block_rsv, u32 blocksize)
8521{
8522 block_rsv_add_bytes(block_rsv, blocksize, false);
8523 block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
8524}
8525
8526/* 6281/*
8527 * finds a free extent and does all the dirty work required for allocation 6282 * finds a free extent and does all the dirty work required for allocation
8528 * returns the tree buffer or an ERR_PTR on error. 6283 * returns the tree buffer or an ERR_PTR on error.
@@ -8555,7 +6310,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
8555 } 6310 }
8556#endif 6311#endif
8557 6312
8558 block_rsv = use_block_rsv(trans, root, blocksize); 6313 block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
8559 if (IS_ERR(block_rsv)) 6314 if (IS_ERR(block_rsv))
8560 return ERR_CAST(block_rsv); 6315 return ERR_CAST(block_rsv);
8561 6316
@@ -8613,7 +6368,7 @@ out_free_buf:
8613out_free_reserved: 6368out_free_reserved:
8614 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); 6369 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
8615out_unuse: 6370out_unuse:
8616 unuse_block_rsv(fs_info, block_rsv, blocksize); 6371 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
8617 return ERR_PTR(ret); 6372 return ERR_PTR(ret);
8618} 6373}
8619 6374
@@ -9552,9 +7307,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9552 7307
9553 num_devices = fs_info->fs_devices->rw_devices; 7308 num_devices = fs_info->fs_devices->rw_devices;
9554 7309
9555 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7310 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK |
9556 BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | 7311 BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10;
9557 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
9558 7312
9559 if (num_devices == 1) { 7313 if (num_devices == 1) {
9560 stripped |= BTRFS_BLOCK_GROUP_DUP; 7314 stripped |= BTRFS_BLOCK_GROUP_DUP;
@@ -9565,7 +7319,7 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
9565 return stripped; 7319 return stripped;
9566 7320
9567 /* turn mirroring into duplication */ 7321 /* turn mirroring into duplication */
9568 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 7322 if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK |
9569 BTRFS_BLOCK_GROUP_RAID10)) 7323 BTRFS_BLOCK_GROUP_RAID10))
9570 return stripped | BTRFS_BLOCK_GROUP_DUP; 7324 return stripped | BTRFS_BLOCK_GROUP_DUP;
9571 } else { 7325 } else {
@@ -9636,7 +7390,7 @@ out:
9636 btrfs_info(cache->fs_info, 7390 btrfs_info(cache->fs_info,
9637 "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu", 7391 "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
9638 sinfo_used, num_bytes, min_allocable_bytes); 7392 sinfo_used, num_bytes, min_allocable_bytes);
9639 dump_space_info(cache->fs_info, cache->space_info, 0, 0); 7393 btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
9640 } 7394 }
9641 return ret; 7395 return ret;
9642} 7396}
@@ -9678,8 +7432,7 @@ again:
9678 */ 7432 */
9679 alloc_flags = update_block_group_flags(fs_info, cache->flags); 7433 alloc_flags = update_block_group_flags(fs_info, cache->flags);
9680 if (alloc_flags != cache->flags) { 7434 if (alloc_flags != cache->flags) {
9681 ret = do_chunk_alloc(trans, alloc_flags, 7435 ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9682 CHUNK_ALLOC_FORCE);
9683 /* 7436 /*
9684 * ENOSPC is allowed here, we may have enough space 7437 * ENOSPC is allowed here, we may have enough space
9685 * already allocated at the new raid level to 7438 * already allocated at the new raid level to
@@ -9695,7 +7448,7 @@ again:
9695 if (!ret) 7448 if (!ret)
9696 goto out; 7449 goto out;
9697 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); 7450 alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
9698 ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); 7451 ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9699 if (ret < 0) 7452 if (ret < 0)
9700 goto out; 7453 goto out;
9701 ret = inc_block_group_ro(cache, 0); 7454 ret = inc_block_group_ro(cache, 0);
@@ -9716,7 +7469,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
9716{ 7469{
9717 u64 alloc_flags = get_alloc_profile(trans->fs_info, type); 7470 u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
9718 7471
9719 return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); 7472 return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
9720} 7473}
9721 7474
9722/* 7475/*
@@ -9949,7 +7702,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
9949 struct extent_map_tree *em_tree; 7702 struct extent_map_tree *em_tree;
9950 struct extent_map *em; 7703 struct extent_map *em;
9951 7704
9952 em_tree = &root->fs_info->mapping_tree.map_tree; 7705 em_tree = &root->fs_info->mapping_tree;
9953 read_lock(&em_tree->lock); 7706 read_lock(&em_tree->lock);
9954 em = lookup_extent_mapping(em_tree, found_key.objectid, 7707 em = lookup_extent_mapping(em_tree, found_key.objectid,
9955 found_key.offset); 7708 found_key.offset);
@@ -10102,7 +7855,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
10102 */ 7855 */
10103 synchronize_rcu(); 7856 synchronize_rcu();
10104 7857
10105 release_global_block_rsv(info); 7858 btrfs_release_global_block_rsv(info);
10106 7859
10107 while (!list_empty(&info->space_info)) { 7860 while (!list_empty(&info->space_info)) {
10108 int i; 7861 int i;
@@ -10118,7 +7871,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
10118 if (WARN_ON(space_info->bytes_pinned > 0 || 7871 if (WARN_ON(space_info->bytes_pinned > 0 ||
10119 space_info->bytes_reserved > 0 || 7872 space_info->bytes_reserved > 0 ||
10120 space_info->bytes_may_use > 0)) 7873 space_info->bytes_may_use > 0))
10121 dump_space_info(info, space_info, 0, 0); 7874 btrfs_dump_space_info(info, space_info, 0, 0);
10122 list_del(&space_info->list); 7875 list_del(&space_info->list);
10123 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 7876 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
10124 struct kobject *kobj; 7877 struct kobject *kobj;
@@ -10141,7 +7894,6 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10141 struct btrfs_space_info *space_info; 7894 struct btrfs_space_info *space_info;
10142 struct raid_kobject *rkobj; 7895 struct raid_kobject *rkobj;
10143 LIST_HEAD(list); 7896 LIST_HEAD(list);
10144 int index;
10145 int ret = 0; 7897 int ret = 0;
10146 7898
10147 spin_lock(&fs_info->pending_raid_kobjs_lock); 7899 spin_lock(&fs_info->pending_raid_kobjs_lock);
@@ -10149,11 +7901,10 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
10149 spin_unlock(&fs_info->pending_raid_kobjs_lock); 7901 spin_unlock(&fs_info->pending_raid_kobjs_lock);
10150 7902
10151 list_for_each_entry(rkobj, &list, list) { 7903 list_for_each_entry(rkobj, &list, list) {
10152 space_info = __find_space_info(fs_info, rkobj->flags); 7904 space_info = btrfs_find_space_info(fs_info, rkobj->flags);
10153 index = btrfs_bg_flags_to_raid_index(rkobj->flags);
10154 7905
10155 ret = kobject_add(&rkobj->kobj, &space_info->kobj, 7906 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
10156 "%s", get_raid_name(index)); 7907 "%s", btrfs_bg_type_to_raid_name(rkobj->flags));
10157 if (ret) { 7908 if (ret) {
10158 kobject_put(&rkobj->kobj); 7909 kobject_put(&rkobj->kobj);
10159 break; 7910 break;
@@ -10243,21 +7994,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
10243 */ 7994 */
10244static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) 7995static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
10245{ 7996{
10246 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 7997 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
10247 struct extent_map *em; 7998 struct extent_map *em;
10248 struct btrfs_block_group_cache *bg; 7999 struct btrfs_block_group_cache *bg;
10249 u64 start = 0; 8000 u64 start = 0;
10250 int ret = 0; 8001 int ret = 0;
10251 8002
10252 while (1) { 8003 while (1) {
10253 read_lock(&map_tree->map_tree.lock); 8004 read_lock(&map_tree->lock);
10254 /* 8005 /*
10255 * lookup_extent_mapping will return the first extent map 8006 * lookup_extent_mapping will return the first extent map
10256 * intersecting the range, so setting @len to 1 is enough to 8007 * intersecting the range, so setting @len to 1 is enough to
10257 * get the first chunk. 8008 * get the first chunk.
10258 */ 8009 */
10259 em = lookup_extent_mapping(&map_tree->map_tree, start, 1); 8010 em = lookup_extent_mapping(map_tree, start, 1);
10260 read_unlock(&map_tree->map_tree.lock); 8011 read_unlock(&map_tree->lock);
10261 if (!em) 8012 if (!em)
10262 break; 8013 break;
10263 8014
@@ -10417,9 +8168,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10417 } 8168 }
10418 8169
10419 trace_btrfs_add_block_group(info, cache, 0); 8170 trace_btrfs_add_block_group(info, cache, 0);
10420 update_space_info(info, cache->flags, found_key.offset, 8171 btrfs_update_space_info(info, cache->flags, found_key.offset,
10421 btrfs_block_group_used(&cache->item), 8172 btrfs_block_group_used(&cache->item),
10422 cache->bytes_super, &space_info); 8173 cache->bytes_super, &space_info);
10423 8174
10424 cache->space_info = space_info; 8175 cache->space_info = space_info;
10425 8176
@@ -10437,9 +8188,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10437 list_for_each_entry_rcu(space_info, &info->space_info, list) { 8188 list_for_each_entry_rcu(space_info, &info->space_info, list) {
10438 if (!(get_alloc_profile(info, space_info->flags) & 8189 if (!(get_alloc_profile(info, space_info->flags) &
10439 (BTRFS_BLOCK_GROUP_RAID10 | 8190 (BTRFS_BLOCK_GROUP_RAID10 |
10440 BTRFS_BLOCK_GROUP_RAID1 | 8191 BTRFS_BLOCK_GROUP_RAID1_MASK |
10441 BTRFS_BLOCK_GROUP_RAID5 | 8192 BTRFS_BLOCK_GROUP_RAID56_MASK |
10442 BTRFS_BLOCK_GROUP_RAID6 |
10443 BTRFS_BLOCK_GROUP_DUP))) 8193 BTRFS_BLOCK_GROUP_DUP)))
10444 continue; 8194 continue;
10445 /* 8195 /*
@@ -10457,7 +8207,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
10457 } 8207 }
10458 8208
10459 btrfs_add_raid_kobjects(info); 8209 btrfs_add_raid_kobjects(info);
10460 init_global_block_rsv(info); 8210 btrfs_init_global_block_rsv(info);
10461 ret = check_chunk_block_group_mappings(info); 8211 ret = check_chunk_block_group_mappings(info);
10462error: 8212error:
10463 btrfs_free_path(path); 8213 btrfs_free_path(path);
@@ -10554,7 +8304,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10554 * assigned to our block group. We want our bg to be added to the rbtree 8304 * assigned to our block group. We want our bg to be added to the rbtree
10555 * with its ->space_info set. 8305 * with its ->space_info set.
10556 */ 8306 */
10557 cache->space_info = __find_space_info(fs_info, cache->flags); 8307 cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
10558 ASSERT(cache->space_info); 8308 ASSERT(cache->space_info);
10559 8309
10560 ret = btrfs_add_block_group_cache(fs_info, cache); 8310 ret = btrfs_add_block_group_cache(fs_info, cache);
@@ -10569,9 +8319,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
10569 * the rbtree, update the space info's counters. 8319 * the rbtree, update the space info's counters.
10570 */ 8320 */
10571 trace_btrfs_add_block_group(fs_info, cache, 1); 8321 trace_btrfs_add_block_group(fs_info, cache, 1);
10572 update_space_info(fs_info, cache->flags, size, bytes_used, 8322 btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
10573 cache->bytes_super, &cache->space_info); 8323 cache->bytes_super, &cache->space_info);
10574 update_global_block_rsv(fs_info); 8324 btrfs_update_global_block_rsv(fs_info);
10575 8325
10576 link_block_group(cache); 8326 link_block_group(cache);
10577 8327
@@ -10598,6 +8348,35 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
10598 write_sequnlock(&fs_info->profiles_lock); 8348 write_sequnlock(&fs_info->profiles_lock);
10599} 8349}
10600 8350
8351/*
8352 * Clear incompat bits for the following feature(s):
8353 *
8354 * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
8355 * in the whole filesystem
8356 */
8357static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
8358{
8359 if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {
8360 struct list_head *head = &fs_info->space_info;
8361 struct btrfs_space_info *sinfo;
8362
8363 list_for_each_entry_rcu(sinfo, head, list) {
8364 bool found = false;
8365
8366 down_read(&sinfo->groups_sem);
8367 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
8368 found = true;
8369 if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
8370 found = true;
8371 up_read(&sinfo->groups_sem);
8372
8373 if (found)
8374 return;
8375 }
8376 btrfs_clear_fs_incompat(fs_info, RAID56);
8377 }
8378}
8379
10601int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 8380int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10602 u64 group_start, struct extent_map *em) 8381 u64 group_start, struct extent_map *em)
10603{ 8382{
@@ -10744,6 +8523,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10744 clear_avail_alloc_bits(fs_info, block_group->flags); 8523 clear_avail_alloc_bits(fs_info, block_group->flags);
10745 } 8524 }
10746 up_write(&block_group->space_info->groups_sem); 8525 up_write(&block_group->space_info->groups_sem);
8526 clear_incompat_bg_bits(fs_info, block_group->flags);
10747 if (kobj) { 8527 if (kobj) {
10748 kobject_del(kobj); 8528 kobject_del(kobj);
10749 kobject_put(kobj); 8529 kobject_put(kobj);
@@ -10853,7 +8633,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
10853 if (remove_em) { 8633 if (remove_em) {
10854 struct extent_map_tree *em_tree; 8634 struct extent_map_tree *em_tree;
10855 8635
10856 em_tree = &fs_info->mapping_tree.map_tree; 8636 em_tree = &fs_info->mapping_tree;
10857 write_lock(&em_tree->lock); 8637 write_lock(&em_tree->lock);
10858 remove_extent_mapping(em_tree, em); 8638 remove_extent_mapping(em_tree, em);
10859 write_unlock(&em_tree->lock); 8639 write_unlock(&em_tree->lock);
@@ -10871,7 +8651,7 @@ struct btrfs_trans_handle *
10871btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, 8651btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
10872 const u64 chunk_offset) 8652 const u64 chunk_offset)
10873{ 8653{
10874 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 8654 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
10875 struct extent_map *em; 8655 struct extent_map *em;
10876 struct map_lookup *map; 8656 struct map_lookup *map;
10877 unsigned int num_items; 8657 unsigned int num_items;
@@ -11020,7 +8800,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
11020 spin_lock(&space_info->lock); 8800 spin_lock(&space_info->lock);
11021 spin_lock(&block_group->lock); 8801 spin_lock(&block_group->lock);
11022 8802
11023 update_bytes_pinned(space_info, -block_group->pinned); 8803 btrfs_space_info_update_bytes_pinned(fs_info, space_info,
8804 -block_group->pinned);
11024 space_info->bytes_readonly += block_group->pinned; 8805 space_info->bytes_readonly += block_group->pinned;
11025 percpu_counter_add_batch(&space_info->total_bytes_pinned, 8806 percpu_counter_add_batch(&space_info->total_bytes_pinned,
11026 -block_group->pinned, 8807 -block_group->pinned,
@@ -11076,43 +8857,6 @@ next:
11076 spin_unlock(&fs_info->unused_bgs_lock); 8857 spin_unlock(&fs_info->unused_bgs_lock);
11077} 8858}
11078 8859
11079int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
11080{
11081 struct btrfs_super_block *disk_super;
11082 u64 features;
11083 u64 flags;
11084 int mixed = 0;
11085 int ret;
11086
11087 disk_super = fs_info->super_copy;
11088 if (!btrfs_super_root(disk_super))
11089 return -EINVAL;
11090
11091 features = btrfs_super_incompat_flags(disk_super);
11092 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
11093 mixed = 1;
11094
11095 flags = BTRFS_BLOCK_GROUP_SYSTEM;
11096 ret = create_space_info(fs_info, flags);
11097 if (ret)
11098 goto out;
11099
11100 if (mixed) {
11101 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
11102 ret = create_space_info(fs_info, flags);
11103 } else {
11104 flags = BTRFS_BLOCK_GROUP_METADATA;
11105 ret = create_space_info(fs_info, flags);
11106 if (ret)
11107 goto out;
11108
11109 flags = BTRFS_BLOCK_GROUP_DATA;
11110 ret = create_space_info(fs_info, flags);
11111 }
11112out:
11113 return ret;
11114}
11115
11116int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, 8860int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
11117 u64 start, u64 end) 8861 u64 start, u64 end)
11118{ 8862{
@@ -11171,12 +8915,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
11171 find_first_clear_extent_bit(&device->alloc_state, start, 8915 find_first_clear_extent_bit(&device->alloc_state, start,
11172 &start, &end, 8916 &start, &end,
11173 CHUNK_TRIMMED | CHUNK_ALLOCATED); 8917 CHUNK_TRIMMED | CHUNK_ALLOCATED);
8918
8919 /* Ensure we skip the reserved area in the first 1M */
8920 start = max_t(u64, start, SZ_1M);
8921
11174 /* 8922 /*
11175 * If find_first_clear_extent_bit find a range that spans the 8923 * If find_first_clear_extent_bit find a range that spans the
11176 * end of the device it will set end to -1, in this case it's up 8924 * end of the device it will set end to -1, in this case it's up
11177 * to the caller to trim the value to the size of the device. 8925 * to the caller to trim the value to the size of the device.
11178 */ 8926 */
11179 end = min(end, device->total_bytes - 1); 8927 end = min(end, device->total_bytes - 1);
8928
11180 len = end - start + 1; 8929 len = end - start + 1;
11181 8930
11182 /* We didn't find any extents */ 8931 /* We didn't find any extents */
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5106008f5e28..1ff438fd5bc2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -359,6 +359,24 @@ do_insert:
359 return NULL; 359 return NULL;
360} 360}
361 361
362/**
363 * __etree_search - search @tree for an entry that contains @offset. Such
364 * an entry would have entry->start <= offset && entry->end >= offset.
365 *
366 * @tree - the tree to search
367 * @offset - offset that should fall within an entry in @tree
368 * @next_ret - pointer to the first entry whose range ends after @offset
369 * @prev_ret - pointer to the first entry whose range begins before @offset
370 * @p_ret - pointer where new node should be anchored (used when inserting an
371 * entry in the tree)
372 * @parent_ret - points to the entry that would have been the parent of the
373 * entry containing @offset
374 *
375 * This function returns a pointer to the entry that contains @offset byte
376 * address. If no such entry exists, then NULL is returned and the other
377 * pointer arguments to the function are filled, otherwise the found entry is
378 * returned and other pointers are left untouched.
379 */
362static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, 380static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
363 struct rb_node **next_ret, 381 struct rb_node **next_ret,
364 struct rb_node **prev_ret, 382 struct rb_node **prev_ret,
@@ -504,9 +522,11 @@ static int insert_state(struct extent_io_tree *tree,
504{ 522{
505 struct rb_node *node; 523 struct rb_node *node;
506 524
507 if (end < start) 525 if (end < start) {
508 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n", 526 btrfs_err(tree->fs_info,
509 end, start); 527 "insert state: end < start %llu %llu", end, start);
528 WARN_ON(1);
529 }
510 state->start = start; 530 state->start = start;
511 state->end = end; 531 state->end = end;
512 532
@@ -516,7 +536,8 @@ static int insert_state(struct extent_io_tree *tree,
516 if (node) { 536 if (node) {
517 struct extent_state *found; 537 struct extent_state *found;
518 found = rb_entry(node, struct extent_state, rb_node); 538 found = rb_entry(node, struct extent_state, rb_node);
519 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n", 539 btrfs_err(tree->fs_info,
540 "found node %llu %llu on insert of %llu %llu",
520 found->start, found->end, start, end); 541 found->start, found->end, start, end);
521 return -EEXIST; 542 return -EEXIST;
522 } 543 }
@@ -1537,8 +1558,8 @@ out:
1537} 1558}
1538 1559
1539/** 1560/**
1540 * find_first_clear_extent_bit - finds the first range that has @bits not set 1561 * find_first_clear_extent_bit - find the first range that has @bits not set.
1541 * and that starts after @start 1562 * This range could start before @start.
1542 * 1563 *
1543 * @tree - the tree to search 1564 * @tree - the tree to search
1544 * @start - the offset at/after which the found extent should start 1565 * @start - the offset at/after which the found extent should start
@@ -1578,12 +1599,52 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
1578 goto out; 1599 goto out;
1579 } 1600 }
1580 } 1601 }
1602 /*
1603 * At this point 'node' either contains 'start' or start is
1604 * before 'node'
1605 */
1581 state = rb_entry(node, struct extent_state, rb_node); 1606 state = rb_entry(node, struct extent_state, rb_node);
1582 if (in_range(start, state->start, state->end - state->start + 1) && 1607
1583 (state->state & bits)) { 1608 if (in_range(start, state->start, state->end - state->start + 1)) {
1584 start = state->end + 1; 1609 if (state->state & bits) {
1610 /*
1611 * |--range with bits sets--|
1612 * |
1613 * start
1614 */
1615 start = state->end + 1;
1616 } else {
1617 /*
1618 * 'start' falls within a range that doesn't
1619 * have the bits set, so take its start as
1620 * the beginning of the desired range
1621 *
1622 * |--range with bits cleared----|
1623 * |
1624 * start
1625 */
1626 *start_ret = state->start;
1627 break;
1628 }
1585 } else { 1629 } else {
1586 *start_ret = start; 1630 /*
1631 * |---prev range---|---hole/unset---|---node range---|
1632 * |
1633 * start
1634 *
1635 * or
1636 *
1637 * |---hole/unset--||--first node--|
1638 * 0 |
1639 * start
1640 */
1641 if (prev) {
1642 state = rb_entry(prev, struct extent_state,
1643 rb_node);
1644 *start_ret = state->end + 1;
1645 } else {
1646 *start_ret = 0;
1647 }
1587 break; 1648 break;
1588 } 1649 }
1589 } 1650 }
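A userspace toy model of the search semantics the comments above describe for find_first_clear_extent_bit(): the returned clear range must end at or after @start but may begin before it. The interval list and values are invented, the set ranges are assumed non-adjacent, and this is not the rb-tree based kernel implementation.

#include <stdio.h>

/* Toy model: intervals where the bit is set, sorted and non-overlapping. */
struct range { unsigned long long start, end; };

int main(void)
{
        struct range set[] = { { 0, 99 }, { 200, 299 }, { 500, 599 } };
        int nr = 3;
        unsigned long long start = 250; /* falls inside the second set range */
        unsigned long long start_ret = 0, end_ret = ~0ULL;
        unsigned long long prev_end_plus1 = 0;
        int i;

        for (i = 0; i < nr; i++) {
                if (start >= set[i].start && start <= set[i].end) {
                        /* inside a set range: clear range begins right after it */
                        start_ret = set[i].end + 1;
                        prev_end_plus1 = start_ret;
                } else if (start < set[i].start) {
                        /* start (or the skipped-to point) sits in a hole that
                         * began after the previous set range */
                        start_ret = prev_end_plus1;
                        end_ret = set[i].start - 1;
                        break;
                } else {
                        prev_end_plus1 = set[i].end + 1;
                }
        }
        /* here: [300, 499] -- starts before nothing, ends after start */
        printf("clear range: [%llu, %llu]\n", start_ret, end_ret);
        return 0;
}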
@@ -1719,10 +1780,10 @@ static noinline int lock_delalloc_pages(struct inode *inode,
1719 */ 1780 */
1720EXPORT_FOR_TESTS 1781EXPORT_FOR_TESTS
1721noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, 1782noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
1722 struct extent_io_tree *tree,
1723 struct page *locked_page, u64 *start, 1783 struct page *locked_page, u64 *start,
1724 u64 *end) 1784 u64 *end)
1725{ 1785{
1786 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1726 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; 1787 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
1727 u64 delalloc_start; 1788 u64 delalloc_start;
1728 u64 delalloc_end; 1789 u64 delalloc_end;
@@ -2800,12 +2861,11 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2800 * never fail. We're returning a bio right now but you can call btrfs_io_bio 2861 * never fail. We're returning a bio right now but you can call btrfs_io_bio
2801 * for the appropriate container_of magic 2862 * for the appropriate container_of magic
2802 */ 2863 */
2803struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) 2864struct bio *btrfs_bio_alloc(u64 first_byte)
2804{ 2865{
2805 struct bio *bio; 2866 struct bio *bio;
2806 2867
2807 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); 2868 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
2808 bio_set_dev(bio, bdev);
2809 bio->bi_iter.bi_sector = first_byte >> 9; 2869 bio->bi_iter.bi_sector = first_byte >> 9;
2810 btrfs_io_bio_init(btrfs_io_bio(bio)); 2870 btrfs_io_bio_init(btrfs_io_bio(bio));
2811 return bio; 2871 return bio;
@@ -2916,7 +2976,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2916 } 2976 }
2917 } 2977 }
2918 2978
2919 bio = btrfs_bio_alloc(bdev, offset); 2979 bio = btrfs_bio_alloc(offset);
2980 bio_set_dev(bio, bdev);
2920 bio_add_page(bio, page, page_size, pg_offset); 2981 bio_add_page(bio, page, page_size, pg_offset);
2921 bio->bi_end_io = end_io_func; 2982 bio->bi_end_io = end_io_func;
2922 bio->bi_private = tree; 2983 bio->bi_private = tree;
@@ -3204,21 +3265,10 @@ static inline void contiguous_readpages(struct extent_io_tree *tree,
3204 unsigned long *bio_flags, 3265 unsigned long *bio_flags,
3205 u64 *prev_em_start) 3266 u64 *prev_em_start)
3206{ 3267{
3207 struct inode *inode; 3268 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
3208 struct btrfs_ordered_extent *ordered;
3209 int index; 3269 int index;
3210 3270
3211 inode = pages[0]->mapping->host; 3271 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
3212 while (1) {
3213 lock_extent(tree, start, end);
3214 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3215 end - start + 1);
3216 if (!ordered)
3217 break;
3218 unlock_extent(tree, start, end);
3219 btrfs_start_ordered_extent(inode, ordered, 1);
3220 btrfs_put_ordered_extent(ordered);
3221 }
3222 3272
3223 for (index = 0; index < nr_pages; index++) { 3273 for (index = 0; index < nr_pages; index++) {
3224 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached, 3274 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
@@ -3234,22 +3284,12 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
3234 unsigned long *bio_flags, 3284 unsigned long *bio_flags,
3235 unsigned int read_flags) 3285 unsigned int read_flags)
3236{ 3286{
3237 struct inode *inode = page->mapping->host; 3287 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
3238 struct btrfs_ordered_extent *ordered;
3239 u64 start = page_offset(page); 3288 u64 start = page_offset(page);
3240 u64 end = start + PAGE_SIZE - 1; 3289 u64 end = start + PAGE_SIZE - 1;
3241 int ret; 3290 int ret;
3242 3291
3243 while (1) { 3292 btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
3244 lock_extent(tree, start, end);
3245 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3246 PAGE_SIZE);
3247 if (!ordered)
3248 break;
3249 unlock_extent(tree, start, end);
3250 btrfs_start_ordered_extent(inode, ordered, 1);
3251 btrfs_put_ordered_extent(ordered);
3252 }
3253 3293
3254 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, 3294 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3255 bio_flags, read_flags, NULL); 3295 bio_flags, read_flags, NULL);
@@ -3290,7 +3330,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
3290 struct page *page, struct writeback_control *wbc, 3330 struct page *page, struct writeback_control *wbc,
3291 u64 delalloc_start, unsigned long *nr_written) 3331 u64 delalloc_start, unsigned long *nr_written)
3292{ 3332{
3293 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
3294 u64 page_end = delalloc_start + PAGE_SIZE - 1; 3333 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3295 bool found; 3334 bool found;
3296 u64 delalloc_to_write = 0; 3335 u64 delalloc_to_write = 0;
@@ -3300,8 +3339,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
3300 3339
3301 3340
3302 while (delalloc_end < page_end) { 3341 while (delalloc_end < page_end) {
3303 found = find_lock_delalloc_range(inode, tree, 3342 found = find_lock_delalloc_range(inode, page,
3304 page,
3305 &delalloc_start, 3343 &delalloc_start,
3306 &delalloc_end); 3344 &delalloc_end);
3307 if (!found) { 3345 if (!found) {
@@ -3310,7 +3348,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
3310 } 3348 }
3311 ret = btrfs_run_delalloc_range(inode, page, delalloc_start, 3349 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3312 delalloc_end, &page_started, nr_written, wbc); 3350 delalloc_end, &page_started, nr_written, wbc);
3313 /* File system has been set read-only */
3314 if (ret) { 3351 if (ret) {
3315 SetPageError(page); 3352 SetPageError(page);
3316 /* 3353 /*
@@ -4542,6 +4579,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4542 struct btrfs_path *path; 4579 struct btrfs_path *path;
4543 struct btrfs_root *root = BTRFS_I(inode)->root; 4580 struct btrfs_root *root = BTRFS_I(inode)->root;
4544 struct fiemap_cache cache = { 0 }; 4581 struct fiemap_cache cache = { 0 };
4582 struct ulist *roots;
4583 struct ulist *tmp_ulist;
4545 int end = 0; 4584 int end = 0;
4546 u64 em_start = 0; 4585 u64 em_start = 0;
4547 u64 em_len = 0; 4586 u64 em_len = 0;
@@ -4555,6 +4594,13 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4555 return -ENOMEM; 4594 return -ENOMEM;
4556 path->leave_spinning = 1; 4595 path->leave_spinning = 1;
4557 4596
4597 roots = ulist_alloc(GFP_KERNEL);
4598 tmp_ulist = ulist_alloc(GFP_KERNEL);
4599 if (!roots || !tmp_ulist) {
4600 ret = -ENOMEM;
4601 goto out_free_ulist;
4602 }
4603
4558 start = round_down(start, btrfs_inode_sectorsize(inode)); 4604 start = round_down(start, btrfs_inode_sectorsize(inode));
4559 len = round_up(max, btrfs_inode_sectorsize(inode)) - start; 4605 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4560 4606
@@ -4565,8 +4611,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4565 ret = btrfs_lookup_file_extent(NULL, root, path, 4611 ret = btrfs_lookup_file_extent(NULL, root, path,
4566 btrfs_ino(BTRFS_I(inode)), -1, 0); 4612 btrfs_ino(BTRFS_I(inode)), -1, 0);
4567 if (ret < 0) { 4613 if (ret < 0) {
4568 btrfs_free_path(path); 4614 goto out_free_ulist;
4569 return ret;
4570 } else { 4615 } else {
4571 WARN_ON(!ret); 4616 WARN_ON(!ret);
4572 if (ret == 1) 4617 if (ret == 1)
@@ -4675,7 +4720,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4675 */ 4720 */
4676 ret = btrfs_check_shared(root, 4721 ret = btrfs_check_shared(root,
4677 btrfs_ino(BTRFS_I(inode)), 4722 btrfs_ino(BTRFS_I(inode)),
4678 bytenr); 4723 bytenr, roots, tmp_ulist);
4679 if (ret < 0) 4724 if (ret < 0)
4680 goto out_free; 4725 goto out_free;
4681 if (ret) 4726 if (ret)
@@ -4718,9 +4763,13 @@ out_free:
4718 ret = emit_last_fiemap_cache(fieinfo, &cache); 4763 ret = emit_last_fiemap_cache(fieinfo, &cache);
4719 free_extent_map(em); 4764 free_extent_map(em);
4720out: 4765out:
4721 btrfs_free_path(path);
4722 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, 4766 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4723 &cached_state); 4767 &cached_state);
4768
4769out_free_ulist:
4770 btrfs_free_path(path);
4771 ulist_free(roots);
4772 ulist_free(tmp_ulist);
4724 return ret; 4773 return ret;
4725} 4774}
4726 4775
@@ -4808,7 +4857,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4808 eb->bflags = 0; 4857 eb->bflags = 0;
4809 rwlock_init(&eb->lock); 4858 rwlock_init(&eb->lock);
4810 atomic_set(&eb->blocking_readers, 0); 4859 atomic_set(&eb->blocking_readers, 0);
4811 atomic_set(&eb->blocking_writers, 0); 4860 eb->blocking_writers = 0;
4812 eb->lock_nested = false; 4861 eb->lock_nested = false;
4813 init_waitqueue_head(&eb->write_lock_wq); 4862 init_waitqueue_head(&eb->write_lock_wq);
4814 init_waitqueue_head(&eb->read_lock_wq); 4863 init_waitqueue_head(&eb->read_lock_wq);
@@ -4827,10 +4876,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4827 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE); 4876 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4828 4877
4829#ifdef CONFIG_BTRFS_DEBUG 4878#ifdef CONFIG_BTRFS_DEBUG
4830 atomic_set(&eb->spinning_writers, 0); 4879 eb->spinning_writers = 0;
4831 atomic_set(&eb->spinning_readers, 0); 4880 atomic_set(&eb->spinning_readers, 0);
4832 atomic_set(&eb->read_locks, 0); 4881 atomic_set(&eb->read_locks, 0);
4833 atomic_set(&eb->write_locks, 0); 4882 eb->write_locks = 0;
4834#endif 4883#endif
4835 4884
4836 return eb; 4885 return eb;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index aa18a16a6ed7..401423b16976 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -167,7 +167,7 @@ struct extent_buffer {
167 struct rcu_head rcu_head; 167 struct rcu_head rcu_head;
168 pid_t lock_owner; 168 pid_t lock_owner;
169 169
170 atomic_t blocking_writers; 170 int blocking_writers;
171 atomic_t blocking_readers; 171 atomic_t blocking_readers;
172 bool lock_nested; 172 bool lock_nested;
173 /* >= 0 if eb belongs to a log tree, -1 otherwise */ 173 /* >= 0 if eb belongs to a log tree, -1 otherwise */
@@ -187,10 +187,10 @@ struct extent_buffer {
187 wait_queue_head_t read_lock_wq; 187 wait_queue_head_t read_lock_wq;
188 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 188 struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
189#ifdef CONFIG_BTRFS_DEBUG 189#ifdef CONFIG_BTRFS_DEBUG
190 atomic_t spinning_writers; 190 int spinning_writers;
191 atomic_t spinning_readers; 191 atomic_t spinning_readers;
192 atomic_t read_locks; 192 atomic_t read_locks;
193 atomic_t write_locks; 193 int write_locks;
194 struct list_head leak_list; 194 struct list_head leak_list;
195#endif 195#endif
196}; 196};
@@ -497,7 +497,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
497 u64 delalloc_end, struct page *locked_page, 497 u64 delalloc_end, struct page *locked_page,
498 unsigned bits_to_clear, 498 unsigned bits_to_clear,
499 unsigned long page_ops); 499 unsigned long page_ops);
500struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte); 500struct bio *btrfs_bio_alloc(u64 first_byte);
501struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs); 501struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
502struct bio *btrfs_bio_clone(struct bio *bio); 502struct bio *btrfs_bio_clone(struct bio *bio);
503struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size); 503struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
@@ -549,7 +549,7 @@ int free_io_failure(struct extent_io_tree *failure_tree,
549 struct extent_io_tree *io_tree, 549 struct extent_io_tree *io_tree,
550 struct io_failure_record *rec); 550 struct io_failure_record *rec);
551#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 551#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
552bool find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree, 552bool find_lock_delalloc_range(struct inode *inode,
553 struct page *locked_page, u64 *start, 553 struct page *locked_page, u64 *start,
554 u64 *end); 554 u64 *end);
555#endif 555#endif
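The prototype change above matches the extent_io.c hunks: btrfs_bio_alloc() no longer takes a block device, so callers attach it themselves. A minimal caller sketch using only helpers visible in this diff (example_alloc_read_bio() and its arguments are placeholders, not part of the patch):

static struct bio *example_alloc_read_bio(struct block_device *bdev,
					   struct page *page, u64 disk_byte)
{
	struct bio *bio;

	bio = btrfs_bio_alloc(disk_byte);	/* device no longer passed in */
	bio_set_dev(bio, bdev);			/* caller sets it explicitly  */
	bio_add_page(bio, page, PAGE_SIZE, 0);
	return bio;
}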
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index d431ea8198e4..1a599f50837b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -8,6 +8,7 @@
8#include <linux/pagemap.h> 8#include <linux/pagemap.h>
9#include <linux/highmem.h> 9#include <linux/highmem.h>
10#include <linux/sched/mm.h> 10#include <linux/sched/mm.h>
11#include <crypto/hash.h>
11#include "ctree.h" 12#include "ctree.h"
12#include "disk-io.h" 13#include "disk-io.h"
13#include "transaction.h" 14#include "transaction.h"
@@ -22,9 +23,13 @@
22#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ 23#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
23 PAGE_SIZE)) 24 PAGE_SIZE))
24 25
25#define MAX_ORDERED_SUM_BYTES(fs_info) ((PAGE_SIZE - \ 26static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
26 sizeof(struct btrfs_ordered_sum)) / \ 27 u16 csum_size)
27 sizeof(u32) * (fs_info)->sectorsize) 28{
29 u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size;
30
31 return ncsums * fs_info->sectorsize;
32}
28 33
29int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 34int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
30 struct btrfs_root *root, 35 struct btrfs_root *root,
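The old MAX_ORDERED_SUM_BYTES macro hard-coded 4-byte (crc32c) checksums; the helper above derives the limit from the actual checksum size. A stand-alone arithmetic sketch of the effect (the ordered_sum header overhead is an assumed placeholder, not the real sizeof(struct btrfs_ordered_sum)):

#include <stdio.h>

int main(void)
{
	const unsigned int page_size = 4096;
	const unsigned int sectorsize = 4096;
	const unsigned int hdr = 48;			/* assumed overhead */
	const unsigned int csum_sizes[] = { 4, 32 };	/* crc32c vs a 32-byte csum */

	for (int i = 0; i < 2; i++) {
		unsigned int ncsums = (page_size - hdr) / csum_sizes[i];

		/* bigger checksums -> fewer sums per page -> each ordered
		 * sum item covers a smaller byte range */
		printf("csum_size %2u -> %4u csums -> covers %8u bytes\n",
		       csum_sizes[i], ncsums, ncsums * sectorsize);
	}
	return 0;
}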
@@ -144,7 +149,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
144} 149}
145 150
146static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, 151static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
147 u64 logical_offset, u32 *dst, int dio) 152 u64 logical_offset, u8 *dst, int dio)
148{ 153{
149 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 154 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
150 struct bio_vec bvec; 155 struct bio_vec bvec;
@@ -182,7 +187,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
182 } 187 }
183 csum = btrfs_bio->csum; 188 csum = btrfs_bio->csum;
184 } else { 189 } else {
185 csum = (u8 *)dst; 190 csum = dst;
186 } 191 }
187 192
188 if (bio->bi_iter.bi_size > PAGE_SIZE * 8) 193 if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
@@ -211,7 +216,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
211 if (!dio) 216 if (!dio)
212 offset = page_offset(bvec.bv_page) + bvec.bv_offset; 217 offset = page_offset(bvec.bv_page) + bvec.bv_offset;
213 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, 218 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
214 (u32 *)csum, nblocks); 219 csum, nblocks);
215 if (count) 220 if (count)
216 goto found; 221 goto found;
217 222
@@ -283,7 +288,8 @@ next:
283 return 0; 288 return 0;
284} 289}
285 290
286blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) 291blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
292 u8 *dst)
287{ 293{
288 return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); 294 return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
289} 295}
@@ -374,7 +380,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
374 struct btrfs_csum_item); 380 struct btrfs_csum_item);
375 while (start < csum_end) { 381 while (start < csum_end) {
376 size = min_t(size_t, csum_end - start, 382 size = min_t(size_t, csum_end - start,
377 MAX_ORDERED_SUM_BYTES(fs_info)); 383 max_ordered_sum_bytes(fs_info, csum_size));
378 sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), 384 sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
379 GFP_NOFS); 385 GFP_NOFS);
380 if (!sums) { 386 if (!sums) {
@@ -427,6 +433,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
427 u64 file_start, int contig) 433 u64 file_start, int contig)
428{ 434{
429 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 435 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
436 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
430 struct btrfs_ordered_sum *sums; 437 struct btrfs_ordered_sum *sums;
431 struct btrfs_ordered_extent *ordered = NULL; 438 struct btrfs_ordered_extent *ordered = NULL;
432 char *data; 439 char *data;
@@ -439,6 +446,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
439 int i; 446 int i;
440 u64 offset; 447 u64 offset;
441 unsigned nofs_flag; 448 unsigned nofs_flag;
449 const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
442 450
443 nofs_flag = memalloc_nofs_save(); 451 nofs_flag = memalloc_nofs_save();
444 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), 452 sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
@@ -459,6 +467,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
459 sums->bytenr = (u64)bio->bi_iter.bi_sector << 9; 467 sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
460 index = 0; 468 index = 0;
461 469
470 shash->tfm = fs_info->csum_shash;
471
462 bio_for_each_segment(bvec, bio, iter) { 472 bio_for_each_segment(bvec, bio, iter) {
463 if (!contig) 473 if (!contig)
464 offset = page_offset(bvec.bv_page) + bvec.bv_offset; 474 offset = page_offset(bvec.bv_page) + bvec.bv_offset;
@@ -498,17 +508,14 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
498 index = 0; 508 index = 0;
499 } 509 }
500 510
501 sums->sums[index] = ~(u32)0; 511 crypto_shash_init(shash);
502 data = kmap_atomic(bvec.bv_page); 512 data = kmap_atomic(bvec.bv_page);
503 sums->sums[index] 513 crypto_shash_update(shash, data + bvec.bv_offset
504 = btrfs_csum_data(data + bvec.bv_offset 514 + (i * fs_info->sectorsize),
505 + (i * fs_info->sectorsize), 515 fs_info->sectorsize);
506 sums->sums[index],
507 fs_info->sectorsize);
508 kunmap_atomic(data); 516 kunmap_atomic(data);
509 btrfs_csum_final(sums->sums[index], 517 crypto_shash_final(shash, (char *)(sums->sums + index));
510 (char *)(sums->sums + index)); 518 index += csum_size;
511 index++;
512 offset += fs_info->sectorsize; 519 offset += fs_info->sectorsize;
513 this_sum_bytes += fs_info->sectorsize; 520 this_sum_bytes += fs_info->sectorsize;
514 total_bytes += fs_info->sectorsize; 521 total_bytes += fs_info->sectorsize;
@@ -904,9 +911,9 @@ found:
904 write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, 911 write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
905 ins_size); 912 ins_size);
906 913
914 index += ins_size;
907 ins_size /= csum_size; 915 ins_size /= csum_size;
908 total_bytes += ins_size * fs_info->sectorsize; 916 total_bytes += ins_size * fs_info->sectorsize;
909 index += ins_size;
910 917
911 btrfs_mark_buffer_dirty(path->nodes[0]); 918 btrfs_mark_buffer_dirty(path->nodes[0]);
912 if (total_bytes < sums->len) { 919 if (total_bytes < sums->len) {
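Checksums in this file are now computed through the generic crypto API instead of the btrfs_csum_data()/btrfs_csum_final() pair. A condensed sketch of the init/update/final pattern used by btrfs_csum_one_bio() above (example_csum_buffer() is illustrative only; fs_info->csum_shash is the transform set up elsewhere in this series):

static void example_csum_buffer(struct btrfs_fs_info *fs_info,
				const void *data, unsigned int len, u8 *out)
{
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_update(shash, data, len);
	crypto_shash_final(shash, out);	/* out must hold csum_size bytes */
}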
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 89f5be2bfb43..58a18ed11546 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -26,6 +26,7 @@
26#include "volumes.h" 26#include "volumes.h"
27#include "qgroup.h" 27#include "qgroup.h"
28#include "compression.h" 28#include "compression.h"
29#include "delalloc-space.h"
29 30
30static struct kmem_cache *btrfs_inode_defrag_cachep; 31static struct kmem_cache *btrfs_inode_defrag_cachep;
31/* 32/*
@@ -1550,30 +1551,20 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
1550{ 1551{
1551 struct btrfs_fs_info *fs_info = inode->root->fs_info; 1552 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1552 struct btrfs_root *root = inode->root; 1553 struct btrfs_root *root = inode->root;
1553 struct btrfs_ordered_extent *ordered;
1554 u64 lockstart, lockend; 1554 u64 lockstart, lockend;
1555 u64 num_bytes; 1555 u64 num_bytes;
1556 int ret; 1556 int ret;
1557 1557
1558 ret = btrfs_start_write_no_snapshotting(root); 1558 ret = btrfs_start_write_no_snapshotting(root);
1559 if (!ret) 1559 if (!ret)
1560 return -ENOSPC; 1560 return -EAGAIN;
1561 1561
1562 lockstart = round_down(pos, fs_info->sectorsize); 1562 lockstart = round_down(pos, fs_info->sectorsize);
1563 lockend = round_up(pos + *write_bytes, 1563 lockend = round_up(pos + *write_bytes,
1564 fs_info->sectorsize) - 1; 1564 fs_info->sectorsize) - 1;
1565 1565
1566 while (1) { 1566 btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart,
1567 lock_extent(&inode->io_tree, lockstart, lockend); 1567 lockend, NULL);
1568 ordered = btrfs_lookup_ordered_range(inode, lockstart,
1569 lockend - lockstart + 1);
1570 if (!ordered) {
1571 break;
1572 }
1573 unlock_extent(&inode->io_tree, lockstart, lockend);
1574 btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
1575 btrfs_put_ordered_extent(ordered);
1576 }
1577 1568
1578 num_bytes = lockend - lockstart + 1; 1569 num_bytes = lockend - lockstart + 1;
1579 ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, 1570 ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
@@ -2721,6 +2712,11 @@ out_only_mutex:
2721 * for detecting, at fsync time, if the inode isn't yet in the 2712 * for detecting, at fsync time, if the inode isn't yet in the
2722 * log tree or it's there but not up to date. 2713 * log tree or it's there but not up to date.
2723 */ 2714 */
2715 struct timespec64 now = current_time(inode);
2716
2717 inode_inc_iversion(inode);
2718 inode->i_mtime = now;
2719 inode->i_ctime = now;
2724 trans = btrfs_start_transaction(root, 1); 2720 trans = btrfs_start_transaction(root, 1);
2725 if (IS_ERR(trans)) { 2721 if (IS_ERR(trans)) {
2726 err = PTR_ERR(trans); 2722 err = PTR_ERR(trans);
@@ -2801,9 +2797,9 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
2801} 2797}
2802 2798
2803enum { 2799enum {
2804 RANGE_BOUNDARY_WRITTEN_EXTENT = 0, 2800 RANGE_BOUNDARY_WRITTEN_EXTENT,
2805 RANGE_BOUNDARY_PREALLOC_EXTENT = 1, 2801 RANGE_BOUNDARY_PREALLOC_EXTENT,
2806 RANGE_BOUNDARY_HOLE = 2, 2802 RANGE_BOUNDARY_HOLE,
2807}; 2803};
2808 2804
2809static int btrfs_zero_range_check_range_boundary(struct inode *inode, 2805static int btrfs_zero_range_check_range_boundary(struct inode *inode,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f74dc259307b..062be9dde4c6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,8 @@
18#include "extent_io.h" 18#include "extent_io.h"
19#include "inode-map.h" 19#include "inode-map.h"
20#include "volumes.h" 20#include "volumes.h"
21#include "space-info.h"
22#include "delalloc-space.h"
21 23
22#define BITS_PER_BITMAP (PAGE_SIZE * 8UL) 24#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
23#define MAX_CACHE_BYTES_PER_GIG SZ_32K 25#define MAX_CACHE_BYTES_PER_GIG SZ_32K
@@ -465,9 +467,8 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
465 if (index == 0) 467 if (index == 0)
466 offset = sizeof(u32) * io_ctl->num_pages; 468 offset = sizeof(u32) * io_ctl->num_pages;
467 469
468 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 470 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
469 PAGE_SIZE - offset); 471 btrfs_crc32c_final(crc, (u8 *)&crc);
470 btrfs_csum_final(crc, (u8 *)&crc);
471 io_ctl_unmap_page(io_ctl); 472 io_ctl_unmap_page(io_ctl);
472 tmp = page_address(io_ctl->pages[0]); 473 tmp = page_address(io_ctl->pages[0]);
473 tmp += index; 474 tmp += index;
@@ -493,9 +494,8 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
493 val = *tmp; 494 val = *tmp;
494 495
495 io_ctl_map_page(io_ctl, 0); 496 io_ctl_map_page(io_ctl, 0);
496 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 497 crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
497 PAGE_SIZE - offset); 498 btrfs_crc32c_final(crc, (u8 *)&crc);
498 btrfs_csum_final(crc, (u8 *)&crc);
499 if (val != crc) { 499 if (val != crc) {
500 btrfs_err_rl(io_ctl->fs_info, 500 btrfs_err_rl(io_ctl->fs_info,
501 "csum mismatch on free space cache"); 501 "csum mismatch on free space cache");
@@ -3166,8 +3166,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
3166 space_info->bytes_readonly += reserved_bytes; 3166 space_info->bytes_readonly += reserved_bytes;
3167 block_group->reserved -= reserved_bytes; 3167 block_group->reserved -= reserved_bytes;
3168 space_info->bytes_reserved -= reserved_bytes; 3168 space_info->bytes_reserved -= reserved_bytes;
3169 spin_unlock(&space_info->lock);
3170 spin_unlock(&block_group->lock); 3169 spin_unlock(&block_group->lock);
3170 spin_unlock(&space_info->lock);
3171 } 3171 }
3172 3172
3173 return ret; 3173 return ret;
@@ -3358,7 +3358,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
3358 3358
3359 if (cleanup) { 3359 if (cleanup) {
3360 mutex_lock(&fs_info->chunk_mutex); 3360 mutex_lock(&fs_info->chunk_mutex);
3361 em_tree = &fs_info->mapping_tree.map_tree; 3361 em_tree = &fs_info->mapping_tree;
3362 write_lock(&em_tree->lock); 3362 write_lock(&em_tree->lock);
3363 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3363 em = lookup_extent_mapping(em_tree, block_group->key.objectid,
3364 1); 3364 1);
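The io_ctl_set_crc()/io_ctl_check_crc() hunks switch from btrfs_csum_data()/btrfs_csum_final() to the crc32c-specific helpers. A small sketch of the resulting two-step pattern (the seed value is an assumption; it is not visible in this hunk):

static u32 example_crc32c(const void *buf, u32 len)
{
	u32 crc = ~(u32)0;			/* assumed seed */

	crc = btrfs_crc32c(crc, buf, len);	/* accumulate over the buffer */
	btrfs_crc32c_final(crc, (u8 *)&crc);	/* finalise in place */
	return crc;
}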
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ffca2abf13d0..2e8bb402050b 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -11,6 +11,7 @@
11#include "free-space-cache.h" 11#include "free-space-cache.h"
12#include "inode-map.h" 12#include "inode-map.h"
13#include "transaction.h" 13#include "transaction.h"
14#include "delalloc-space.h"
14 15
15static int caching_kthread(void *data) 16static int caching_kthread(void *data)
16{ 17{
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a2aabdb85226..1af069a9a0c7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -47,6 +47,7 @@
47#include "props.h" 47#include "props.h"
48#include "qgroup.h" 48#include "qgroup.h"
49#include "dedupe.h" 49#include "dedupe.h"
50#include "delalloc-space.h"
50 51
51struct btrfs_iget_args { 52struct btrfs_iget_args {
52 struct btrfs_key *location; 53 struct btrfs_key *location;
@@ -1932,17 +1933,19 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
1932 u64 length = 0; 1933 u64 length = 0;
1933 u64 map_length; 1934 u64 map_length;
1934 int ret; 1935 int ret;
1936 struct btrfs_io_geometry geom;
1935 1937
1936 if (bio_flags & EXTENT_BIO_COMPRESSED) 1938 if (bio_flags & EXTENT_BIO_COMPRESSED)
1937 return 0; 1939 return 0;
1938 1940
1939 length = bio->bi_iter.bi_size; 1941 length = bio->bi_iter.bi_size;
1940 map_length = length; 1942 map_length = length;
1941 ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, 1943 ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
1942 NULL, 0); 1944 &geom);
1943 if (ret < 0) 1945 if (ret < 0)
1944 return ret; 1946 return ret;
1945 if (map_length < length + size) 1947
1948 if (geom.len < length + size)
1946 return 1; 1949 return 1;
1947 return 0; 1950 return 0;
1948} 1951}
@@ -3203,16 +3206,23 @@ static int __readpage_endio_check(struct inode *inode,
3203 int icsum, struct page *page, 3206 int icsum, struct page *page,
3204 int pgoff, u64 start, size_t len) 3207 int pgoff, u64 start, size_t len)
3205{ 3208{
3209 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3210 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
3206 char *kaddr; 3211 char *kaddr;
3207 u32 csum_expected; 3212 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
3208 u32 csum = ~(u32)0; 3213 u8 *csum_expected;
3214 u8 csum[BTRFS_CSUM_SIZE];
3209 3215
3210 csum_expected = *(((u32 *)io_bio->csum) + icsum); 3216 csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
3211 3217
3212 kaddr = kmap_atomic(page); 3218 kaddr = kmap_atomic(page);
3213 csum = btrfs_csum_data(kaddr + pgoff, csum, len); 3219 shash->tfm = fs_info->csum_shash;
3214 btrfs_csum_final(csum, (u8 *)&csum); 3220
3215 if (csum != csum_expected) 3221 crypto_shash_init(shash);
3222 crypto_shash_update(shash, kaddr + pgoff, len);
3223 crypto_shash_final(shash, csum);
3224
3225 if (memcmp(csum, csum_expected, csum_size))
3216 goto zeroit; 3226 goto zeroit;
3217 3227
3218 kunmap_atomic(kaddr); 3228 kunmap_atomic(kaddr);
@@ -3286,6 +3296,28 @@ void btrfs_add_delayed_iput(struct inode *inode)
3286 wake_up_process(fs_info->cleaner_kthread); 3296 wake_up_process(fs_info->cleaner_kthread);
3287} 3297}
3288 3298
3299static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
3300 struct btrfs_inode *inode)
3301{
3302 list_del_init(&inode->delayed_iput);
3303 spin_unlock(&fs_info->delayed_iput_lock);
3304 iput(&inode->vfs_inode);
3305 if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
3306 wake_up(&fs_info->delayed_iputs_wait);
3307 spin_lock(&fs_info->delayed_iput_lock);
3308}
3309
3310static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
3311 struct btrfs_inode *inode)
3312{
3313 if (!list_empty(&inode->delayed_iput)) {
3314 spin_lock(&fs_info->delayed_iput_lock);
3315 if (!list_empty(&inode->delayed_iput))
3316 run_delayed_iput_locked(fs_info, inode);
3317 spin_unlock(&fs_info->delayed_iput_lock);
3318 }
3319}
3320
3289void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) 3321void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
3290{ 3322{
3291 3323
@@ -3295,12 +3327,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
3295 3327
3296 inode = list_first_entry(&fs_info->delayed_iputs, 3328 inode = list_first_entry(&fs_info->delayed_iputs,
3297 struct btrfs_inode, delayed_iput); 3329 struct btrfs_inode, delayed_iput);
3298 list_del_init(&inode->delayed_iput); 3330 run_delayed_iput_locked(fs_info, inode);
3299 spin_unlock(&fs_info->delayed_iput_lock);
3300 iput(&inode->vfs_inode);
3301 if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
3302 wake_up(&fs_info->delayed_iputs_wait);
3303 spin_lock(&fs_info->delayed_iput_lock);
3304 } 3331 }
3305 spin_unlock(&fs_info->delayed_iput_lock); 3332 spin_unlock(&fs_info->delayed_iput_lock);
3306} 3333}
@@ -3935,9 +3962,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3935 struct btrfs_fs_info *fs_info = root->fs_info; 3962 struct btrfs_fs_info *fs_info = root->fs_info;
3936 struct btrfs_path *path; 3963 struct btrfs_path *path;
3937 int ret = 0; 3964 int ret = 0;
3938 struct extent_buffer *leaf;
3939 struct btrfs_dir_item *di; 3965 struct btrfs_dir_item *di;
3940 struct btrfs_key key;
3941 u64 index; 3966 u64 index;
3942 u64 ino = btrfs_ino(inode); 3967 u64 ino = btrfs_ino(inode);
3943 u64 dir_ino = btrfs_ino(dir); 3968 u64 dir_ino = btrfs_ino(dir);
@@ -3955,8 +3980,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3955 ret = di ? PTR_ERR(di) : -ENOENT; 3980 ret = di ? PTR_ERR(di) : -ENOENT;
3956 goto err; 3981 goto err;
3957 } 3982 }
3958 leaf = path->nodes[0];
3959 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3960 ret = btrfs_delete_one_dir_name(trans, root, path, di); 3983 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3961 if (ret) 3984 if (ret)
3962 goto err; 3985 goto err;
@@ -4009,6 +4032,17 @@ skip_backref:
4009 ret = 0; 4032 ret = 0;
4010 else if (ret) 4033 else if (ret)
4011 btrfs_abort_transaction(trans, ret); 4034 btrfs_abort_transaction(trans, ret);
4035
4036 /*
4037 * If we have a pending delayed iput we could end up with the final iput
4038 * being run in btrfs-cleaner context. If we have enough of these built
4039 * up we can end up burning a lot of time in btrfs-cleaner without any
4040 * way to throttle the unlinks. Since we're currently holding a ref on
4041 * the inode we can run the delayed iput here without any issues as the
4042 * final iput won't be done until after we drop the ref we're currently
4043 * holding.
4044 */
4045 btrfs_run_delayed_iput(fs_info, inode);
4012err: 4046err:
4013 btrfs_free_path(path); 4047 btrfs_free_path(path);
4014 if (ret) 4048 if (ret)
@@ -5008,21 +5042,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
5008 if (size <= hole_start) 5042 if (size <= hole_start)
5009 return 0; 5043 return 0;
5010 5044
5011 while (1) { 5045 btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start,
5012 struct btrfs_ordered_extent *ordered; 5046 block_end - 1, &cached_state);
5013
5014 lock_extent_bits(io_tree, hole_start, block_end - 1,
5015 &cached_state);
5016 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
5017 block_end - hole_start);
5018 if (!ordered)
5019 break;
5020 unlock_extent_cached(io_tree, hole_start, block_end - 1,
5021 &cached_state);
5022 btrfs_start_ordered_extent(inode, ordered, 1);
5023 btrfs_put_ordered_extent(ordered);
5024 }
5025
5026 cur_offset = hole_start; 5047 cur_offset = hole_start;
5027 while (1) { 5048 while (1) {
5028 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, 5049 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
@@ -8318,22 +8339,21 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
8318 struct bio *orig_bio = dip->orig_bio; 8339 struct bio *orig_bio = dip->orig_bio;
8319 u64 start_sector = orig_bio->bi_iter.bi_sector; 8340 u64 start_sector = orig_bio->bi_iter.bi_sector;
8320 u64 file_offset = dip->logical_offset; 8341 u64 file_offset = dip->logical_offset;
8321 u64 map_length;
8322 int async_submit = 0; 8342 int async_submit = 0;
8323 u64 submit_len; 8343 u64 submit_len;
8324 int clone_offset = 0; 8344 int clone_offset = 0;
8325 int clone_len; 8345 int clone_len;
8326 int ret; 8346 int ret;
8327 blk_status_t status; 8347 blk_status_t status;
8348 struct btrfs_io_geometry geom;
8328 8349
8329 map_length = orig_bio->bi_iter.bi_size; 8350 submit_len = orig_bio->bi_iter.bi_size;
8330 submit_len = map_length; 8351 ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
8331 ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9, 8352 start_sector << 9, submit_len, &geom);
8332 &map_length, NULL, 0);
8333 if (ret) 8353 if (ret)
8334 return -EIO; 8354 return -EIO;
8335 8355
8336 if (map_length >= submit_len) { 8356 if (geom.len >= submit_len) {
8337 bio = orig_bio; 8357 bio = orig_bio;
8338 dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; 8358 dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
8339 goto submit; 8359 goto submit;
@@ -8346,10 +8366,10 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
8346 async_submit = 1; 8366 async_submit = 1;
8347 8367
8348 /* bio split */ 8368 /* bio split */
8349 ASSERT(map_length <= INT_MAX); 8369 ASSERT(geom.len <= INT_MAX);
8350 atomic_inc(&dip->pending_bios); 8370 atomic_inc(&dip->pending_bios);
8351 do { 8371 do {
8352 clone_len = min_t(int, submit_len, map_length); 8372 clone_len = min_t(int, submit_len, geom.len);
8353 8373
8354 /* 8374 /*
8355 * This will never fail as it's passing GPF_NOFS and 8375 * This will never fail as it's passing GPF_NOFS and
@@ -8386,9 +8406,8 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
8386 start_sector += clone_len >> 9; 8406 start_sector += clone_len >> 9;
8387 file_offset += clone_len; 8407 file_offset += clone_len;
8388 8408
8389 map_length = submit_len; 8409 ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
8390 ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), 8410 start_sector << 9, submit_len, &geom);
8391 start_sector << 9, &map_length, NULL, 0);
8392 if (ret) 8411 if (ret)
8393 goto out_err; 8412 goto out_err;
8394 } while (submit_len > 0); 8413 } while (submit_len > 0);
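Both inode.c hunks above replace btrfs_map_block() with btrfs_get_io_geometry(), which only reports the I/O geometry at a logical address instead of building a full mapping. A condensed sketch of the new query (example_stripe_room() is illustrative; only the geom.len field shown in the hunks is used):

static int example_stripe_room(struct btrfs_fs_info *fs_info, struct bio *bio,
			       u64 logical, u64 len, u64 *room)
{
	struct btrfs_io_geometry geom;
	int ret;

	ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, len, &geom);
	if (ret < 0)
		return ret;

	/* bytes that can be submitted from 'logical' without splitting */
	*room = geom.len;
	return 0;
}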
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfeff1b8dce0..818f7ec8bb0e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,6 +43,8 @@
43#include "qgroup.h" 43#include "qgroup.h"
44#include "tree-log.h" 44#include "tree-log.h"
45#include "compression.h" 45#include "compression.h"
46#include "space-info.h"
47#include "delalloc-space.h"
46 48
47#ifdef CONFIG_64BIT 49#ifdef CONFIG_64BIT
48/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI 50/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -3993,6 +3995,27 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
3993 if (!same_inode) 3995 if (!same_inode)
3994 inode_dio_wait(inode_out); 3996 inode_dio_wait(inode_out);
3995 3997
3998 /*
 3999 * Workaround to make sure NOCOW buffered writes reach disk as NOCOW.
 4000 *
 4001 * Btrfs' back references do not have block-level granularity; they
 4002 * work at the whole extent level.
 4003 * A NOCOW buffered write without data space reserved may not be able
 4004 * to fall back to CoW due to lack of data space and thus could cause
 4005 * data loss.
 4006 *
 4007 * Here we take a shortcut by flushing the whole inode, so that all
 4008 * nocow writes reach disk as nocow before we increase the
 4009 * reference of the extent. We could do better by only flushing NOCOW
 4010 * data, but that needs extra accounting.
 4011 *
 4012 * Also we don't need to check ASYNC_EXTENT, as async extents will be
 4013 * CoWed anyway and do not affect the nocow part.
4014 */
4015 ret = filemap_flush(inode_in->i_mapping);
4016 if (ret < 0)
4017 return ret;
4018
3996 ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), 4019 ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
3997 wb_len); 4020 wb_len);
3998 if (ret < 0) 4021 if (ret < 0)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 2f6c3c7851ed..98fccce4208c 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -15,19 +15,19 @@
15#ifdef CONFIG_BTRFS_DEBUG 15#ifdef CONFIG_BTRFS_DEBUG
16static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) 16static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
17{ 17{
18 WARN_ON(atomic_read(&eb->spinning_writers)); 18 WARN_ON(eb->spinning_writers);
19 atomic_inc(&eb->spinning_writers); 19 eb->spinning_writers++;
20} 20}
21 21
22static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) 22static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
23{ 23{
24 WARN_ON(atomic_read(&eb->spinning_writers) != 1); 24 WARN_ON(eb->spinning_writers != 1);
25 atomic_dec(&eb->spinning_writers); 25 eb->spinning_writers--;
26} 26}
27 27
28static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) 28static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
29{ 29{
30 WARN_ON(atomic_read(&eb->spinning_writers)); 30 WARN_ON(eb->spinning_writers);
31} 31}
32 32
33static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) 33static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
@@ -58,17 +58,17 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
58 58
59static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) 59static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
60{ 60{
61 atomic_inc(&eb->write_locks); 61 eb->write_locks++;
62} 62}
63 63
64static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) 64static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
65{ 65{
66 atomic_dec(&eb->write_locks); 66 eb->write_locks--;
67} 67}
68 68
69void btrfs_assert_tree_locked(struct extent_buffer *eb) 69void btrfs_assert_tree_locked(struct extent_buffer *eb)
70{ 70{
71 BUG_ON(!atomic_read(&eb->write_locks)); 71 BUG_ON(!eb->write_locks);
72} 72}
73 73
74#else 74#else
@@ -111,10 +111,10 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
111 */ 111 */
112 if (eb->lock_nested && current->pid == eb->lock_owner) 112 if (eb->lock_nested && current->pid == eb->lock_owner)
113 return; 113 return;
114 if (atomic_read(&eb->blocking_writers) == 0) { 114 if (eb->blocking_writers == 0) {
115 btrfs_assert_spinning_writers_put(eb); 115 btrfs_assert_spinning_writers_put(eb);
116 btrfs_assert_tree_locked(eb); 116 btrfs_assert_tree_locked(eb);
117 atomic_inc(&eb->blocking_writers); 117 eb->blocking_writers++;
118 write_unlock(&eb->lock); 118 write_unlock(&eb->lock);
119 } 119 }
120} 120}
@@ -148,12 +148,11 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
148 */ 148 */
149 if (eb->lock_nested && current->pid == eb->lock_owner) 149 if (eb->lock_nested && current->pid == eb->lock_owner)
150 return; 150 return;
151 BUG_ON(atomic_read(&eb->blocking_writers) != 1);
152 write_lock(&eb->lock); 151 write_lock(&eb->lock);
152 BUG_ON(eb->blocking_writers != 1);
153 btrfs_assert_spinning_writers_get(eb); 153 btrfs_assert_spinning_writers_get(eb);
154 /* atomic_dec_and_test implies a barrier */ 154 if (--eb->blocking_writers == 0)
155 if (atomic_dec_and_test(&eb->blocking_writers)) 155 cond_wake_up(&eb->write_lock_wq);
156 cond_wake_up_nomb(&eb->write_lock_wq);
157} 156}
158 157
159/* 158/*
@@ -167,12 +166,10 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
167 if (trace_btrfs_tree_read_lock_enabled()) 166 if (trace_btrfs_tree_read_lock_enabled())
168 start_ns = ktime_get_ns(); 167 start_ns = ktime_get_ns();
169again: 168again:
170 BUG_ON(!atomic_read(&eb->blocking_writers) &&
171 current->pid == eb->lock_owner);
172
173 read_lock(&eb->lock); 169 read_lock(&eb->lock);
174 if (atomic_read(&eb->blocking_writers) && 170 BUG_ON(eb->blocking_writers == 0 &&
175 current->pid == eb->lock_owner) { 171 current->pid == eb->lock_owner);
172 if (eb->blocking_writers && current->pid == eb->lock_owner) {
176 /* 173 /*
177 * This extent is already write-locked by our thread. We allow 174 * This extent is already write-locked by our thread. We allow
178 * an additional read lock to be added because it's for the same 175 * an additional read lock to be added because it's for the same
@@ -185,10 +182,10 @@ again:
185 trace_btrfs_tree_read_lock(eb, start_ns); 182 trace_btrfs_tree_read_lock(eb, start_ns);
186 return; 183 return;
187 } 184 }
188 if (atomic_read(&eb->blocking_writers)) { 185 if (eb->blocking_writers) {
189 read_unlock(&eb->lock); 186 read_unlock(&eb->lock);
190 wait_event(eb->write_lock_wq, 187 wait_event(eb->write_lock_wq,
191 atomic_read(&eb->blocking_writers) == 0); 188 eb->blocking_writers == 0);
192 goto again; 189 goto again;
193 } 190 }
194 btrfs_assert_tree_read_locks_get(eb); 191 btrfs_assert_tree_read_locks_get(eb);
@@ -203,11 +200,11 @@ again:
203 */ 200 */
204int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) 201int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
205{ 202{
206 if (atomic_read(&eb->blocking_writers)) 203 if (eb->blocking_writers)
207 return 0; 204 return 0;
208 205
209 read_lock(&eb->lock); 206 read_lock(&eb->lock);
210 if (atomic_read(&eb->blocking_writers)) { 207 if (eb->blocking_writers) {
211 read_unlock(&eb->lock); 208 read_unlock(&eb->lock);
212 return 0; 209 return 0;
213 } 210 }
@@ -223,13 +220,13 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
223 */ 220 */
224int btrfs_try_tree_read_lock(struct extent_buffer *eb) 221int btrfs_try_tree_read_lock(struct extent_buffer *eb)
225{ 222{
226 if (atomic_read(&eb->blocking_writers)) 223 if (eb->blocking_writers)
227 return 0; 224 return 0;
228 225
229 if (!read_trylock(&eb->lock)) 226 if (!read_trylock(&eb->lock))
230 return 0; 227 return 0;
231 228
232 if (atomic_read(&eb->blocking_writers)) { 229 if (eb->blocking_writers) {
233 read_unlock(&eb->lock); 230 read_unlock(&eb->lock);
234 return 0; 231 return 0;
235 } 232 }
@@ -245,13 +242,11 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
245 */ 242 */
246int btrfs_try_tree_write_lock(struct extent_buffer *eb) 243int btrfs_try_tree_write_lock(struct extent_buffer *eb)
247{ 244{
248 if (atomic_read(&eb->blocking_writers) || 245 if (eb->blocking_writers || atomic_read(&eb->blocking_readers))
249 atomic_read(&eb->blocking_readers))
250 return 0; 246 return 0;
251 247
252 write_lock(&eb->lock); 248 write_lock(&eb->lock);
253 if (atomic_read(&eb->blocking_writers) || 249 if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) {
254 atomic_read(&eb->blocking_readers)) {
255 write_unlock(&eb->lock); 250 write_unlock(&eb->lock);
256 return 0; 251 return 0;
257 } 252 }
@@ -322,10 +317,9 @@ void btrfs_tree_lock(struct extent_buffer *eb)
322 WARN_ON(eb->lock_owner == current->pid); 317 WARN_ON(eb->lock_owner == current->pid);
323again: 318again:
324 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); 319 wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
325 wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); 320 wait_event(eb->write_lock_wq, eb->blocking_writers == 0);
326 write_lock(&eb->lock); 321 write_lock(&eb->lock);
327 if (atomic_read(&eb->blocking_readers) || 322 if (atomic_read(&eb->blocking_readers) || eb->blocking_writers) {
328 atomic_read(&eb->blocking_writers)) {
329 write_unlock(&eb->lock); 323 write_unlock(&eb->lock);
330 goto again; 324 goto again;
331 } 325 }
@@ -340,7 +334,7 @@ again:
340 */ 334 */
341void btrfs_tree_unlock(struct extent_buffer *eb) 335void btrfs_tree_unlock(struct extent_buffer *eb)
342{ 336{
343 int blockers = atomic_read(&eb->blocking_writers); 337 int blockers = eb->blocking_writers;
344 338
345 BUG_ON(blockers > 1); 339 BUG_ON(blockers > 1);
346 340
@@ -351,7 +345,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
351 345
352 if (blockers) { 346 if (blockers) {
353 btrfs_assert_no_spinning_writers(eb); 347 btrfs_assert_no_spinning_writers(eb);
354 atomic_dec(&eb->blocking_writers); 348 eb->blocking_writers--;
355 /* Use the lighter barrier after atomic */ 349 /* Use the lighter barrier after atomic */
356 smp_mb__after_atomic(); 350 smp_mb__after_atomic();
357 cond_wake_up_nomb(&eb->write_lock_wq); 351 cond_wake_up_nomb(&eb->write_lock_wq);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 52889da69113..1744ba8b2754 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -13,6 +13,7 @@
13#include "extent_io.h" 13#include "extent_io.h"
14#include "disk-io.h" 14#include "disk-io.h"
15#include "compression.h" 15#include "compression.h"
16#include "delalloc-space.h"
16 17
17static struct kmem_cache *btrfs_ordered_extent_cache; 18static struct kmem_cache *btrfs_ordered_extent_cache;
18 19
@@ -924,14 +925,16 @@ out:
924 * be reclaimed before their checksum is actually put into the btree 925 * be reclaimed before their checksum is actually put into the btree
925 */ 926 */
926int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 927int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
927 u32 *sum, int len) 928 u8 *sum, int len)
928{ 929{
930 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
929 struct btrfs_ordered_sum *ordered_sum; 931 struct btrfs_ordered_sum *ordered_sum;
930 struct btrfs_ordered_extent *ordered; 932 struct btrfs_ordered_extent *ordered;
931 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 933 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
932 unsigned long num_sectors; 934 unsigned long num_sectors;
933 unsigned long i; 935 unsigned long i;
934 u32 sectorsize = btrfs_inode_sectorsize(inode); 936 u32 sectorsize = btrfs_inode_sectorsize(inode);
937 const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
935 int index = 0; 938 int index = 0;
936 939
937 ordered = btrfs_lookup_ordered_extent(inode, offset); 940 ordered = btrfs_lookup_ordered_extent(inode, offset);
@@ -947,10 +950,10 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
947 num_sectors = ordered_sum->len >> 950 num_sectors = ordered_sum->len >>
948 inode->i_sb->s_blocksize_bits; 951 inode->i_sb->s_blocksize_bits;
949 num_sectors = min_t(int, len - index, num_sectors - i); 952 num_sectors = min_t(int, len - index, num_sectors - i);
950 memcpy(sum + index, ordered_sum->sums + i, 953 memcpy(sum + index, ordered_sum->sums + i * csum_size,
951 num_sectors); 954 num_sectors * csum_size);
952 955
953 index += (int)num_sectors; 956 index += (int)num_sectors * csum_size;
954 if (index == len) 957 if (index == len)
955 goto out; 958 goto out;
956 disk_bytenr += num_sectors * sectorsize; 959 disk_bytenr += num_sectors * sectorsize;
@@ -962,6 +965,51 @@ out:
962 return index; 965 return index;
963} 966}
964 967
968/*
 969 * btrfs_lock_and_flush_ordered_range - Lock the passed range and ensure all pending
970 * ordered extents in it are run to completion.
971 *
972 * @tree: IO tree used for locking out other users of the range
973 * @inode: Inode whose ordered tree is to be searched
974 * @start: Beginning of range to flush
975 * @end: Last byte of range to lock
976 * @cached_state: If passed, will return the extent state responsible for the
977 * locked range. It's the caller's responsibility to free the cached state.
978 *
979 * This function always returns with the given range locked, ensuring after it's
980 * called no order extent can be pending.
981 */
982void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
983 struct btrfs_inode *inode, u64 start,
984 u64 end,
985 struct extent_state **cached_state)
986{
987 struct btrfs_ordered_extent *ordered;
988 struct extent_state *cachedp = NULL;
989
990 if (cached_state)
991 cachedp = *cached_state;
992
993 while (1) {
994 lock_extent_bits(tree, start, end, &cachedp);
995 ordered = btrfs_lookup_ordered_range(inode, start,
996 end - start + 1);
997 if (!ordered) {
998 /*
999 * If no external cached_state has been passed then
1000 * decrement the extra ref taken for cachedp since we
1001 * aren't exposing it outside of this function
1002 */
1003 if (!cached_state)
1004 refcount_dec(&cachedp->refs);
1005 break;
1006 }
1007 unlock_extent_cached(tree, start, end, &cachedp);
1008 btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
1009 btrfs_put_ordered_extent(ordered);
1010 }
1011}
1012
965int __init ordered_data_init(void) 1013int __init ordered_data_init(void)
966{ 1014{
967 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", 1015 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
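Several call sites in this series (extent_io.c, file.c, inode.c) now use the helper above instead of open-coding the lock/lookup/wait loop. A minimal caller sketch including the unlock side (example_locked_range_work() is illustrative only):

static void example_locked_range_work(struct btrfs_inode *inode,
				      u64 start, u64 end)
{
	struct extent_io_tree *tree = &inode->io_tree;
	struct extent_state *cached_state = NULL;

	btrfs_lock_and_flush_ordered_range(tree, inode, start, end,
					   &cached_state);

	/* ... work on [start, end] with no ordered extents pending ... */

	unlock_extent_cached(tree, start, end, &cached_state);
}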
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4c5991c3de14..5204171ea962 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -23,7 +23,7 @@ struct btrfs_ordered_sum {
23 int len; 23 int len;
24 struct list_head list; 24 struct list_head list;
25 /* last field is a variable length array of csums */ 25 /* last field is a variable length array of csums */
26 u32 sums[]; 26 u8 sums[];
27}; 27};
28 28
29/* 29/*
@@ -183,11 +183,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
183int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 183int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
184 struct btrfs_ordered_extent *ordered); 184 struct btrfs_ordered_extent *ordered);
185int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 185int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
186 u32 *sum, int len); 186 u8 *sum, int len);
187u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, 187u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
188 const u64 range_start, const u64 range_len); 188 const u64 range_start, const u64 range_len);
189u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, 189u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
190 const u64 range_start, const u64 range_len); 190 const u64 range_start, const u64 range_len);
191void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
192 struct btrfs_inode *inode, u64 start,
193 u64 end,
194 struct extent_state **cached_state);
191int __init ordered_data_init(void); 195int __init ordered_data_init(void);
192void __cold ordered_data_exit(void); 196void __cold ordered_data_exit(void);
193 197
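Since sums[] is now a byte array, callers of btrfs_find_ordered_sum() size and index the destination buffer in bytes, one checksum per sector, rather than in u32 slots. A stand-alone arithmetic sketch with example values:

#include <stdio.h>

int main(void)
{
	const unsigned int nblocks = 8;			/* sectors in the bio */
	const unsigned int csum_sizes[] = { 4, 32 };	/* crc32c vs a 32-byte csum */

	for (int i = 0; i < 2; i++) {
		/* destination buffer: one checksum per sector, in bytes */
		unsigned int dst_bytes = nblocks * csum_sizes[i];

		printf("%u sectors, csum_size %2u -> %3u byte csum buffer\n",
		       nblocks, csum_sizes[i], dst_bytes);
	}
	return 0;
}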
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 1141ca5fae6a..9cb50577d982 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -153,11 +153,11 @@ static void print_eb_refs_lock(struct extent_buffer *eb)
153#ifdef CONFIG_BTRFS_DEBUG 153#ifdef CONFIG_BTRFS_DEBUG
154 btrfs_info(eb->fs_info, 154 btrfs_info(eb->fs_info,
155"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u", 155"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
156 atomic_read(&eb->refs), atomic_read(&eb->write_locks), 156 atomic_read(&eb->refs), eb->write_locks,
157 atomic_read(&eb->read_locks), 157 atomic_read(&eb->read_locks),
158 atomic_read(&eb->blocking_writers), 158 eb->blocking_writers,
159 atomic_read(&eb->blocking_readers), 159 atomic_read(&eb->blocking_readers),
160 atomic_read(&eb->spinning_writers), 160 eb->spinning_writers,
161 atomic_read(&eb->spinning_readers), 161 atomic_read(&eb->spinning_readers),
162 eb->lock_owner, current->pid); 162 eb->lock_owner, current->pid);
163#endif 163#endif
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index a9e2e66152ee..e0469816c678 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -257,11 +257,7 @@ static int prop_compression_validate(const char *value, size_t len)
257 if (!value) 257 if (!value)
258 return 0; 258 return 0;
259 259
260 if (!strncmp("lzo", value, 3)) 260 if (btrfs_compress_is_valid_type(value, len))
261 return 0;
262 else if (!strncmp("zlib", value, 4))
263 return 0;
264 else if (!strncmp("zstd", value, 4))
265 return 0; 261 return 0;
266 262
267 return -EINVAL; 263 return -EINVAL;
@@ -341,7 +337,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
341 for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) { 337 for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
342 const struct prop_handler *h = &prop_handlers[i]; 338 const struct prop_handler *h = &prop_handlers[i];
343 const char *value; 339 const char *value;
344 u64 num_bytes; 340 u64 num_bytes = 0;
345 341
346 if (!h->inheritable) 342 if (!h->inheritable)
347 continue; 343 continue;
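prop_compression_validate() now defers to the compression code for the list of valid names. A minimal sketch of calling the new helper directly (the bool-like return value is an assumption based on how it is used above):

static int example_validate_compression(const char *value, size_t len)
{
	/* accepts the same names the old open-coded checks did */
	if (btrfs_compress_is_valid_type(value, len))
		return 0;

	return -EINVAL;
}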
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 3e6ffbbd8b0a..f8a3c1b0a15a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2614,6 +2614,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
2614 int ret = 0; 2614 int ret = 0;
2615 int i; 2615 int i;
2616 u64 *i_qgroups; 2616 u64 *i_qgroups;
2617 bool committing = false;
2617 struct btrfs_fs_info *fs_info = trans->fs_info; 2618 struct btrfs_fs_info *fs_info = trans->fs_info;
2618 struct btrfs_root *quota_root; 2619 struct btrfs_root *quota_root;
2619 struct btrfs_qgroup *srcgroup; 2620 struct btrfs_qgroup *srcgroup;
@@ -2621,7 +2622,25 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
2621 u32 level_size = 0; 2622 u32 level_size = 0;
2622 u64 nums; 2623 u64 nums;
2623 2624
2624 mutex_lock(&fs_info->qgroup_ioctl_lock); 2625 /*
2626 * There are only two callers of this function.
2627 *
2628 * One in create_subvol() in the ioctl context, which needs to hold
2629 * the qgroup_ioctl_lock.
2630 *
 2631 * The other one is in create_pending_snapshot(), where no other qgroup
 2632 * code can modify the fs because everyone else needs to either start a
 2633 * new transaction or hold a trans handle, thus we don't need to hold
 2634 * qgroup_ioctl_lock.
 2635 * This avoids a long and complex lock chain and keeps lockdep happy.
2636 */
2637 spin_lock(&fs_info->trans_lock);
2638 if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
2639 committing = true;
2640 spin_unlock(&fs_info->trans_lock);
2641
2642 if (!committing)
2643 mutex_lock(&fs_info->qgroup_ioctl_lock);
2625 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2644 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2626 goto out; 2645 goto out;
2627 2646
@@ -2785,7 +2804,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
2785unlock: 2804unlock:
2786 spin_unlock(&fs_info->qgroup_lock); 2805 spin_unlock(&fs_info->qgroup_lock);
2787out: 2806out:
2788 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2807 if (!committing)
2808 mutex_unlock(&fs_info->qgroup_ioctl_lock);
2789 return ret; 2809 return ret;
2790} 2810}
2791 2811
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index f5d4c13a8dbc..2503485db859 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -7,7 +7,7 @@
7#ifndef BTRFS_RAID56_H 7#ifndef BTRFS_RAID56_H
8#define BTRFS_RAID56_H 8#define BTRFS_RAID56_H
9 9
10static inline int nr_parity_stripes(struct map_lookup *map) 10static inline int nr_parity_stripes(const struct map_lookup *map)
11{ 11{
12 if (map->type & BTRFS_BLOCK_GROUP_RAID5) 12 if (map->type & BTRFS_BLOCK_GROUP_RAID5)
13 return 1; 13 return 1;
@@ -17,7 +17,7 @@ static inline int nr_parity_stripes(struct map_lookup *map)
17 return 0; 17 return 0;
18} 18}
19 19
20static inline int nr_data_stripes(struct map_lookup *map) 20static inline int nr_data_stripes(const struct map_lookup *map)
21{ 21{
22 return map->num_stripes - nr_parity_stripes(map); 22 return map->num_stripes - nr_parity_stripes(map);
23} 23}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 22a3c69864fa..7f219851fa23 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -20,6 +20,7 @@
20#include "inode-map.h" 20#include "inode-map.h"
21#include "qgroup.h" 21#include "qgroup.h"
22#include "print-tree.h" 22#include "print-tree.h"
23#include "delalloc-space.h"
23 24
24/* 25/*
25 * backref_node, mapping_node and tree_block start with this 26 * backref_node, mapping_node and tree_block start with this
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 22124122728c..47733fb55df7 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -9,6 +9,8 @@
9#include "transaction.h" 9#include "transaction.h"
10#include "disk-io.h" 10#include "disk-io.h"
11#include "print-tree.h" 11#include "print-tree.h"
12#include "qgroup.h"
13#include "space-info.h"
12 14
13/* 15/*
14 * Read a root item from the tree. In case we detect a root item smaller then 16 * Read a root item from the tree. In case we detect a root item smaller then
@@ -497,3 +499,57 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
497 btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec); 499 btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
498 spin_unlock(&root->root_item_lock); 500 spin_unlock(&root->root_item_lock);
499} 501}
502
503/*
504 * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
505 * root: the root of the parent directory
506 * rsv: block reservation
 507 * items: the number of items that we need to reserve
508 * use_global_rsv: allow fallback to the global block reservation
509 *
510 * This function is used to reserve the space for snapshot/subvolume
 511 * creation and deletion. These operations differ from common
 512 * file/directory operations: they change two fs/file trees and the
 513 * root tree, and the number of items the qgroup reserves differs
 514 * from the free space reservation, so we cannot use the space
 515 * reservation mechanism in start_transaction().
516 */
517int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
518 struct btrfs_block_rsv *rsv, int items,
519 bool use_global_rsv)
520{
521 u64 qgroup_num_bytes = 0;
522 u64 num_bytes;
523 int ret;
524 struct btrfs_fs_info *fs_info = root->fs_info;
525 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
526
527 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
528 /* One for parent inode, two for dir entries */
529 qgroup_num_bytes = 3 * fs_info->nodesize;
530 ret = btrfs_qgroup_reserve_meta_prealloc(root,
531 qgroup_num_bytes, true);
532 if (ret)
533 return ret;
534 }
535
536 num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
537 rsv->space_info = btrfs_find_space_info(fs_info,
538 BTRFS_BLOCK_GROUP_METADATA);
539 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
540 BTRFS_RESERVE_FLUSH_ALL);
541
542 if (ret == -ENOSPC && use_global_rsv)
543 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
544
545 if (ret && qgroup_num_bytes)
546 btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
547
548 return ret;
549}
550
551void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
552 struct btrfs_block_rsv *rsv)
553{
554 btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
555}
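The qgroup part of the reservation above is sized directly from the node size: one tree block for the parent inode plus two for the directory entries. A stand-alone arithmetic sketch with common node sizes (the metadata reservation itself comes from btrfs_calc_trans_metadata_size() and is not reproduced here):

#include <stdio.h>

int main(void)
{
	const unsigned int nodesizes[] = { 4096, 16384, 65536 };

	for (int i = 0; i < 3; i++) {
		/* one block for the parent inode, two for dir entries */
		unsigned int qgroup_bytes = 3 * nodesizes[i];

		printf("nodesize %6u -> qgroup prealloc %7u bytes\n",
		       nodesizes[i], qgroup_bytes);
	}
	return 0;
}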
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f7b29f9db5e2..0c99cf9fb595 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -6,6 +6,7 @@
6#include <linux/blkdev.h> 6#include <linux/blkdev.h>
7#include <linux/ratelimit.h> 7#include <linux/ratelimit.h>
8#include <linux/sched/mm.h> 8#include <linux/sched/mm.h>
9#include <crypto/hash.h>
9#include "ctree.h" 10#include "ctree.h"
10#include "volumes.h" 11#include "volumes.h"
11#include "disk-io.h" 12#include "disk-io.h"
@@ -1787,11 +1788,12 @@ static int scrub_checksum(struct scrub_block *sblock)
1787static int scrub_checksum_data(struct scrub_block *sblock) 1788static int scrub_checksum_data(struct scrub_block *sblock)
1788{ 1789{
1789 struct scrub_ctx *sctx = sblock->sctx; 1790 struct scrub_ctx *sctx = sblock->sctx;
1791 struct btrfs_fs_info *fs_info = sctx->fs_info;
1792 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1790 u8 csum[BTRFS_CSUM_SIZE]; 1793 u8 csum[BTRFS_CSUM_SIZE];
1791 u8 *on_disk_csum; 1794 u8 *on_disk_csum;
1792 struct page *page; 1795 struct page *page;
1793 void *buffer; 1796 void *buffer;
1794 u32 crc = ~(u32)0;
1795 u64 len; 1797 u64 len;
1796 int index; 1798 int index;
1797 1799
@@ -1799,6 +1801,9 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1799 if (!sblock->pagev[0]->have_csum) 1801 if (!sblock->pagev[0]->have_csum)
1800 return 0; 1802 return 0;
1801 1803
1804 shash->tfm = fs_info->csum_shash;
1805 crypto_shash_init(shash);
1806
1802 on_disk_csum = sblock->pagev[0]->csum; 1807 on_disk_csum = sblock->pagev[0]->csum;
1803 page = sblock->pagev[0]->page; 1808 page = sblock->pagev[0]->page;
1804 buffer = kmap_atomic(page); 1809 buffer = kmap_atomic(page);
@@ -1808,7 +1813,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1808 for (;;) { 1813 for (;;) {
1809 u64 l = min_t(u64, len, PAGE_SIZE); 1814 u64 l = min_t(u64, len, PAGE_SIZE);
1810 1815
1811 crc = btrfs_csum_data(buffer, crc, l); 1816 crypto_shash_update(shash, buffer, l);
1812 kunmap_atomic(buffer); 1817 kunmap_atomic(buffer);
1813 len -= l; 1818 len -= l;
1814 if (len == 0) 1819 if (len == 0)
@@ -1820,7 +1825,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1820 buffer = kmap_atomic(page); 1825 buffer = kmap_atomic(page);
1821 } 1826 }
1822 1827
1823 btrfs_csum_final(crc, csum); 1828 crypto_shash_final(shash, csum);
1824 if (memcmp(csum, on_disk_csum, sctx->csum_size)) 1829 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1825 sblock->checksum_error = 1; 1830 sblock->checksum_error = 1;
1826 1831
@@ -1832,16 +1837,19 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1832 struct scrub_ctx *sctx = sblock->sctx; 1837 struct scrub_ctx *sctx = sblock->sctx;
1833 struct btrfs_header *h; 1838 struct btrfs_header *h;
1834 struct btrfs_fs_info *fs_info = sctx->fs_info; 1839 struct btrfs_fs_info *fs_info = sctx->fs_info;
1840 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1835 u8 calculated_csum[BTRFS_CSUM_SIZE]; 1841 u8 calculated_csum[BTRFS_CSUM_SIZE];
1836 u8 on_disk_csum[BTRFS_CSUM_SIZE]; 1842 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1837 struct page *page; 1843 struct page *page;
1838 void *mapped_buffer; 1844 void *mapped_buffer;
1839 u64 mapped_size; 1845 u64 mapped_size;
1840 void *p; 1846 void *p;
1841 u32 crc = ~(u32)0;
1842 u64 len; 1847 u64 len;
1843 int index; 1848 int index;
1844 1849
1850 shash->tfm = fs_info->csum_shash;
1851 crypto_shash_init(shash);
1852
1845 BUG_ON(sblock->page_count < 1); 1853 BUG_ON(sblock->page_count < 1);
1846 page = sblock->pagev[0]->page; 1854 page = sblock->pagev[0]->page;
1847 mapped_buffer = kmap_atomic(page); 1855 mapped_buffer = kmap_atomic(page);
@@ -1875,7 +1883,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1875 for (;;) { 1883 for (;;) {
1876 u64 l = min_t(u64, len, mapped_size); 1884 u64 l = min_t(u64, len, mapped_size);
1877 1885
1878 crc = btrfs_csum_data(p, crc, l); 1886 crypto_shash_update(shash, p, l);
1879 kunmap_atomic(mapped_buffer); 1887 kunmap_atomic(mapped_buffer);
1880 len -= l; 1888 len -= l;
1881 if (len == 0) 1889 if (len == 0)
@@ -1889,7 +1897,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1889 p = mapped_buffer; 1897 p = mapped_buffer;
1890 } 1898 }
1891 1899
1892 btrfs_csum_final(crc, calculated_csum); 1900 crypto_shash_final(shash, calculated_csum);
1893 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) 1901 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1894 sblock->checksum_error = 1; 1902 sblock->checksum_error = 1;
1895 1903
@@ -1900,18 +1908,22 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1900{ 1908{
1901 struct btrfs_super_block *s; 1909 struct btrfs_super_block *s;
1902 struct scrub_ctx *sctx = sblock->sctx; 1910 struct scrub_ctx *sctx = sblock->sctx;
1911 struct btrfs_fs_info *fs_info = sctx->fs_info;
1912 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
1903 u8 calculated_csum[BTRFS_CSUM_SIZE]; 1913 u8 calculated_csum[BTRFS_CSUM_SIZE];
1904 u8 on_disk_csum[BTRFS_CSUM_SIZE]; 1914 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1905 struct page *page; 1915 struct page *page;
1906 void *mapped_buffer; 1916 void *mapped_buffer;
1907 u64 mapped_size; 1917 u64 mapped_size;
1908 void *p; 1918 void *p;
1909 u32 crc = ~(u32)0;
1910 int fail_gen = 0; 1919 int fail_gen = 0;
1911 int fail_cor = 0; 1920 int fail_cor = 0;
1912 u64 len; 1921 u64 len;
1913 int index; 1922 int index;
1914 1923
1924 shash->tfm = fs_info->csum_shash;
1925 crypto_shash_init(shash);
1926
1915 BUG_ON(sblock->page_count < 1); 1927 BUG_ON(sblock->page_count < 1);
1916 page = sblock->pagev[0]->page; 1928 page = sblock->pagev[0]->page;
1917 mapped_buffer = kmap_atomic(page); 1929 mapped_buffer = kmap_atomic(page);
@@ -1934,7 +1946,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1934 for (;;) { 1946 for (;;) {
1935 u64 l = min_t(u64, len, mapped_size); 1947 u64 l = min_t(u64, len, mapped_size);
1936 1948
1937 crc = btrfs_csum_data(p, crc, l); 1949 crypto_shash_update(shash, p, l);
1938 kunmap_atomic(mapped_buffer); 1950 kunmap_atomic(mapped_buffer);
1939 len -= l; 1951 len -= l;
1940 if (len == 0) 1952 if (len == 0)
@@ -1948,7 +1960,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1948 p = mapped_buffer; 1960 p = mapped_buffer;
1949 } 1961 }
1950 1962
1951 btrfs_csum_final(crc, calculated_csum); 1963 crypto_shash_final(shash, calculated_csum);
1952 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) 1964 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1953 ++fail_cor; 1965 ++fail_cor;
1954 1966
@@ -2448,7 +2460,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2448 ASSERT(index < UINT_MAX); 2460 ASSERT(index < UINT_MAX);
2449 2461
2450 num_sectors = sum->len / sctx->fs_info->sectorsize; 2462 num_sectors = sum->len / sctx->fs_info->sectorsize;
2451 memcpy(csum, sum->sums + index, sctx->csum_size); 2463 memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
2452 if (index == num_sectors - 1) { 2464 if (index == num_sectors - 1) {
2453 list_del(&sum->list); 2465 list_del(&sum->list);
2454 kfree(sum); 2466 kfree(sum);
@@ -2660,18 +2672,18 @@ static int get_raid56_logic_offset(u64 physical, int num,
2660 u64 last_offset; 2672 u64 last_offset;
2661 u32 stripe_index; 2673 u32 stripe_index;
2662 u32 rot; 2674 u32 rot;
2675 const int data_stripes = nr_data_stripes(map);
2663 2676
2664 last_offset = (physical - map->stripes[num].physical) * 2677 last_offset = (physical - map->stripes[num].physical) * data_stripes;
2665 nr_data_stripes(map);
2666 if (stripe_start) 2678 if (stripe_start)
2667 *stripe_start = last_offset; 2679 *stripe_start = last_offset;
2668 2680
2669 *offset = last_offset; 2681 *offset = last_offset;
2670 for (i = 0; i < nr_data_stripes(map); i++) { 2682 for (i = 0; i < data_stripes; i++) {
2671 *offset = last_offset + i * map->stripe_len; 2683 *offset = last_offset + i * map->stripe_len;
2672 2684
2673 stripe_nr = div64_u64(*offset, map->stripe_len); 2685 stripe_nr = div64_u64(*offset, map->stripe_len);
2674 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); 2686 stripe_nr = div_u64(stripe_nr, data_stripes);
2675 2687
2676 /* Work out the disk rotation on this stripe-set */ 2688 /* Work out the disk rotation on this stripe-set */
2677 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot); 2689 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
@@ -3079,7 +3091,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3079 offset = map->stripe_len * (num / map->sub_stripes); 3091 offset = map->stripe_len * (num / map->sub_stripes);
3080 increment = map->stripe_len * factor; 3092 increment = map->stripe_len * factor;
3081 mirror_num = num % map->sub_stripes + 1; 3093 mirror_num = num % map->sub_stripes + 1;
3082 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 3094 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
3083 increment = map->stripe_len; 3095 increment = map->stripe_len;
3084 mirror_num = num % map->num_stripes + 1; 3096 mirror_num = num % map->num_stripes + 1;
3085 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 3097 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
@@ -3410,15 +3422,15 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3410 struct btrfs_block_group_cache *cache) 3422 struct btrfs_block_group_cache *cache)
3411{ 3423{
3412 struct btrfs_fs_info *fs_info = sctx->fs_info; 3424 struct btrfs_fs_info *fs_info = sctx->fs_info;
3413 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 3425 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
3414 struct map_lookup *map; 3426 struct map_lookup *map;
3415 struct extent_map *em; 3427 struct extent_map *em;
3416 int i; 3428 int i;
3417 int ret = 0; 3429 int ret = 0;
3418 3430
3419 read_lock(&map_tree->map_tree.lock); 3431 read_lock(&map_tree->lock);
3420 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); 3432 em = lookup_extent_mapping(map_tree, chunk_offset, 1);
3421 read_unlock(&map_tree->map_tree.lock); 3433 read_unlock(&map_tree->lock);
3422 3434
3423 if (!em) { 3435 if (!em) {
3424 /* 3436 /*
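All three scrub checksum paths are converted to the same crypto_shash pattern (init, update over each mapped page, final). A minimal sketch of that pattern over a single buffer, assuming fs_info->csum_shash has already been allocated at mount time and <crypto/hash.h> is included:

	static void demo_csum_buffer(struct btrfs_fs_info *fs_info,
				     const u8 *data, unsigned int len,
				     u8 result[BTRFS_CSUM_SIZE])
	{
		SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);

		shash->tfm = fs_info->csum_shash;
		crypto_shash_init(shash);
		crypto_shash_update(shash, data, len);
		crypto_shash_final(shash, result);
	}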
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7fe4770f0e5..69b59bf75882 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -686,7 +686,7 @@ static int send_cmd(struct send_ctx *sctx)
686 hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); 686 hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
687 hdr->crc = 0; 687 hdr->crc = 0;
688 688
689 crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); 689 crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
690 hdr->crc = cpu_to_le32(crc); 690 hdr->crc = cpu_to_le32(crc);
691 691
692 ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, 692 ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
@@ -6929,9 +6929,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
6929 if (ret) 6929 if (ret)
6930 goto out; 6930 goto out;
6931 6931
6932 mutex_lock(&fs_info->balance_mutex);
6933 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
6934 mutex_unlock(&fs_info->balance_mutex);
6935 btrfs_warn_rl(fs_info,
6936 "cannot run send because a balance operation is in progress");
6937 ret = -EAGAIN;
6938 goto out;
6939 }
6940 fs_info->send_in_progress++;
6941 mutex_unlock(&fs_info->balance_mutex);
6942
6932 current->journal_info = BTRFS_SEND_TRANS_STUB; 6943 current->journal_info = BTRFS_SEND_TRANS_STUB;
6933 ret = send_subvol(sctx); 6944 ret = send_subvol(sctx);
6934 current->journal_info = NULL; 6945 current->journal_info = NULL;
6946 mutex_lock(&fs_info->balance_mutex);
6947 fs_info->send_in_progress--;
6948 mutex_unlock(&fs_info->balance_mutex);
6935 if (ret < 0) 6949 if (ret < 0)
6936 goto out; 6950 goto out;
6937 6951
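Send and balance now exclude each other through balance_mutex, which protects both the BTRFS_FS_BALANCE_RUNNING bit and the send_in_progress counter. The balance side is not part of this hunk; a sketch of what the mirror-image check could look like, with a hypothetical helper name:

	static int demo_balance_excl_check(struct btrfs_fs_info *fs_info)
	{
		int ret = 0;

		mutex_lock(&fs_info->balance_mutex);
		if (fs_info->send_in_progress) {
			btrfs_warn_rl(fs_info,
				"cannot run balance while send is in progress");
			ret = -EAGAIN;
		}
		mutex_unlock(&fs_info->balance_mutex);
		return ret;
	}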
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
new file mode 100644
index 000000000000..ab7b9ec4c240
--- /dev/null
+++ b/fs/btrfs/space-info.c
@@ -0,0 +1,1094 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include "ctree.h"
4#include "space-info.h"
5#include "sysfs.h"
6#include "volumes.h"
7#include "free-space-cache.h"
8#include "ordered-data.h"
9#include "transaction.h"
10#include "math.h"
11
12u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
13 bool may_use_included)
14{
15 ASSERT(s_info);
16 return s_info->bytes_used + s_info->bytes_reserved +
17 s_info->bytes_pinned + s_info->bytes_readonly +
18 (may_use_included ? s_info->bytes_may_use : 0);
19}
20
21/*
22 * after adding space to the filesystem, we need to clear the full flags
23 * on all the space infos.
24 */
25void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
26{
27 struct list_head *head = &info->space_info;
28 struct btrfs_space_info *found;
29
30 rcu_read_lock();
31 list_for_each_entry_rcu(found, head, list)
32 found->full = 0;
33 rcu_read_unlock();
34}
35
36static const char *alloc_name(u64 flags)
37{
38 switch (flags) {
39 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
40 return "mixed";
41 case BTRFS_BLOCK_GROUP_METADATA:
42 return "metadata";
43 case BTRFS_BLOCK_GROUP_DATA:
44 return "data";
45 case BTRFS_BLOCK_GROUP_SYSTEM:
46 return "system";
47 default:
48 WARN_ON(1);
49 return "invalid-combination";
50 };
51}
52
53static int create_space_info(struct btrfs_fs_info *info, u64 flags)
54{
55
56 struct btrfs_space_info *space_info;
57 int i;
58 int ret;
59
60 space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
61 if (!space_info)
62 return -ENOMEM;
63
64 ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
65 GFP_KERNEL);
66 if (ret) {
67 kfree(space_info);
68 return ret;
69 }
70
71 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
72 INIT_LIST_HEAD(&space_info->block_groups[i]);
73 init_rwsem(&space_info->groups_sem);
74 spin_lock_init(&space_info->lock);
75 space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
76 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
77 init_waitqueue_head(&space_info->wait);
78 INIT_LIST_HEAD(&space_info->ro_bgs);
79 INIT_LIST_HEAD(&space_info->tickets);
80 INIT_LIST_HEAD(&space_info->priority_tickets);
81
82 ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
83 info->space_info_kobj, "%s",
84 alloc_name(space_info->flags));
85 if (ret) {
86 kobject_put(&space_info->kobj);
87 return ret;
88 }
89
90 list_add_rcu(&space_info->list, &info->space_info);
91 if (flags & BTRFS_BLOCK_GROUP_DATA)
92 info->data_sinfo = space_info;
93
94 return ret;
95}
96
97int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
98{
99 struct btrfs_super_block *disk_super;
100 u64 features;
101 u64 flags;
102 int mixed = 0;
103 int ret;
104
105 disk_super = fs_info->super_copy;
106 if (!btrfs_super_root(disk_super))
107 return -EINVAL;
108
109 features = btrfs_super_incompat_flags(disk_super);
110 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
111 mixed = 1;
112
113 flags = BTRFS_BLOCK_GROUP_SYSTEM;
114 ret = create_space_info(fs_info, flags);
115 if (ret)
116 goto out;
117
118 if (mixed) {
119 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
120 ret = create_space_info(fs_info, flags);
121 } else {
122 flags = BTRFS_BLOCK_GROUP_METADATA;
123 ret = create_space_info(fs_info, flags);
124 if (ret)
125 goto out;
126
127 flags = BTRFS_BLOCK_GROUP_DATA;
128 ret = create_space_info(fs_info, flags);
129 }
130out:
131 return ret;
132}
133
134void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
135 u64 total_bytes, u64 bytes_used,
136 u64 bytes_readonly,
137 struct btrfs_space_info **space_info)
138{
139 struct btrfs_space_info *found;
140 int factor;
141
142 factor = btrfs_bg_type_to_factor(flags);
143
144 found = btrfs_find_space_info(info, flags);
145 ASSERT(found);
146 spin_lock(&found->lock);
147 found->total_bytes += total_bytes;
148 found->disk_total += total_bytes * factor;
149 found->bytes_used += bytes_used;
150 found->disk_used += bytes_used * factor;
151 found->bytes_readonly += bytes_readonly;
152 if (total_bytes > 0)
153 found->full = 0;
154 btrfs_space_info_add_new_bytes(info, found,
155 total_bytes - bytes_used -
156 bytes_readonly);
157 spin_unlock(&found->lock);
158 *space_info = found;
159}
160
161struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
162 u64 flags)
163{
164 struct list_head *head = &info->space_info;
165 struct btrfs_space_info *found;
166
167 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
168
169 rcu_read_lock();
170 list_for_each_entry_rcu(found, head, list) {
171 if (found->flags & flags) {
172 rcu_read_unlock();
173 return found;
174 }
175 }
176 rcu_read_unlock();
177 return NULL;
178}
179
180static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
181{
182 return (global->size << 1);
183}
184
185static int can_overcommit(struct btrfs_fs_info *fs_info,
186 struct btrfs_space_info *space_info, u64 bytes,
187 enum btrfs_reserve_flush_enum flush,
188 bool system_chunk)
189{
190 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
191 u64 profile;
192 u64 space_size;
193 u64 avail;
194 u64 used;
195 int factor;
196
197 /* Don't overcommit when in mixed mode. */
198 if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
199 return 0;
200
201 if (system_chunk)
202 profile = btrfs_system_alloc_profile(fs_info);
203 else
204 profile = btrfs_metadata_alloc_profile(fs_info);
205
206 used = btrfs_space_info_used(space_info, false);
207
208 /*
209 * We only want to allow overcommitting if we have lots of actual space
210 * free, but if we don't have enough space to handle the global reserve
211 * space then we could end up having a real enospc problem when trying
212 * to allocate a chunk or some other such important allocation.
213 */
214 spin_lock(&global_rsv->lock);
215 space_size = calc_global_rsv_need_space(global_rsv);
216 spin_unlock(&global_rsv->lock);
217 if (used + space_size >= space_info->total_bytes)
218 return 0;
219
220 used += space_info->bytes_may_use;
221
222 avail = atomic64_read(&fs_info->free_chunk_space);
223
224 /*
225 * If we have dup, raid1 or raid10 then only half of the free
226 * space is actually usable. For raid56, the space info used
227 * doesn't include the parity drive, so we don't have to
228 * change the math
229 */
230 factor = btrfs_bg_type_to_factor(profile);
231 avail = div_u64(avail, factor);
232
233 /*
234 * If we aren't flushing all things, let us overcommit up to
235 * 1/2 of the space. If we can flush, don't let us overcommit
236 * too much, let it overcommit up to 1/8 of the space.
237 */
238 if (flush == BTRFS_RESERVE_FLUSH_ALL)
239 avail >>= 3;
240 else
241 avail >>= 1;
242
243 if (used + bytes < space_info->total_bytes + avail)
244 return 1;
245 return 0;
246}
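A worked example of the overcommit math with made-up numbers: RAID1 metadata (factor 2), 10 GiB of unallocated device space, and a BTRFS_RESERVE_FLUSH_ALL reservation:

	/* Needs linux/sizes.h and linux/math64.h; numbers are illustrative. */
	static u64 demo_overcommit_allowance(void)
	{
		u64 avail = 10ULL * SZ_1G;	/* pretend free_chunk_space = 10 GiB   */

		avail = div_u64(avail, 2);	/* RAID1 factor 2 -> 5 GiB usable      */
		avail >>= 3;			/* FLUSH_ALL -> overcommit 1/8: 640 MiB */
		return avail;
	}

The reservation is then allowed as long as used + bytes stays below total_bytes plus that 640 MiB allowance.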
247
248/*
249 * This is for space we already have accounted in space_info->bytes_may_use, so
250 * basically when we're returning space from block_rsv's.
251 */
252void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
253 struct btrfs_space_info *space_info,
254 u64 num_bytes)
255{
256 struct reserve_ticket *ticket;
257 struct list_head *head;
258 u64 used;
259 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
260 bool check_overcommit = false;
261
262 spin_lock(&space_info->lock);
263 head = &space_info->priority_tickets;
264
265 /*
266 * If we are over our limit then we need to check and see if we can
267 * overcommit, and if we can't then we just need to free up our space
268 * and not satisfy any requests.
269 */
270 used = btrfs_space_info_used(space_info, true);
271 if (used - num_bytes >= space_info->total_bytes)
272 check_overcommit = true;
273again:
274 while (!list_empty(head) && num_bytes) {
275 ticket = list_first_entry(head, struct reserve_ticket,
276 list);
277 /*
278 * We use 0 bytes because this space is already reserved, so
279 * adding the ticket space would be a double count.
280 */
281 if (check_overcommit &&
282 !can_overcommit(fs_info, space_info, 0, flush, false))
283 break;
284 if (num_bytes >= ticket->bytes) {
285 list_del_init(&ticket->list);
286 num_bytes -= ticket->bytes;
287 ticket->bytes = 0;
288 space_info->tickets_id++;
289 wake_up(&ticket->wait);
290 } else {
291 ticket->bytes -= num_bytes;
292 num_bytes = 0;
293 }
294 }
295
296 if (num_bytes && head == &space_info->priority_tickets) {
297 head = &space_info->tickets;
298 flush = BTRFS_RESERVE_FLUSH_ALL;
299 goto again;
300 }
301 btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
302 trace_btrfs_space_reservation(fs_info, "space_info",
303 space_info->flags, num_bytes, 0);
304 spin_unlock(&space_info->lock);
305}
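A worked example of the loop above, with made-up numbers: 1 MiB comes back while two priority tickets wait for 768 KiB and 512 KiB. The first ticket is fully satisfied and woken (num_bytes drops to 256 KiB), the second is trimmed to 256 KiB outstanding (num_bytes drops to 0), and bytes_may_use is then decreased by the remaining 0 bytes.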
306
307/*
308 * This is for newly allocated space that isn't accounted in
309 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
310 * we use this helper.
311 */
312void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
313 struct btrfs_space_info *space_info,
314 u64 num_bytes)
315{
316 struct reserve_ticket *ticket;
317 struct list_head *head = &space_info->priority_tickets;
318
319again:
320 while (!list_empty(head) && num_bytes) {
321 ticket = list_first_entry(head, struct reserve_ticket,
322 list);
323 if (num_bytes >= ticket->bytes) {
324 trace_btrfs_space_reservation(fs_info, "space_info",
325 space_info->flags,
326 ticket->bytes, 1);
327 list_del_init(&ticket->list);
328 num_bytes -= ticket->bytes;
329 btrfs_space_info_update_bytes_may_use(fs_info,
330 space_info,
331 ticket->bytes);
332 ticket->bytes = 0;
333 space_info->tickets_id++;
334 wake_up(&ticket->wait);
335 } else {
336 trace_btrfs_space_reservation(fs_info, "space_info",
337 space_info->flags,
338 num_bytes, 1);
339 btrfs_space_info_update_bytes_may_use(fs_info,
340 space_info,
341 num_bytes);
342 ticket->bytes -= num_bytes;
343 num_bytes = 0;
344 }
345 }
346
347 if (num_bytes && head == &space_info->priority_tickets) {
348 head = &space_info->tickets;
349 goto again;
350 }
351}
352
353#define DUMP_BLOCK_RSV(fs_info, rsv_name) \
354do { \
355 struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \
356 spin_lock(&__rsv->lock); \
357 btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \
358 __rsv->size, __rsv->reserved); \
359 spin_unlock(&__rsv->lock); \
360} while (0)
361
362void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
363 struct btrfs_space_info *info, u64 bytes,
364 int dump_block_groups)
365{
366 struct btrfs_block_group_cache *cache;
367 int index = 0;
368
369 spin_lock(&info->lock);
370 btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
371 info->flags,
372 info->total_bytes - btrfs_space_info_used(info, true),
373 info->full ? "" : "not ");
374 btrfs_info(fs_info,
375 "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
376 info->total_bytes, info->bytes_used, info->bytes_pinned,
377 info->bytes_reserved, info->bytes_may_use,
378 info->bytes_readonly);
379 spin_unlock(&info->lock);
380
381 DUMP_BLOCK_RSV(fs_info, global_block_rsv);
382 DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
383 DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
384 DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
385 DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
386
387 if (!dump_block_groups)
388 return;
389
390 down_read(&info->groups_sem);
391again:
392 list_for_each_entry(cache, &info->block_groups[index], list) {
393 spin_lock(&cache->lock);
394 btrfs_info(fs_info,
395 "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
396 cache->key.objectid, cache->key.offset,
397 btrfs_block_group_used(&cache->item), cache->pinned,
398 cache->reserved, cache->ro ? "[readonly]" : "");
399 btrfs_dump_free_space(cache, bytes);
400 spin_unlock(&cache->lock);
401 }
402 if (++index < BTRFS_NR_RAID_TYPES)
403 goto again;
404 up_read(&info->groups_sem);
405}
406
407static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
408 unsigned long nr_pages, int nr_items)
409{
410 struct super_block *sb = fs_info->sb;
411
412 if (down_read_trylock(&sb->s_umount)) {
413 writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
414 up_read(&sb->s_umount);
415 } else {
416 /*
417 * We needn't worry about the filesystem going from r/w to r/o even
418 * though we don't acquire the ->s_umount mutex, because the filesystem
419 * should guarantee that the delalloc inode list is empty after the
420 * filesystem becomes read-only (all dirty pages have been written to
421 * disk).
422 */
423 btrfs_start_delalloc_roots(fs_info, nr_items);
424 if (!current->journal_info)
425 btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
426 }
427}
428
429static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
430 u64 to_reclaim)
431{
432 u64 bytes;
433 u64 nr;
434
435 bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
436 nr = div64_u64(to_reclaim, bytes);
437 if (!nr)
438 nr = 1;
439 return nr;
440}
441
442#define EXTENT_SIZE_PER_ITEM SZ_256K
443
444/*
445 * shrink metadata reservation for delalloc
446 */
447static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
448 u64 orig, bool wait_ordered)
449{
450 struct btrfs_space_info *space_info;
451 struct btrfs_trans_handle *trans;
452 u64 delalloc_bytes;
453 u64 dio_bytes;
454 u64 async_pages;
455 u64 items;
456 long time_left;
457 unsigned long nr_pages;
458 int loops;
459
460 /* Calc the number of the pages we need flush for space reservation */
461 items = calc_reclaim_items_nr(fs_info, to_reclaim);
462 to_reclaim = items * EXTENT_SIZE_PER_ITEM;
463
464 trans = (struct btrfs_trans_handle *)current->journal_info;
465 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
466
467 delalloc_bytes = percpu_counter_sum_positive(
468 &fs_info->delalloc_bytes);
469 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
470 if (delalloc_bytes == 0 && dio_bytes == 0) {
471 if (trans)
472 return;
473 if (wait_ordered)
474 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
475 return;
476 }
477
478 /*
479 * If we are doing more ordered than delalloc we need to just wait on
480 * ordered extents, otherwise we'll waste time trying to flush delalloc
481 * that likely won't give us the space back we need.
482 */
483 if (dio_bytes > delalloc_bytes)
484 wait_ordered = true;
485
486 loops = 0;
487 while ((delalloc_bytes || dio_bytes) && loops < 3) {
488 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
489
490 /*
491 * Triggers inode writeback for up to nr_pages. This will invoke
492 * ->writepages callback and trigger delalloc filling
493 * (btrfs_run_delalloc_range()).
494 */
495 btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items);
496
497 /*
498 * We need to wait for the compressed pages to start before
499 * we continue.
500 */
501 async_pages = atomic_read(&fs_info->async_delalloc_pages);
502 if (!async_pages)
503 goto skip_async;
504
505 /*
506 * Calculate how many compressed pages we want to be written
507 * before we continue. I.e if there are more async pages than we
508 * require wait_event will wait until nr_pages are written.
509 */
510 if (async_pages <= nr_pages)
511 async_pages = 0;
512 else
513 async_pages -= nr_pages;
514
515 wait_event(fs_info->async_submit_wait,
516 atomic_read(&fs_info->async_delalloc_pages) <=
517 (int)async_pages);
518skip_async:
519 spin_lock(&space_info->lock);
520 if (list_empty(&space_info->tickets) &&
521 list_empty(&space_info->priority_tickets)) {
522 spin_unlock(&space_info->lock);
523 break;
524 }
525 spin_unlock(&space_info->lock);
526
527 loops++;
528 if (wait_ordered && !trans) {
529 btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
530 } else {
531 time_left = schedule_timeout_killable(1);
532 if (time_left)
533 break;
534 }
535 delalloc_bytes = percpu_counter_sum_positive(
536 &fs_info->delalloc_bytes);
537 dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes);
538 }
539}
540
541/**
543 * may_commit_transaction - possibly commit the transaction if it's worth it
544 * @fs_info - the filesystem
545 * @space_info - the space_info we are trying to satisfy a reservation
546 *               for
546 *
547 * This will check to make sure that committing the transaction will actually
548 * get us somewhere and then commit the transaction if it does. Otherwise it
549 * will return -ENOSPC.
550 */
551static int may_commit_transaction(struct btrfs_fs_info *fs_info,
552 struct btrfs_space_info *space_info)
553{
554 struct reserve_ticket *ticket = NULL;
555 struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
556 struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
557 struct btrfs_trans_handle *trans;
558 u64 bytes_needed;
559 u64 reclaim_bytes = 0;
560
561 trans = (struct btrfs_trans_handle *)current->journal_info;
562 if (trans)
563 return -EAGAIN;
564
565 spin_lock(&space_info->lock);
566 if (!list_empty(&space_info->priority_tickets))
567 ticket = list_first_entry(&space_info->priority_tickets,
568 struct reserve_ticket, list);
569 else if (!list_empty(&space_info->tickets))
570 ticket = list_first_entry(&space_info->tickets,
571 struct reserve_ticket, list);
572 bytes_needed = (ticket) ? ticket->bytes : 0;
573 spin_unlock(&space_info->lock);
574
575 if (!bytes_needed)
576 return 0;
577
578 trans = btrfs_join_transaction(fs_info->extent_root);
579 if (IS_ERR(trans))
580 return PTR_ERR(trans);
581
582 /*
583 * See if there is enough pinned space to make this reservation, or if
584 * we have block groups that are going to be freed, allowing us to
585 * possibly do a chunk allocation the next loop through.
586 */
587 if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
588 __percpu_counter_compare(&space_info->total_bytes_pinned,
589 bytes_needed,
590 BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
591 goto commit;
592
593 /*
594 * See if there is some space in the delayed insertion reservation for
595 * this reservation.
596 */
597 if (space_info != delayed_rsv->space_info)
598 goto enospc;
599
600 spin_lock(&delayed_rsv->lock);
601 reclaim_bytes += delayed_rsv->reserved;
602 spin_unlock(&delayed_rsv->lock);
603
604 spin_lock(&delayed_refs_rsv->lock);
605 reclaim_bytes += delayed_refs_rsv->reserved;
606 spin_unlock(&delayed_refs_rsv->lock);
607 if (reclaim_bytes >= bytes_needed)
608 goto commit;
609 bytes_needed -= reclaim_bytes;
610
611 if (__percpu_counter_compare(&space_info->total_bytes_pinned,
612 bytes_needed,
613 BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
614 goto enospc;
615
616commit:
617 return btrfs_commit_transaction(trans);
618enospc:
619 btrfs_end_transaction(trans);
620 return -ENOSPC;
621}
622
623/*
624 * Try to flush some data based on policy set by @state. This is only advisory
625 * and may fail for various reasons. The caller is supposed to examine the
626 * state of @space_info to detect the outcome.
627 */
628static void flush_space(struct btrfs_fs_info *fs_info,
629 struct btrfs_space_info *space_info, u64 num_bytes,
630 int state)
631{
632 struct btrfs_root *root = fs_info->extent_root;
633 struct btrfs_trans_handle *trans;
634 int nr;
635 int ret = 0;
636
637 switch (state) {
638 case FLUSH_DELAYED_ITEMS_NR:
639 case FLUSH_DELAYED_ITEMS:
640 if (state == FLUSH_DELAYED_ITEMS_NR)
641 nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2;
642 else
643 nr = -1;
644
645 trans = btrfs_join_transaction(root);
646 if (IS_ERR(trans)) {
647 ret = PTR_ERR(trans);
648 break;
649 }
650 ret = btrfs_run_delayed_items_nr(trans, nr);
651 btrfs_end_transaction(trans);
652 break;
653 case FLUSH_DELALLOC:
654 case FLUSH_DELALLOC_WAIT:
655 shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
656 state == FLUSH_DELALLOC_WAIT);
657 break;
658 case FLUSH_DELAYED_REFS_NR:
659 case FLUSH_DELAYED_REFS:
660 trans = btrfs_join_transaction(root);
661 if (IS_ERR(trans)) {
662 ret = PTR_ERR(trans);
663 break;
664 }
665 if (state == FLUSH_DELAYED_REFS_NR)
666 nr = calc_reclaim_items_nr(fs_info, num_bytes);
667 else
668 nr = 0;
669 btrfs_run_delayed_refs(trans, nr);
670 btrfs_end_transaction(trans);
671 break;
672 case ALLOC_CHUNK:
673 case ALLOC_CHUNK_FORCE:
674 trans = btrfs_join_transaction(root);
675 if (IS_ERR(trans)) {
676 ret = PTR_ERR(trans);
677 break;
678 }
679 ret = btrfs_chunk_alloc(trans,
680 btrfs_metadata_alloc_profile(fs_info),
681 (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
682 CHUNK_ALLOC_FORCE);
683 btrfs_end_transaction(trans);
684 if (ret > 0 || ret == -ENOSPC)
685 ret = 0;
686 break;
687 case COMMIT_TRANS:
688 /*
689 * If we have pending delayed iputs then we could free up a
690 * bunch of pinned space, so make sure we run the iputs before
691 * we do our pinned bytes check below.
692 */
693 btrfs_run_delayed_iputs(fs_info);
694 btrfs_wait_on_delayed_iputs(fs_info);
695
696 ret = may_commit_transaction(fs_info, space_info);
697 break;
698 default:
699 ret = -ENOSPC;
700 break;
701 }
702
703 trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
704 ret);
705 return;
706}
707
708static inline u64
709btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
710 struct btrfs_space_info *space_info,
711 bool system_chunk)
712{
713 struct reserve_ticket *ticket;
714 u64 used;
715 u64 expected;
716 u64 to_reclaim = 0;
717
718 list_for_each_entry(ticket, &space_info->tickets, list)
719 to_reclaim += ticket->bytes;
720 list_for_each_entry(ticket, &space_info->priority_tickets, list)
721 to_reclaim += ticket->bytes;
722 if (to_reclaim)
723 return to_reclaim;
724
725 to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
726 if (can_overcommit(fs_info, space_info, to_reclaim,
727 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
728 return 0;
729
730 used = btrfs_space_info_used(space_info, true);
731
732 if (can_overcommit(fs_info, space_info, SZ_1M,
733 BTRFS_RESERVE_FLUSH_ALL, system_chunk))
734 expected = div_factor_fine(space_info->total_bytes, 95);
735 else
736 expected = div_factor_fine(space_info->total_bytes, 90);
737
738 if (used > expected)
739 to_reclaim = used - expected;
740 else
741 to_reclaim = 0;
742 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
743 space_info->bytes_reserved);
744 return to_reclaim;
745}
746
747static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
748 struct btrfs_space_info *space_info,
749 u64 used, bool system_chunk)
750{
751 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
752
753 /* If we're just plain full then async reclaim just slows us down. */
754 if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
755 return 0;
756
757 if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
758 system_chunk))
759 return 0;
760
761 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
762 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
763}
764
765static bool wake_all_tickets(struct list_head *head)
766{
767 struct reserve_ticket *ticket;
768
769 while (!list_empty(head)) {
770 ticket = list_first_entry(head, struct reserve_ticket, list);
771 list_del_init(&ticket->list);
772 ticket->error = -ENOSPC;
773 wake_up(&ticket->wait);
774 if (ticket->bytes != ticket->orig_bytes)
775 return true;
776 }
777 return false;
778}
779
780/*
781 * This is for normal flushers, we can wait all goddamned day if we want to. We
782 * will loop and continuously try to flush as long as we are making progress.
783 * We count progress as clearing off tickets each time we have to loop.
784 */
785static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
786{
787 struct btrfs_fs_info *fs_info;
788 struct btrfs_space_info *space_info;
789 u64 to_reclaim;
790 int flush_state;
791 int commit_cycles = 0;
792 u64 last_tickets_id;
793
794 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
795 space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
796
797 spin_lock(&space_info->lock);
798 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
799 false);
800 if (!to_reclaim) {
801 space_info->flush = 0;
802 spin_unlock(&space_info->lock);
803 return;
804 }
805 last_tickets_id = space_info->tickets_id;
806 spin_unlock(&space_info->lock);
807
808 flush_state = FLUSH_DELAYED_ITEMS_NR;
809 do {
810 flush_space(fs_info, space_info, to_reclaim, flush_state);
811 spin_lock(&space_info->lock);
812 if (list_empty(&space_info->tickets)) {
813 space_info->flush = 0;
814 spin_unlock(&space_info->lock);
815 return;
816 }
817 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
818 space_info,
819 false);
820 if (last_tickets_id == space_info->tickets_id) {
821 flush_state++;
822 } else {
823 last_tickets_id = space_info->tickets_id;
824 flush_state = FLUSH_DELAYED_ITEMS_NR;
825 if (commit_cycles)
826 commit_cycles--;
827 }
828
829 /*
830 * We don't want to force a chunk allocation until we've tried
831 * pretty hard to reclaim space. Think of the case where we
832 * freed up a bunch of space and so have a lot of pinned space
833 * to reclaim. We would rather use that than possibly create an
834 * underutilized metadata chunk. So if this is our first run
835 * through the flushing state machine skip ALLOC_CHUNK_FORCE and
836 * commit the transaction. If nothing has changed the next go
837 * around then we can force a chunk allocation.
838 */
839 if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
840 flush_state++;
841
842 if (flush_state > COMMIT_TRANS) {
843 commit_cycles++;
844 if (commit_cycles > 2) {
845 if (wake_all_tickets(&space_info->tickets)) {
846 flush_state = FLUSH_DELAYED_ITEMS_NR;
847 commit_cycles--;
848 } else {
849 space_info->flush = 0;
850 }
851 } else {
852 flush_state = FLUSH_DELAYED_ITEMS_NR;
853 }
854 }
855 spin_unlock(&space_info->lock);
856 } while (flush_state <= COMMIT_TRANS);
857}
858
859void btrfs_init_async_reclaim_work(struct work_struct *work)
860{
861 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
862}
863
864static const enum btrfs_flush_state priority_flush_states[] = {
865 FLUSH_DELAYED_ITEMS_NR,
866 FLUSH_DELAYED_ITEMS,
867 ALLOC_CHUNK,
868};
869
870static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
871 struct btrfs_space_info *space_info,
872 struct reserve_ticket *ticket)
873{
874 u64 to_reclaim;
875 int flush_state;
876
877 spin_lock(&space_info->lock);
878 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
879 false);
880 if (!to_reclaim) {
881 spin_unlock(&space_info->lock);
882 return;
883 }
884 spin_unlock(&space_info->lock);
885
886 flush_state = 0;
887 do {
888 flush_space(fs_info, space_info, to_reclaim,
889 priority_flush_states[flush_state]);
890 flush_state++;
891 spin_lock(&space_info->lock);
892 if (ticket->bytes == 0) {
893 spin_unlock(&space_info->lock);
894 return;
895 }
896 spin_unlock(&space_info->lock);
897 } while (flush_state < ARRAY_SIZE(priority_flush_states));
898}
899
900static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
901 struct btrfs_space_info *space_info,
902 struct reserve_ticket *ticket)
903
904{
905 DEFINE_WAIT(wait);
906 u64 reclaim_bytes = 0;
907 int ret = 0;
908
909 spin_lock(&space_info->lock);
910 while (ticket->bytes > 0 && ticket->error == 0) {
911 ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
912 if (ret) {
913 ret = -EINTR;
914 break;
915 }
916 spin_unlock(&space_info->lock);
917
918 schedule();
919
920 finish_wait(&ticket->wait, &wait);
921 spin_lock(&space_info->lock);
922 }
923 if (!ret)
924 ret = ticket->error;
925 if (!list_empty(&ticket->list))
926 list_del_init(&ticket->list);
927 if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
928 reclaim_bytes = ticket->orig_bytes - ticket->bytes;
929 spin_unlock(&space_info->lock);
930
931 if (reclaim_bytes)
932 btrfs_space_info_add_old_bytes(fs_info, space_info,
933 reclaim_bytes);
934 return ret;
935}
936
937/**
938 * __reserve_metadata_bytes - try to reserve bytes from the space_info
939 * @fs_info - the filesystem
940 * @space_info - the space info we want to allocate from
941 * @orig_bytes - the number of bytes we want
942 * @flush - whether or not we can flush to make our reservation
943 *
944 * This will reserve orig_bytes number of bytes from the space info associated
945 * with the block_rsv. If there is not enough space it will make an attempt to
946 * flush out space to make room. It will do this by flushing delalloc if
947 * possible or committing the transaction. If flush is 0 then no attempts to
948 * regain reservations will be made and this will fail if there is not enough
949 * space already.
950 */
951static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
952 struct btrfs_space_info *space_info,
953 u64 orig_bytes,
954 enum btrfs_reserve_flush_enum flush,
955 bool system_chunk)
956{
957 struct reserve_ticket ticket;
958 u64 used;
959 u64 reclaim_bytes = 0;
960 int ret = 0;
961
962 ASSERT(orig_bytes);
963 ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
964
965 spin_lock(&space_info->lock);
966 ret = -ENOSPC;
967 used = btrfs_space_info_used(space_info, true);
968
969 /*
970 * Carry on if we have enough space (short-circuit) OR call
971 * can_overcommit() to ensure we can overcommit to continue.
972 */
973 if ((used + orig_bytes <= space_info->total_bytes) ||
974 can_overcommit(fs_info, space_info, orig_bytes, flush,
975 system_chunk)) {
976 btrfs_space_info_update_bytes_may_use(fs_info, space_info,
977 orig_bytes);
978 trace_btrfs_space_reservation(fs_info, "space_info",
979 space_info->flags, orig_bytes, 1);
980 ret = 0;
981 }
982
983 /*
984 * If we couldn't make a reservation then setup our reservation ticket
985 * and kick the async worker if it's not already running.
986 *
987 * If we are a priority flusher then we just need to add our ticket to
988 * the list and we will do our own flushing further down.
989 */
990 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
991 ticket.orig_bytes = orig_bytes;
992 ticket.bytes = orig_bytes;
993 ticket.error = 0;
994 init_waitqueue_head(&ticket.wait);
995 if (flush == BTRFS_RESERVE_FLUSH_ALL) {
996 list_add_tail(&ticket.list, &space_info->tickets);
997 if (!space_info->flush) {
998 space_info->flush = 1;
999 trace_btrfs_trigger_flush(fs_info,
1000 space_info->flags,
1001 orig_bytes, flush,
1002 "enospc");
1003 queue_work(system_unbound_wq,
1004 &fs_info->async_reclaim_work);
1005 }
1006 } else {
1007 list_add_tail(&ticket.list,
1008 &space_info->priority_tickets);
1009 }
1010 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
1011 used += orig_bytes;
1012 /*
1013 * We will do the space reservation dance during log replay,
1014 * which means we won't have fs_info->fs_root set, so don't do
1015 * the async reclaim as we will panic.
1016 */
1017 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
1018 need_do_async_reclaim(fs_info, space_info,
1019 used, system_chunk) &&
1020 !work_busy(&fs_info->async_reclaim_work)) {
1021 trace_btrfs_trigger_flush(fs_info, space_info->flags,
1022 orig_bytes, flush, "preempt");
1023 queue_work(system_unbound_wq,
1024 &fs_info->async_reclaim_work);
1025 }
1026 }
1027 spin_unlock(&space_info->lock);
1028 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
1029 return ret;
1030
1031 if (flush == BTRFS_RESERVE_FLUSH_ALL)
1032 return wait_reserve_ticket(fs_info, space_info, &ticket);
1033
1034 ret = 0;
1035 priority_reclaim_metadata_space(fs_info, space_info, &ticket);
1036 spin_lock(&space_info->lock);
1037 if (ticket.bytes) {
1038 if (ticket.bytes < orig_bytes)
1039 reclaim_bytes = orig_bytes - ticket.bytes;
1040 list_del_init(&ticket.list);
1041 ret = -ENOSPC;
1042 }
1043 spin_unlock(&space_info->lock);
1044
1045 if (reclaim_bytes)
1046 btrfs_space_info_add_old_bytes(fs_info, space_info,
1047 reclaim_bytes);
1048 ASSERT(list_empty(&ticket.list));
1049 return ret;
1050}
1051
1052/**
1053 * btrfs_reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
1054 * @root - the root we're allocating for
1055 * @block_rsv - the block_rsv we're allocating for
1056 * @orig_bytes - the number of bytes we want
1057 * @flush - whether or not we can flush to make our reservation
1058 *
1059 * This will reserve orig_bytes number of bytes from the space info associated
1060 * with the block_rsv. If there is not enough space it will make an attempt to
1061 * flush out space to make room. It will do this by flushing delalloc if
1062 * possible or committing the transaction. If flush is 0 then no attempts to
1063 * regain reservations will be made and this will fail if there is not enough
1064 * space already.
1065 */
1066int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
1067 struct btrfs_block_rsv *block_rsv,
1068 u64 orig_bytes,
1069 enum btrfs_reserve_flush_enum flush)
1070{
1071 struct btrfs_fs_info *fs_info = root->fs_info;
1072 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
1073 int ret;
1074 bool system_chunk = (root == fs_info->chunk_root);
1075
1076 ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
1077 orig_bytes, flush, system_chunk);
1078 if (ret == -ENOSPC &&
1079 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
1080 if (block_rsv != global_rsv &&
1081 !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes))
1082 ret = 0;
1083 }
1084 if (ret == -ENOSPC) {
1085 trace_btrfs_space_reservation(fs_info, "space_info:enospc",
1086 block_rsv->space_info->flags,
1087 orig_bytes, 1);
1088
1089 if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
1090 btrfs_dump_space_info(fs_info, block_rsv->space_info,
1091 orig_bytes, 0);
1092 }
1093 return ret;
1094}
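A hedged sketch of a caller of btrfs_reserve_metadata_bytes(); the helper name here is hypothetical, and the real callers live in block-rsv.c:

	static int demo_rsv_fill(struct btrfs_root *root,
				 struct btrfs_block_rsv *rsv, u64 num_bytes)
	{
		int ret;

		ret = btrfs_reserve_metadata_bytes(root, rsv, num_bytes,
						   BTRFS_RESERVE_FLUSH_ALL);
		if (ret)
			return ret;

		/* ... credit num_bytes to rsv->reserved under rsv->lock ... */
		return 0;
	}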
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
new file mode 100644
index 000000000000..c2b54b8e1a14
--- /dev/null
+++ b/fs/btrfs/space-info.h
@@ -0,0 +1,133 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#ifndef BTRFS_SPACE_INFO_H
4#define BTRFS_SPACE_INFO_H
5
6struct btrfs_space_info {
7 spinlock_t lock;
8
9 u64 total_bytes; /* total bytes in the space,
10 this doesn't take mirrors into account */
11 u64 bytes_used; /* total bytes used,
12 this doesn't take mirrors into account */
13 u64 bytes_pinned; /* total bytes pinned, will be freed when the
14 transaction finishes */
15 u64 bytes_reserved; /* total bytes the allocator has reserved for
16 current allocations */
17 u64 bytes_may_use; /* number of bytes that may be used for
18 delalloc/allocations */
19 u64 bytes_readonly; /* total bytes that are read only */
20
21 u64 max_extent_size; /* This will hold the maximum extent size of
22 the space info if we had an ENOSPC in the
23 allocator. */
24
25 unsigned int full:1; /* indicates that we cannot allocate any more
26 chunks for this space */
27 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
28
29 unsigned int flush:1; /* set if we are trying to make space */
30
31 unsigned int force_alloc; /* set if we need to force a chunk
32 alloc for this space */
33
34 u64 disk_used; /* total bytes used on disk */
35 u64 disk_total; /* total bytes on disk, takes mirrors into
36 account */
37
38 u64 flags;
39
40 /*
41 * bytes_pinned is kept in line with what is actually pinned, as in
42 * we've called update_block_group and dropped the bytes_used counter
43 * and increased the bytes_pinned counter. However this means that
44 * bytes_pinned does not reflect the bytes that will be pinned once the
45 * delayed refs are flushed, so this counter is inc'ed every time we
46 * call btrfs_free_extent so it is a realtime count of what will be
47 * freed once the transaction is committed. It will be zeroed every
48 * time the transaction commits.
49 */
50 struct percpu_counter total_bytes_pinned;
51
52 struct list_head list;
53 /* Protected by the spinlock 'lock'. */
54 struct list_head ro_bgs;
55 struct list_head priority_tickets;
56 struct list_head tickets;
57 /*
58 * tickets_id just indicates the next ticket to be handled, so note
59 * that it's not stored per ticket.
60 */
61 u64 tickets_id;
62
63 struct rw_semaphore groups_sem;
64 /* for block groups in our same type */
65 struct list_head block_groups[BTRFS_NR_RAID_TYPES];
66 wait_queue_head_t wait;
67
68 struct kobject kobj;
69 struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
70};
71
72struct reserve_ticket {
73 u64 orig_bytes;
74 u64 bytes;
75 int error;
76 struct list_head list;
77 wait_queue_head_t wait;
78};
79
80static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
81{
82 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
83 (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
84}
85
86/*
87 *
88 * Declare a helper function to detect underflow of various space info members
89 */
90#define DECLARE_SPACE_INFO_UPDATE(name) \
91static inline void \
92btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
93 struct btrfs_space_info *sinfo, \
94 s64 bytes) \
95{ \
96 lockdep_assert_held(&sinfo->lock); \
97 trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \
98 if (bytes < 0 && sinfo->name < -bytes) { \
99 WARN_ON(1); \
100 sinfo->name = 0; \
101 return; \
102 } \
103 sinfo->name += bytes; \
104}
105
106DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
107DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
108
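The macro above generates btrfs_space_info_update_bytes_may_use() and btrfs_space_info_update_bytes_pinned(), which must be called with sinfo->lock held and which warn and clamp to zero on underflow. A minimal sketch of a caller releasing a reservation; the helper name is hypothetical:

	static void demo_release_bytes(struct btrfs_fs_info *fs_info,
				       struct btrfs_space_info *space_info,
				       u64 num_bytes)
	{
		spin_lock(&space_info->lock);
		btrfs_space_info_update_bytes_may_use(fs_info, space_info,
						      -(s64)num_bytes);
		spin_unlock(&space_info->lock);
	}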
109void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
110 struct btrfs_space_info *space_info,
111 u64 num_bytes);
112void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
113 struct btrfs_space_info *space_info,
114 u64 num_bytes);
115int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
116void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
117 u64 total_bytes, u64 bytes_used,
118 u64 bytes_readonly,
119 struct btrfs_space_info **space_info);
120struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
121 u64 flags);
122u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
123 bool may_use_included);
124void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
125void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
126 struct btrfs_space_info *info, u64 bytes,
127 int dump_block_groups);
128int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
129 struct btrfs_block_rsv *block_rsv,
130 u64 orig_bytes,
131 enum btrfs_reserve_flush_enum flush);
132
133#endif /* BTRFS_SPACE_INFO_H */
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0645ec428b4f..78de9d5d80c6 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -42,6 +42,7 @@
42#include "dev-replace.h" 42#include "dev-replace.h"
43#include "free-space-cache.h" 43#include "free-space-cache.h"
44#include "backref.h" 44#include "backref.h"
45#include "space-info.h"
45#include "tests/btrfs-tests.h" 46#include "tests/btrfs-tests.h"
46 47
47#include "qgroup.h" 48#include "qgroup.h"
@@ -1553,6 +1554,8 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1553 } else { 1554 } else {
1554 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); 1555 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1555 btrfs_sb(s)->bdev_holder = fs_type; 1556 btrfs_sb(s)->bdev_holder = fs_type;
1557 if (!strstr(crc32c_impl(), "generic"))
1558 set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
1556 error = btrfs_fill_super(s, fs_devices, data); 1559 error = btrfs_fill_super(s, fs_devices, data);
1557 } 1560 }
1558 if (!error) 1561 if (!error)
@@ -1601,14 +1604,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1601{ 1604{
1602 struct vfsmount *mnt_root; 1605 struct vfsmount *mnt_root;
1603 struct dentry *root; 1606 struct dentry *root;
1604 fmode_t mode = FMODE_READ;
1605 char *subvol_name = NULL; 1607 char *subvol_name = NULL;
1606 u64 subvol_objectid = 0; 1608 u64 subvol_objectid = 0;
1607 int error = 0; 1609 int error = 0;
1608 1610
1609 if (!(flags & SB_RDONLY))
1610 mode |= FMODE_WRITE;
1611
1612 error = btrfs_parse_subvol_options(data, &subvol_name, 1611 error = btrfs_parse_subvol_options(data, &subvol_name,
1613 &subvol_objectid); 1612 &subvol_objectid);
1614 if (error) { 1613 if (error) {
@@ -1904,8 +1903,9 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1904 u64 type; 1903 u64 type;
1905 u64 avail_space; 1904 u64 avail_space;
1906 u64 min_stripe_size; 1905 u64 min_stripe_size;
1907 int min_stripes = 1, num_stripes = 1; 1906 int min_stripes, num_stripes = 1;
1908 int i = 0, nr_devices; 1907 int i = 0, nr_devices;
1908 const struct btrfs_raid_attr *rattr;
1909 1909
1910 /* 1910 /*
1911 * We aren't under the device list lock, so this is racy-ish, but good 1911 * We aren't under the device list lock, so this is racy-ish, but good
@@ -1929,21 +1929,18 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1929 1929
1930 /* calc min stripe number for data space allocation */ 1930 /* calc min stripe number for data space allocation */
1931 type = btrfs_data_alloc_profile(fs_info); 1931 type = btrfs_data_alloc_profile(fs_info);
1932 if (type & BTRFS_BLOCK_GROUP_RAID0) { 1932 rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
1933 min_stripes = 2; 1933 min_stripes = rattr->devs_min;
1934
1935 if (type & BTRFS_BLOCK_GROUP_RAID0)
1934 num_stripes = nr_devices; 1936 num_stripes = nr_devices;
1935 } else if (type & BTRFS_BLOCK_GROUP_RAID1) { 1937 else if (type & BTRFS_BLOCK_GROUP_RAID1)
1936 min_stripes = 2;
1937 num_stripes = 2; 1938 num_stripes = 2;
1938 } else if (type & BTRFS_BLOCK_GROUP_RAID10) { 1939 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1939 min_stripes = 4;
1940 num_stripes = 4; 1940 num_stripes = 4;
1941 }
1942 1941
1943 if (type & BTRFS_BLOCK_GROUP_DUP) 1942 /* Adjust for more than 1 stripe per device */
1944 min_stripe_size = 2 * BTRFS_STRIPE_LEN; 1943 min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
1945 else
1946 min_stripe_size = BTRFS_STRIPE_LEN;
1947 1944
1948 rcu_read_lock(); 1945 rcu_read_lock();
1949 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { 1946 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
@@ -2466,3 +2463,4 @@ late_initcall(init_btrfs_fs);
2466module_exit(exit_btrfs_fs) 2463module_exit(exit_btrfs_fs)
2467 2464
2468MODULE_LICENSE("GPL"); 2465MODULE_LICENSE("GPL");
2466MODULE_SOFTDEP("pre: crc32c");
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c1dfc97893ba..9539f8143b7a 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -16,6 +16,7 @@
16#include "transaction.h" 16#include "transaction.h"
17#include "sysfs.h" 17#include "sysfs.h"
18#include "volumes.h" 18#include "volumes.h"
19#include "space-info.h"
19 20
20static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj); 21static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
21static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj); 22static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 7bf4d5734dbe..1bf6b5a79191 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -10,6 +10,7 @@
10#include "btrfs-tests.h" 10#include "btrfs-tests.h"
11#include "../ctree.h" 11#include "../ctree.h"
12#include "../extent_io.h" 12#include "../extent_io.h"
13#include "../btrfs_inode.h"
13 14
14#define PROCESS_UNLOCK (1 << 0) 15#define PROCESS_UNLOCK (1 << 0)
15#define PROCESS_RELEASE (1 << 1) 16#define PROCESS_RELEASE (1 << 1)
@@ -58,7 +59,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
58static int test_find_delalloc(u32 sectorsize) 59static int test_find_delalloc(u32 sectorsize)
59{ 60{
60 struct inode *inode; 61 struct inode *inode;
61 struct extent_io_tree tmp; 62 struct extent_io_tree *tmp;
62 struct page *page; 63 struct page *page;
63 struct page *locked_page = NULL; 64 struct page *locked_page = NULL;
64 unsigned long index = 0; 65 unsigned long index = 0;
@@ -76,12 +77,13 @@ static int test_find_delalloc(u32 sectorsize)
76 test_std_err(TEST_ALLOC_INODE); 77 test_std_err(TEST_ALLOC_INODE);
77 return -ENOMEM; 78 return -ENOMEM;
78 } 79 }
80 tmp = &BTRFS_I(inode)->io_tree;
79 81
80 /* 82 /*
81 * Passing NULL as we don't have fs_info but tracepoints are not used 83 * Passing NULL as we don't have fs_info but tracepoints are not used
82 * at this point 84 * at this point
83 */ 85 */
84 extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL); 86 extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST, NULL);
85 87
86 /* 88 /*
87 * First go through and create and mark all of our pages dirty, we pin 89 * First go through and create and mark all of our pages dirty, we pin
@@ -108,10 +110,10 @@ static int test_find_delalloc(u32 sectorsize)
108 * |--- delalloc ---| 110 * |--- delalloc ---|
109 * |--- search ---| 111 * |--- search ---|
110 */ 112 */
111 set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL); 113 set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
112 start = 0; 114 start = 0;
113 end = 0; 115 end = 0;
114 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 116 found = find_lock_delalloc_range(inode, locked_page, &start,
115 &end); 117 &end);
116 if (!found) { 118 if (!found) {
117 test_err("should have found at least one delalloc"); 119 test_err("should have found at least one delalloc");
@@ -122,7 +124,7 @@ static int test_find_delalloc(u32 sectorsize)
122 sectorsize - 1, start, end); 124 sectorsize - 1, start, end);
123 goto out_bits; 125 goto out_bits;
124 } 126 }
125 unlock_extent(&tmp, start, end); 127 unlock_extent(tmp, start, end);
126 unlock_page(locked_page); 128 unlock_page(locked_page);
127 put_page(locked_page); 129 put_page(locked_page);
128 130
@@ -139,10 +141,10 @@ static int test_find_delalloc(u32 sectorsize)
139 test_err("couldn't find the locked page"); 141 test_err("couldn't find the locked page");
140 goto out_bits; 142 goto out_bits;
141 } 143 }
142 set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL); 144 set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
143 start = test_start; 145 start = test_start;
144 end = 0; 146 end = 0;
145 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 147 found = find_lock_delalloc_range(inode, locked_page, &start,
146 &end); 148 &end);
147 if (!found) { 149 if (!found) {
148 test_err("couldn't find delalloc in our range"); 150 test_err("couldn't find delalloc in our range");
@@ -158,7 +160,7 @@ static int test_find_delalloc(u32 sectorsize)
158 test_err("there were unlocked pages in the range"); 160 test_err("there were unlocked pages in the range");
159 goto out_bits; 161 goto out_bits;
160 } 162 }
161 unlock_extent(&tmp, start, end); 163 unlock_extent(tmp, start, end);
162 /* locked_page was unlocked above */ 164 /* locked_page was unlocked above */
163 put_page(locked_page); 165 put_page(locked_page);
164 166
@@ -176,7 +178,7 @@ static int test_find_delalloc(u32 sectorsize)
176 } 178 }
177 start = test_start; 179 start = test_start;
178 end = 0; 180 end = 0;
179 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 181 found = find_lock_delalloc_range(inode, locked_page, &start,
180 &end); 182 &end);
181 if (found) { 183 if (found) {
182 test_err("found range when we shouldn't have"); 184 test_err("found range when we shouldn't have");
@@ -194,10 +196,10 @@ static int test_find_delalloc(u32 sectorsize)
194 * 196 *
195 * We are re-using our test_start from above since it works out well. 197 * We are re-using our test_start from above since it works out well.
196 */ 198 */
197 set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL); 199 set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
198 start = test_start; 200 start = test_start;
199 end = 0; 201 end = 0;
200 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 202 found = find_lock_delalloc_range(inode, locked_page, &start,
201 &end); 203 &end);
202 if (!found) { 204 if (!found) {
203 test_err("didn't find our range"); 205 test_err("didn't find our range");
@@ -213,7 +215,7 @@ static int test_find_delalloc(u32 sectorsize)
213 test_err("pages in range were not all locked"); 215 test_err("pages in range were not all locked");
214 goto out_bits; 216 goto out_bits;
215 } 217 }
216 unlock_extent(&tmp, start, end); 218 unlock_extent(tmp, start, end);
217 219
218 /* 220 /*
219 * Now to test where we run into a page that is no longer dirty in the 221 * Now to test where we run into a page that is no longer dirty in the
@@ -238,7 +240,7 @@ static int test_find_delalloc(u32 sectorsize)
238 * this changes at any point in the future we will need to fix this 240 * this changes at any point in the future we will need to fix this
239 * tests expected behavior. 241 * tests expected behavior.
240 */ 242 */
241 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 243 found = find_lock_delalloc_range(inode, locked_page, &start,
242 &end); 244 &end);
243 if (!found) { 245 if (!found) {
244 test_err("didn't find our range"); 246 test_err("didn't find our range");
@@ -256,7 +258,7 @@ static int test_find_delalloc(u32 sectorsize)
256 } 258 }
257 ret = 0; 259 ret = 0;
258out_bits: 260out_bits:
259 clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1); 261 clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1);
260out: 262out:
261 if (locked_page) 263 if (locked_page)
262 put_page(locked_page); 264 put_page(locked_page);
@@ -432,6 +434,89 @@ out:
432 return ret; 434 return ret;
433} 435}
434 436
437static int test_find_first_clear_extent_bit(void)
438{
439 struct extent_io_tree tree;
440 u64 start, end;
441
442 test_msg("running find_first_clear_extent_bit test");
443 extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
444
445 /*
446 * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
447 * 4M-32M
448 */
449 set_extent_bits(&tree, SZ_1M, SZ_4M - 1,
450 CHUNK_TRIMMED | CHUNK_ALLOCATED);
451
452 find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
453 CHUNK_TRIMMED | CHUNK_ALLOCATED);
454
455	 if (start != 0 || end != SZ_1M - 1)
456 test_err("error finding beginning range: start %llu end %llu",
457 start, end);
458
459 /* Now add 32M-64M so that we have a hole between 4M-32M */
460 set_extent_bits(&tree, SZ_32M, SZ_64M - 1,
461 CHUNK_TRIMMED | CHUNK_ALLOCATED);
462
463 /*
464 * Request first hole starting at 12M, we should get 4M-32M
465 */
466 find_first_clear_extent_bit(&tree, 12 * SZ_1M, &start, &end,
467 CHUNK_TRIMMED | CHUNK_ALLOCATED);
468
469 if (start != SZ_4M || end != SZ_32M - 1)
470 test_err("error finding trimmed range: start %llu end %llu",
471 start, end);
472
473 /*
474 * Search in the middle of allocated range, should get the next one
475 * available, which happens to be unallocated -> 4M-32M
476 */
477 find_first_clear_extent_bit(&tree, SZ_2M, &start, &end,
478 CHUNK_TRIMMED | CHUNK_ALLOCATED);
479
480	 if (start != SZ_4M || end != SZ_32M - 1)
481 test_err("error finding next unalloc range: start %llu end %llu",
482 start, end);
483
484 /*
485	 * Set 64M-72M with CHUNK_ALLOCATED flag, then search for CHUNK_TRIMMED flag
486 * being unset in this range, we should get the entry in range 64M-72M
487 */
488 set_extent_bits(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED);
489 find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
490 CHUNK_TRIMMED);
491
492 if (start != SZ_64M || end != SZ_64M + SZ_8M - 1)
493 test_err("error finding exact range: start %llu end %llu",
494 start, end);
495
496 find_first_clear_extent_bit(&tree, SZ_64M - SZ_8M, &start, &end,
497 CHUNK_TRIMMED);
498
499 /*
500 * Search in the middle of set range whose immediate neighbour doesn't
501 * have the bits set so it must be returned
502 */
503 if (start != SZ_64M || end != SZ_64M + SZ_8M - 1)
504 test_err("error finding next alloc range: start %llu end %llu",
505 start, end);
506
507 /*
508 * Search beyond any known range, shall return after last known range
509 * and end should be -1
510 */
511 find_first_clear_extent_bit(&tree, -1, &start, &end, CHUNK_TRIMMED);
512 if (start != SZ_64M + SZ_8M || end != -1)
513 test_err(
514 "error handling beyond end of range search: start %llu end %llu",
515 start, end);
516
517 return 0;
518}
519
435int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) 520int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
436{ 521{
437 int ret; 522 int ret;
@@ -442,6 +527,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
442 if (ret) 527 if (ret)
443 goto out; 528 goto out;
444 529
530 ret = test_find_first_clear_extent_bit();
531 if (ret)
532 goto out;
533
445 ret = test_eb_bitmaps(sectorsize, nodesize); 534 ret = test_eb_bitmaps(sectorsize, nodesize);
446out: 535out:
447 return ret; 536 return ret;
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 87aeabe9d610..4a7f796c9900 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -66,7 +66,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
66 em->len = SZ_16K; 66 em->len = SZ_16K;
67 em->block_start = 0; 67 em->block_start = 0;
68 em->block_len = SZ_16K; 68 em->block_len = SZ_16K;
69 write_lock(&em_tree->lock);
69 ret = add_extent_mapping(em_tree, em, 0); 70 ret = add_extent_mapping(em_tree, em, 0);
71 write_unlock(&em_tree->lock);
70 if (ret < 0) { 72 if (ret < 0) {
71 test_err("cannot add extent range [0, 16K)"); 73 test_err("cannot add extent range [0, 16K)");
72 goto out; 74 goto out;
@@ -85,7 +87,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
85 em->len = SZ_4K; 87 em->len = SZ_4K;
86 em->block_start = SZ_32K; /* avoid merging */ 88 em->block_start = SZ_32K; /* avoid merging */
87 em->block_len = SZ_4K; 89 em->block_len = SZ_4K;
90 write_lock(&em_tree->lock);
88 ret = add_extent_mapping(em_tree, em, 0); 91 ret = add_extent_mapping(em_tree, em, 0);
92 write_unlock(&em_tree->lock);
89 if (ret < 0) { 93 if (ret < 0) {
90 test_err("cannot add extent range [16K, 20K)"); 94 test_err("cannot add extent range [16K, 20K)");
91 goto out; 95 goto out;
@@ -104,7 +108,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
104 em->len = len; 108 em->len = len;
105 em->block_start = start; 109 em->block_start = start;
106 em->block_len = len; 110 em->block_len = len;
111 write_lock(&em_tree->lock);
107 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); 112 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
113 write_unlock(&em_tree->lock);
108 if (ret) { 114 if (ret) {
109 test_err("case1 [%llu %llu]: ret %d", start, start + len, ret); 115 test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
110 goto out; 116 goto out;
@@ -148,7 +154,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
148 em->len = SZ_1K; 154 em->len = SZ_1K;
149 em->block_start = EXTENT_MAP_INLINE; 155 em->block_start = EXTENT_MAP_INLINE;
150 em->block_len = (u64)-1; 156 em->block_len = (u64)-1;
157 write_lock(&em_tree->lock);
151 ret = add_extent_mapping(em_tree, em, 0); 158 ret = add_extent_mapping(em_tree, em, 0);
159 write_unlock(&em_tree->lock);
152 if (ret < 0) { 160 if (ret < 0) {
153 test_err("cannot add extent range [0, 1K)"); 161 test_err("cannot add extent range [0, 1K)");
154 goto out; 162 goto out;
@@ -167,7 +175,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
167 em->len = SZ_4K; 175 em->len = SZ_4K;
168 em->block_start = SZ_4K; 176 em->block_start = SZ_4K;
169 em->block_len = SZ_4K; 177 em->block_len = SZ_4K;
178 write_lock(&em_tree->lock);
170 ret = add_extent_mapping(em_tree, em, 0); 179 ret = add_extent_mapping(em_tree, em, 0);
180 write_unlock(&em_tree->lock);
171 if (ret < 0) { 181 if (ret < 0) {
172 test_err("cannot add extent range [4K, 8K)"); 182 test_err("cannot add extent range [4K, 8K)");
173 goto out; 183 goto out;
@@ -186,7 +196,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
186 em->len = SZ_1K; 196 em->len = SZ_1K;
187 em->block_start = EXTENT_MAP_INLINE; 197 em->block_start = EXTENT_MAP_INLINE;
188 em->block_len = (u64)-1; 198 em->block_len = (u64)-1;
199 write_lock(&em_tree->lock);
189 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); 200 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
201 write_unlock(&em_tree->lock);
190 if (ret) { 202 if (ret) {
191 test_err("case2 [0 1K]: ret %d", ret); 203 test_err("case2 [0 1K]: ret %d", ret);
192 goto out; 204 goto out;
@@ -225,7 +237,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
225 em->len = SZ_4K; 237 em->len = SZ_4K;
226 em->block_start = SZ_4K; 238 em->block_start = SZ_4K;
227 em->block_len = SZ_4K; 239 em->block_len = SZ_4K;
240 write_lock(&em_tree->lock);
228 ret = add_extent_mapping(em_tree, em, 0); 241 ret = add_extent_mapping(em_tree, em, 0);
242 write_unlock(&em_tree->lock);
229 if (ret < 0) { 243 if (ret < 0) {
230 test_err("cannot add extent range [4K, 8K)"); 244 test_err("cannot add extent range [4K, 8K)");
231 goto out; 245 goto out;
@@ -244,7 +258,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
244 em->len = SZ_16K; 258 em->len = SZ_16K;
245 em->block_start = 0; 259 em->block_start = 0;
246 em->block_len = SZ_16K; 260 em->block_len = SZ_16K;
261 write_lock(&em_tree->lock);
247 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); 262 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
263 write_unlock(&em_tree->lock);
248 if (ret) { 264 if (ret) {
249 test_err("case3 [0x%llx 0x%llx): ret %d", 265 test_err("case3 [0x%llx 0x%llx): ret %d",
250 start, start + len, ret); 266 start, start + len, ret);
@@ -320,7 +336,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
320 em->len = SZ_8K; 336 em->len = SZ_8K;
321 em->block_start = 0; 337 em->block_start = 0;
322 em->block_len = SZ_8K; 338 em->block_len = SZ_8K;
339 write_lock(&em_tree->lock);
323 ret = add_extent_mapping(em_tree, em, 0); 340 ret = add_extent_mapping(em_tree, em, 0);
341 write_unlock(&em_tree->lock);
324 if (ret < 0) { 342 if (ret < 0) {
325 test_err("cannot add extent range [0, 8K)"); 343 test_err("cannot add extent range [0, 8K)");
326 goto out; 344 goto out;
@@ -339,7 +357,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
339 em->len = 24 * SZ_1K; 357 em->len = 24 * SZ_1K;
340 em->block_start = SZ_16K; /* avoid merging */ 358 em->block_start = SZ_16K; /* avoid merging */
341 em->block_len = 24 * SZ_1K; 359 em->block_len = 24 * SZ_1K;
360 write_lock(&em_tree->lock);
342 ret = add_extent_mapping(em_tree, em, 0); 361 ret = add_extent_mapping(em_tree, em, 0);
362 write_unlock(&em_tree->lock);
343 if (ret < 0) { 363 if (ret < 0) {
344 test_err("cannot add extent range [8K, 32K)"); 364 test_err("cannot add extent range [8K, 32K)");
345 goto out; 365 goto out;
@@ -357,7 +377,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
357 em->len = SZ_32K; 377 em->len = SZ_32K;
358 em->block_start = 0; 378 em->block_start = 0;
359 em->block_len = SZ_32K; 379 em->block_len = SZ_32K;
380 write_lock(&em_tree->lock);
360 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); 381 ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
382 write_unlock(&em_tree->lock);
361 if (ret) { 383 if (ret) {
362 test_err("case4 [0x%llx 0x%llx): ret %d", 384 test_err("case4 [0x%llx 0x%llx): ret %d",
363 start, len, ret); 385 start, len, ret);
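
The hunks above wrap every add_extent_mapping() / btrfs_add_extent_mapping() call in the selftests with write_lock()/write_unlock() on the extent map tree's rwlock, matching the rule that the tree may only be modified with the write lock held. A minimal userspace sketch of the same lock-around-insert pattern, assuming made-up demo_tree and insert_range() stand-ins rather than btrfs APIs:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for an extent map tree: only remembers the last inserted range. */
struct demo_tree {
	pthread_rwlock_t lock;          /* analog of extent_map_tree::lock */
	uint64_t start, len;
};

/* Must only run with the write lock held, like add_extent_mapping(). */
static void insert_range(struct demo_tree *t, uint64_t start, uint64_t len)
{
	t->start = start;
	t->len = len;
}

int main(void)
{
	struct demo_tree t = { .start = 0, .len = 0 };

	pthread_rwlock_init(&t.lock, NULL);

	pthread_rwlock_wrlock(&t.lock);   /* write_lock(&em_tree->lock) */
	insert_range(&t, 0, 16 * 1024);   /* add_extent_mapping(em_tree, em, 0) */
	pthread_rwlock_unlock(&t.lock);   /* write_unlock(&em_tree->lock) */

	printf("inserted [%llu, %llu)\n", (unsigned long long)t.start,
	       (unsigned long long)(t.start + t.len));
	return 0;
}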
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3f6811cdf803..3b8ae1a8f02d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -129,6 +129,24 @@ static inline int extwriter_counter_read(struct btrfs_transaction *trans)
129} 129}
130 130
131/* 131/*
132 * To be called after all the new block groups attached to the transaction
133 * handle have been created (btrfs_create_pending_block_groups()).
134 */
135void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
136{
137 struct btrfs_fs_info *fs_info = trans->fs_info;
138
139 if (!trans->chunk_bytes_reserved)
140 return;
141
142 WARN_ON_ONCE(!list_empty(&trans->new_bgs));
143
144 btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
145 trans->chunk_bytes_reserved);
146 trans->chunk_bytes_reserved = 0;
147}
148
149/*
132 * either allocate a new transaction or hop into the existing one 150 * either allocate a new transaction or hop into the existing one
133 */ 151 */
134static noinline int join_transaction(struct btrfs_fs_info *fs_info, 152static noinline int join_transaction(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 78c446c222b7..527ea94b57d9 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -224,5 +224,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction);
224void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); 224void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
225void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, 225void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
226 struct btrfs_root *root); 226 struct btrfs_root *root);
227void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
227 228
228#endif 229#endif
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 96fce4bef4e7..ccd5706199d7 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -132,6 +132,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
132 struct btrfs_file_extent_item *fi; 132 struct btrfs_file_extent_item *fi;
133 u32 sectorsize = fs_info->sectorsize; 133 u32 sectorsize = fs_info->sectorsize;
134 u32 item_size = btrfs_item_size_nr(leaf, slot); 134 u32 item_size = btrfs_item_size_nr(leaf, slot);
135 u64 extent_end;
135 136
136 if (!IS_ALIGNED(key->offset, sectorsize)) { 137 if (!IS_ALIGNED(key->offset, sectorsize)) {
137 file_extent_err(leaf, slot, 138 file_extent_err(leaf, slot,
@@ -207,6 +208,16 @@ static int check_extent_data_item(struct extent_buffer *leaf,
207 CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)) 208 CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
208 return -EUCLEAN; 209 return -EUCLEAN;
209 210
211 /* Catch extent end overflow */
212 if (check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
213 key->offset, &extent_end)) {
214 file_extent_err(leaf, slot,
215 "extent end overflow, have file offset %llu extent num bytes %llu",
216 key->offset,
217 btrfs_file_extent_num_bytes(leaf, fi));
218 return -EUCLEAN;
219 }
220
210 /* 221 /*
211 * Check that no two consecutive file extent items, in the same leaf, 222 * Check that no two consecutive file extent items, in the same leaf,
212 * present ranges that overlap each other. 223 * present ranges that overlap each other.
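
The new guard above rejects file extent items whose end (key->offset + num_bytes) would wrap around u64. A small userspace sketch of the same check, using __builtin_add_overflow(), the compiler builtin that check_add_overflow() typically wraps; extent_end_overflows() and the sample values are illustrative only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the tree-checker guard: reject extents whose end wraps u64. */
static bool extent_end_overflows(uint64_t file_offset, uint64_t num_bytes,
				 uint64_t *extent_end)
{
	return __builtin_add_overflow(file_offset, num_bytes, extent_end);
}

int main(void)
{
	uint64_t end;

	/* Normal extent: 1 MiB at file offset 4 MiB, no overflow. */
	if (!extent_end_overflows(4ULL << 20, 1ULL << 20, &end))
		printf("extent end %llu\n", (unsigned long long)end);

	/* Corrupted item: num_bytes pushes the end past U64_MAX. */
	if (extent_end_overflows(UINT64_MAX - 4096, 8192, &end))
		printf("extent end overflow, item rejected (-EUCLEAN)\n");

	return 0;
}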
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 3fc8d854d7fb..6c8297bcfeb7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3323,6 +3323,30 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
3323} 3323}
3324 3324
3325/* 3325/*
3326 * Check if an inode was logged in the current transaction. We can't always rely
3327 * on an inode's logged_trans value, because it's an in-memory only field and
3328 * therefore not persisted. This means that its value is lost if the inode gets
3329 * evicted and loaded again from disk (in which case it has a value of 0, and
3330 certainly it is smaller than any possible transaction ID), when that happens
3331 the full_sync flag is set in the inode's runtime flags, so in that case we
3332 * assume eviction happened and ignore the logged_trans value, assuming the
3333 * worst case, that the inode was logged before in the current transaction.
3334 */
3335static bool inode_logged(struct btrfs_trans_handle *trans,
3336 struct btrfs_inode *inode)
3337{
3338 if (inode->logged_trans == trans->transid)
3339 return true;
3340
3341 if (inode->last_trans == trans->transid &&
3342 test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
3343 !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
3344 return true;
3345
3346 return false;
3347}
3348
3349/*
3326 * If both a file and directory are logged, and unlinks or renames are 3350 * If both a file and directory are logged, and unlinks or renames are
3327 * mixed in, we have a few interesting corners: 3351 * mixed in, we have a few interesting corners:
3328 * 3352 *
@@ -3356,7 +3380,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
3356 int bytes_del = 0; 3380 int bytes_del = 0;
3357 u64 dir_ino = btrfs_ino(dir); 3381 u64 dir_ino = btrfs_ino(dir);
3358 3382
3359 if (dir->logged_trans < trans->transid) 3383 if (!inode_logged(trans, dir))
3360 return 0; 3384 return 0;
3361 3385
3362 ret = join_running_log_trans(root); 3386 ret = join_running_log_trans(root);
@@ -3460,7 +3484,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
3460 u64 index; 3484 u64 index;
3461 int ret; 3485 int ret;
3462 3486
3463 if (inode->logged_trans < trans->transid) 3487 if (!inode_logged(trans, inode))
3464 return 0; 3488 return 0;
3465 3489
3466 ret = join_running_log_trans(root); 3490 ret = join_running_log_trans(root);
@@ -5420,9 +5444,19 @@ log_extents:
5420 } 5444 }
5421 } 5445 }
5422 5446
5447 /*
5448 * Don't update last_log_commit if we logged that an inode exists after
5449 * it was loaded to memory (full_sync bit set).
5450 * This is to prevent data loss when we do a write to the inode, then
5451 * the inode gets evicted after all delalloc was flushed, then we log
5452 * it exists (due to a rename for example) and then fsync it. This last
5453 * fsync would do nothing (not logging the extents previously written).
5454 */
5423 spin_lock(&inode->lock); 5455 spin_lock(&inode->lock);
5424 inode->logged_trans = trans->transid; 5456 inode->logged_trans = trans->transid;
5425 inode->last_log_commit = inode->last_sub_trans; 5457 if (inode_only != LOG_INODE_EXISTS ||
5458 !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
5459 inode->last_log_commit = inode->last_sub_trans;
5426 spin_unlock(&inode->lock); 5460 spin_unlock(&inode->lock);
5427out_unlock: 5461out_unlock:
5428 mutex_unlock(&inode->log_mutex); 5462 mutex_unlock(&inode->log_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1c2a6e4b39da..a13ddba1ebc3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -28,6 +28,7 @@
28#include "dev-replace.h" 28#include "dev-replace.h"
29#include "sysfs.h" 29#include "sysfs.h"
30#include "tree-checker.h" 30#include "tree-checker.h"
31#include "space-info.h"
31 32
32const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 33const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
33 [BTRFS_RAID_RAID10] = { 34 [BTRFS_RAID_RAID10] = {
@@ -123,12 +124,14 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
123 }, 124 },
124}; 125};
125 126
126const char *get_raid_name(enum btrfs_raid_types type) 127const char *btrfs_bg_type_to_raid_name(u64 flags)
127{ 128{
128 if (type >= BTRFS_NR_RAID_TYPES) 129 const int index = btrfs_bg_flags_to_raid_index(flags);
130
131 if (index >= BTRFS_NR_RAID_TYPES)
129 return NULL; 132 return NULL;
130 133
131 return btrfs_raid_array[type].raid_name; 134 return btrfs_raid_array[index].raid_name;
132} 135}
133 136
134/* 137/*
@@ -237,7 +240,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
237 * chunk_mutex 240 * chunk_mutex
238 * ----------- 241 * -----------
239 * protects chunks, adding or removing during allocation, trim or when a new 242 * protects chunks, adding or removing during allocation, trim or when a new
240 * device is added/removed 243 * device is added/removed. Additionally it also protects post_commit_list of
244 * individual devices, since they can be added to the transaction's
245 * post_commit_list only with chunk_mutex held.
241 * 246 *
242 * cleaner_mutex 247 * cleaner_mutex
243 * ------------- 248 * -------------
@@ -1818,7 +1823,7 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1818 struct rb_node *n; 1823 struct rb_node *n;
1819 u64 ret = 0; 1824 u64 ret = 0;
1820 1825
1821 em_tree = &fs_info->mapping_tree.map_tree; 1826 em_tree = &fs_info->mapping_tree;
1822 read_lock(&em_tree->lock); 1827 read_lock(&em_tree->lock);
1823 n = rb_last(&em_tree->map.rb_root); 1828 n = rb_last(&em_tree->map.rb_root);
1824 if (n) { 1829 if (n) {
@@ -2941,7 +2946,7 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
2941 struct extent_map_tree *em_tree; 2946 struct extent_map_tree *em_tree;
2942 struct extent_map *em; 2947 struct extent_map *em;
2943 2948
2944 em_tree = &fs_info->mapping_tree.map_tree; 2949 em_tree = &fs_info->mapping_tree;
2945 read_lock(&em_tree->lock); 2950 read_lock(&em_tree->lock);
2946 em = lookup_extent_mapping(em_tree, logical, length); 2951 em = lookup_extent_mapping(em_tree, logical, length);
2947 read_unlock(&em_tree->lock); 2952 read_unlock(&em_tree->lock);
@@ -3474,6 +3479,18 @@ static int chunk_devid_filter(struct extent_buffer *leaf,
3474 return 1; 3479 return 1;
3475} 3480}
3476 3481
3482static u64 calc_data_stripes(u64 type, int num_stripes)
3483{
3484 const int index = btrfs_bg_flags_to_raid_index(type);
3485 const int ncopies = btrfs_raid_array[index].ncopies;
3486 const int nparity = btrfs_raid_array[index].nparity;
3487
3488 if (nparity)
3489 return num_stripes - nparity;
3490 else
3491 return num_stripes / ncopies;
3492}
3493
3477/* [pstart, pend) */ 3494/* [pstart, pend) */
3478static int chunk_drange_filter(struct extent_buffer *leaf, 3495static int chunk_drange_filter(struct extent_buffer *leaf,
3479 struct btrfs_chunk *chunk, 3496 struct btrfs_chunk *chunk,
@@ -3483,22 +3500,15 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
3483 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 3500 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3484 u64 stripe_offset; 3501 u64 stripe_offset;
3485 u64 stripe_length; 3502 u64 stripe_length;
3503 u64 type;
3486 int factor; 3504 int factor;
3487 int i; 3505 int i;
3488 3506
3489 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID)) 3507 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3490 return 0; 3508 return 0;
3491 3509
3492 if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP | 3510 type = btrfs_chunk_type(leaf, chunk);
3493 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) { 3511 factor = calc_data_stripes(type, num_stripes);
3494 factor = num_stripes / 2;
3495 } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) {
3496 factor = num_stripes - 1;
3497 } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) {
3498 factor = num_stripes - 2;
3499 } else {
3500 factor = num_stripes;
3501 }
3502 3512
3503 for (i = 0; i < num_stripes; i++) { 3513 for (i = 0; i < num_stripes; i++) {
3504 stripe = btrfs_stripe_nr(chunk, i); 3514 stripe = btrfs_stripe_nr(chunk, i);
@@ -3921,11 +3931,9 @@ static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
3921 bp += ret; \ 3931 bp += ret; \
3922 } while (0) 3932 } while (0)
3923 3933
3924 if (flags & BTRFS_BALANCE_ARGS_CONVERT) { 3934 if (flags & BTRFS_BALANCE_ARGS_CONVERT)
3925 int index = btrfs_bg_flags_to_raid_index(bargs->target); 3935 CHECK_APPEND_1ARG("convert=%s,",
3926 3936 btrfs_bg_type_to_raid_name(bargs->target));
3927 CHECK_APPEND_1ARG("convert=%s,", get_raid_name(index));
3928 }
3929 3937
3930 if (flags & BTRFS_BALANCE_ARGS_SOFT) 3938 if (flags & BTRFS_BALANCE_ARGS_SOFT)
3931 CHECK_APPEND_NOARG("soft,"); 3939 CHECK_APPEND_NOARG("soft,");
@@ -4047,6 +4055,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
4047 u64 num_devices; 4055 u64 num_devices;
4048 unsigned seq; 4056 unsigned seq;
4049 bool reducing_integrity; 4057 bool reducing_integrity;
4058 int i;
4050 4059
4051 if (btrfs_fs_closing(fs_info) || 4060 if (btrfs_fs_closing(fs_info) ||
4052 atomic_read(&fs_info->balance_pause_req) || 4061 atomic_read(&fs_info->balance_pause_req) ||
@@ -4076,48 +4085,43 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
4076 } 4085 }
4077 4086
4078 num_devices = btrfs_num_devices(fs_info); 4087 num_devices = btrfs_num_devices(fs_info);
4088 allowed = 0;
4089 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
4090 if (num_devices >= btrfs_raid_array[i].devs_min)
4091 allowed |= btrfs_raid_array[i].bg_flag;
4079 4092
4080 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
4081 if (num_devices > 1)
4082 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
4083 if (num_devices > 2)
4084 allowed |= BTRFS_BLOCK_GROUP_RAID5;
4085 if (num_devices > 3)
4086 allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
4087 BTRFS_BLOCK_GROUP_RAID6);
4088 if (validate_convert_profile(&bctl->data, allowed)) { 4093 if (validate_convert_profile(&bctl->data, allowed)) {
4089 int index = btrfs_bg_flags_to_raid_index(bctl->data.target);
4090
4091 btrfs_err(fs_info, 4094 btrfs_err(fs_info,
4092 "balance: invalid convert data profile %s", 4095 "balance: invalid convert data profile %s",
4093 get_raid_name(index)); 4096 btrfs_bg_type_to_raid_name(bctl->data.target));
4094 ret = -EINVAL; 4097 ret = -EINVAL;
4095 goto out; 4098 goto out;
4096 } 4099 }
4097 if (validate_convert_profile(&bctl->meta, allowed)) { 4100 if (validate_convert_profile(&bctl->meta, allowed)) {
4098 int index = btrfs_bg_flags_to_raid_index(bctl->meta.target);
4099
4100 btrfs_err(fs_info, 4101 btrfs_err(fs_info,
4101 "balance: invalid convert metadata profile %s", 4102 "balance: invalid convert metadata profile %s",
4102 get_raid_name(index)); 4103 btrfs_bg_type_to_raid_name(bctl->meta.target));
4103 ret = -EINVAL; 4104 ret = -EINVAL;
4104 goto out; 4105 goto out;
4105 } 4106 }
4106 if (validate_convert_profile(&bctl->sys, allowed)) { 4107 if (validate_convert_profile(&bctl->sys, allowed)) {
4107 int index = btrfs_bg_flags_to_raid_index(bctl->sys.target);
4108
4109 btrfs_err(fs_info, 4108 btrfs_err(fs_info,
4110 "balance: invalid convert system profile %s", 4109 "balance: invalid convert system profile %s",
4111 get_raid_name(index)); 4110 btrfs_bg_type_to_raid_name(bctl->sys.target));
4112 ret = -EINVAL; 4111 ret = -EINVAL;
4113 goto out; 4112 goto out;
4114 } 4113 }
4115 4114
4116 /* allow to reduce meta or sys integrity only if force set */ 4115 /*
4117 allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 4116 * Allow to reduce metadata or system integrity only if force set for
4118 BTRFS_BLOCK_GROUP_RAID10 | 4117 * profiles with redundancy (copies, parity)
4119 BTRFS_BLOCK_GROUP_RAID5 | 4118 */
4120 BTRFS_BLOCK_GROUP_RAID6; 4119 allowed = 0;
4120 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
4121 if (btrfs_raid_array[i].ncopies >= 2 ||
4122 btrfs_raid_array[i].tolerated_failures >= 1)
4123 allowed |= btrfs_raid_array[i].bg_flag;
4124 }
4121 do { 4125 do {
4122 seq = read_seqbegin(&fs_info->profiles_lock); 4126 seq = read_seqbegin(&fs_info->profiles_lock);
4123 4127
@@ -4152,12 +4156,18 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
4152 4156
4153 if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < 4157 if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
4154 btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { 4158 btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
4155 int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
4156 int data_index = btrfs_bg_flags_to_raid_index(data_target);
4157
4158 btrfs_warn(fs_info, 4159 btrfs_warn(fs_info,
4159 "balance: metadata profile %s has lower redundancy than data profile %s", 4160 "balance: metadata profile %s has lower redundancy than data profile %s",
4160 get_raid_name(meta_index), get_raid_name(data_index)); 4161 btrfs_bg_type_to_raid_name(meta_target),
4162 btrfs_bg_type_to_raid_name(data_target));
4163 }
4164
4165 if (fs_info->send_in_progress) {
4166 btrfs_warn_rl(fs_info,
4167"cannot run balance while send operations are in progress (%d in progress)",
4168 fs_info->send_in_progress);
4169 ret = -EAGAIN;
4170 goto out;
4161 } 4171 }
4162 4172
4163 ret = insert_balance_item(fs_info, bctl); 4173 ret = insert_balance_item(fs_info, bctl);
@@ -4949,6 +4959,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4949 sub_stripes = btrfs_raid_array[index].sub_stripes; 4959 sub_stripes = btrfs_raid_array[index].sub_stripes;
4950 dev_stripes = btrfs_raid_array[index].dev_stripes; 4960 dev_stripes = btrfs_raid_array[index].dev_stripes;
4951 devs_max = btrfs_raid_array[index].devs_max; 4961 devs_max = btrfs_raid_array[index].devs_max;
4962 if (!devs_max)
4963 devs_max = BTRFS_MAX_DEVS(info);
4952 devs_min = btrfs_raid_array[index].devs_min; 4964 devs_min = btrfs_raid_array[index].devs_min;
4953 devs_increment = btrfs_raid_array[index].devs_increment; 4965 devs_increment = btrfs_raid_array[index].devs_increment;
4954 ncopies = btrfs_raid_array[index].ncopies; 4966 ncopies = btrfs_raid_array[index].ncopies;
@@ -4957,8 +4969,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4957 if (type & BTRFS_BLOCK_GROUP_DATA) { 4969 if (type & BTRFS_BLOCK_GROUP_DATA) {
4958 max_stripe_size = SZ_1G; 4970 max_stripe_size = SZ_1G;
4959 max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; 4971 max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
4960 if (!devs_max)
4961 devs_max = BTRFS_MAX_DEVS(info);
4962 } else if (type & BTRFS_BLOCK_GROUP_METADATA) { 4972 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
4963 /* for larger filesystems, use larger metadata chunks */ 4973 /* for larger filesystems, use larger metadata chunks */
4964 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G) 4974 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
@@ -4966,13 +4976,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4966 else 4976 else
4967 max_stripe_size = SZ_256M; 4977 max_stripe_size = SZ_256M;
4968 max_chunk_size = max_stripe_size; 4978 max_chunk_size = max_stripe_size;
4969 if (!devs_max)
4970 devs_max = BTRFS_MAX_DEVS(info);
4971 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { 4979 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4972 max_stripe_size = SZ_32M; 4980 max_stripe_size = SZ_32M;
4973 max_chunk_size = 2 * max_stripe_size; 4981 max_chunk_size = 2 * max_stripe_size;
4974 if (!devs_max)
4975 devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
4976 } else { 4982 } else {
4977 btrfs_err(info, "invalid chunk type 0x%llx requested", 4983 btrfs_err(info, "invalid chunk type 0x%llx requested",
4978 type); 4984 type);
@@ -5143,7 +5149,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
5143 em->block_len = em->len; 5149 em->block_len = em->len;
5144 em->orig_block_len = stripe_size; 5150 em->orig_block_len = stripe_size;
5145 5151
5146 em_tree = &info->mapping_tree.map_tree; 5152 em_tree = &info->mapping_tree;
5147 write_lock(&em_tree->lock); 5153 write_lock(&em_tree->lock);
5148 ret = add_extent_mapping(em_tree, em, 0); 5154 ret = add_extent_mapping(em_tree, em, 0);
5149 if (ret) { 5155 if (ret) {
@@ -5324,20 +5330,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5324 5330
5325static inline int btrfs_chunk_max_errors(struct map_lookup *map) 5331static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5326{ 5332{
5327 int max_errors; 5333 const int index = btrfs_bg_flags_to_raid_index(map->type);
5328 5334
5329 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | 5335 return btrfs_raid_array[index].tolerated_failures;
5330 BTRFS_BLOCK_GROUP_RAID10 |
5331 BTRFS_BLOCK_GROUP_RAID5 |
5332 BTRFS_BLOCK_GROUP_DUP)) {
5333 max_errors = 1;
5334 } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
5335 max_errors = 2;
5336 } else {
5337 max_errors = 0;
5338 }
5339
5340 return max_errors;
5341} 5336}
5342 5337
5343int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) 5338int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
@@ -5378,21 +5373,16 @@ end:
5378 return readonly; 5373 return readonly;
5379} 5374}
5380 5375
5381void btrfs_mapping_init(struct btrfs_mapping_tree *tree) 5376void btrfs_mapping_tree_free(struct extent_map_tree *tree)
5382{
5383 extent_map_tree_init(&tree->map_tree);
5384}
5385
5386void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
5387{ 5377{
5388 struct extent_map *em; 5378 struct extent_map *em;
5389 5379
5390 while (1) { 5380 while (1) {
5391 write_lock(&tree->map_tree.lock); 5381 write_lock(&tree->lock);
5392 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); 5382 em = lookup_extent_mapping(tree, 0, (u64)-1);
5393 if (em) 5383 if (em)
5394 remove_extent_mapping(&tree->map_tree, em); 5384 remove_extent_mapping(tree, em);
5395 write_unlock(&tree->map_tree.lock); 5385 write_unlock(&tree->lock);
5396 if (!em) 5386 if (!em)
5397 break; 5387 break;
5398 /* once for us */ 5388 /* once for us */
@@ -5419,7 +5409,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5419 return 1; 5409 return 1;
5420 5410
5421 map = em->map_lookup; 5411 map = em->map_lookup;
5422 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) 5412 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
5423 ret = map->num_stripes; 5413 ret = map->num_stripes;
5424 else if (map->type & BTRFS_BLOCK_GROUP_RAID10) 5414 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5425 ret = map->sub_stripes; 5415 ret = map->sub_stripes;
@@ -5493,7 +5483,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
5493 struct btrfs_device *srcdev; 5483 struct btrfs_device *srcdev;
5494 5484
5495 ASSERT((map->type & 5485 ASSERT((map->type &
5496 (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))); 5486 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
5497 5487
5498 if (map->type & BTRFS_BLOCK_GROUP_RAID10) 5488 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5499 num_stripes = map->sub_stripes; 5489 num_stripes = map->sub_stripes;
@@ -5682,7 +5672,7 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5682 &remaining_stripes); 5672 &remaining_stripes);
5683 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); 5673 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
5684 last_stripe *= sub_stripes; 5674 last_stripe *= sub_stripes;
5685 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | 5675 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
5686 BTRFS_BLOCK_GROUP_DUP)) { 5676 BTRFS_BLOCK_GROUP_DUP)) {
5687 num_stripes = map->num_stripes; 5677 num_stripes = map->num_stripes;
5688 } else { 5678 } else {
@@ -5926,6 +5916,102 @@ static bool need_full_stripe(enum btrfs_map_op op)
5926 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS); 5916 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
5927} 5917}
5928 5918
5919/*
5920 * btrfs_get_io_geometry - calculates the geometry of a particular (address, len)
5921 * tuple. This information is used to calculate how big a
5922 * particular bio can get before it straddles a stripe.
5923 *
5924 * @fs_info - the filesystem
5925 * @logical - address that we want to figure out the geometry of
5926 * @len - the length of IO we are going to perform, starting at @logical
5927 * @op - type of operation - write or read
5928 * @io_geom - pointer used to return values
5929 *
5930 * Returns < 0 in case a chunk for the given logical address cannot be found,
5931 * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
5932 */
5933int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
5934 u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
5935{
5936 struct extent_map *em;
5937 struct map_lookup *map;
5938 u64 offset;
5939 u64 stripe_offset;
5940 u64 stripe_nr;
5941 u64 stripe_len;
5942 u64 raid56_full_stripe_start = (u64)-1;
5943 int data_stripes;
5944
5945 ASSERT(op != BTRFS_MAP_DISCARD);
5946
5947 em = btrfs_get_chunk_map(fs_info, logical, len);
5948 if (IS_ERR(em))
5949 return PTR_ERR(em);
5950
5951 map = em->map_lookup;
5952 /* Offset of this logical address in the chunk */
5953 offset = logical - em->start;
5954 /* Len of a stripe in a chunk */
5955 stripe_len = map->stripe_len;
5956	 /* Stripe where this block falls in */
5957 stripe_nr = div64_u64(offset, stripe_len);
5958 /* Offset of stripe in the chunk */
5959 stripe_offset = stripe_nr * stripe_len;
5960 if (offset < stripe_offset) {
5961 btrfs_crit(fs_info,
5962"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
5963 stripe_offset, offset, em->start, logical, stripe_len);
5964 free_extent_map(em);
5965 return -EINVAL;
5966 }
5967
5968 /* stripe_offset is the offset of this block in its stripe */
5969 stripe_offset = offset - stripe_offset;
5970 data_stripes = nr_data_stripes(map);
5971
5972 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
5973 u64 max_len = stripe_len - stripe_offset;
5974
5975 /*
5976 * In case of raid56, we need to know the stripe aligned start
5977 */
5978 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5979 unsigned long full_stripe_len = stripe_len * data_stripes;
5980 raid56_full_stripe_start = offset;
5981
5982 /*
5983 * Allow a write of a full stripe, but make sure we
5984 * don't allow straddling of stripes
5985 */
5986 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
5987 full_stripe_len);
5988 raid56_full_stripe_start *= full_stripe_len;
5989
5990 /*
5991 * For writes to RAID[56], allow a full stripeset across
5992 * all disks. For other RAID types and for RAID[56]
5993 * reads, just allow a single stripe (on a single disk).
5994 */
5995 if (op == BTRFS_MAP_WRITE) {
5996 max_len = stripe_len * data_stripes -
5997 (offset - raid56_full_stripe_start);
5998 }
5999 }
6000 len = min_t(u64, em->len - offset, max_len);
6001 } else {
6002 len = em->len - offset;
6003 }
6004
6005 io_geom->len = len;
6006 io_geom->offset = offset;
6007 io_geom->stripe_len = stripe_len;
6008 io_geom->stripe_nr = stripe_nr;
6009 io_geom->stripe_offset = stripe_offset;
6010 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
6011
6012 return 0;
6013}
6014
5929static int __btrfs_map_block(struct btrfs_fs_info *fs_info, 6015static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
5930 enum btrfs_map_op op, 6016 enum btrfs_map_op op,
5931 u64 logical, u64 *length, 6017 u64 logical, u64 *length,
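
The arithmetic moved into btrfs_get_io_geometry() amounts to a few integer divisions: which stripe the logical address falls into, the offset within that stripe, and, for RAID5/6 writes, the start of the enclosing full stripe plus how many bytes fit before the IO would straddle it. The sketch below redoes that math in plain C for one example chunk (64K stripes, 3 data stripes); demo_geom() is an illustration of the calculation, not the kernel function:

#include <stdint.h>
#include <stdio.h>

struct demo_geometry {
	uint64_t stripe_nr;          /* stripe the address falls into */
	uint64_t stripe_offset;      /* offset of the address within that stripe */
	uint64_t full_stripe_start;  /* raid56 only: start of the full stripe */
	uint64_t max_len;            /* bytes before the IO would straddle */
};

/* Mirrors the geometry arithmetic for a raid56 write within one chunk. */
static void demo_geom(uint64_t offset, uint64_t stripe_len, int data_stripes,
		      struct demo_geometry *g)
{
	uint64_t full_stripe_len = stripe_len * data_stripes;

	g->stripe_nr = offset / stripe_len;
	g->stripe_offset = offset - g->stripe_nr * stripe_len;
	g->full_stripe_start = (offset / full_stripe_len) * full_stripe_len;
	/* A write may run to the end of the full stripe, but not past it. */
	g->max_len = full_stripe_len - (offset - g->full_stripe_start);
}

int main(void)
{
	/* Example: 64K stripes, 3 data stripes (e.g. raid5 on 4 devices),
	 * IO starting 200K into the chunk. */
	struct demo_geometry g;

	demo_geom(200 * 1024, 64 * 1024, 3, &g);
	printf("stripe_nr=%llu stripe_offset=%llu full_stripe_start=%llu max_len=%llu\n",
	       (unsigned long long)g.stripe_nr,
	       (unsigned long long)g.stripe_offset,
	       (unsigned long long)g.full_stripe_start,
	       (unsigned long long)g.max_len);
	return 0;
}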
@@ -5939,6 +6025,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
5939 u64 stripe_nr; 6025 u64 stripe_nr;
5940 u64 stripe_len; 6026 u64 stripe_len;
5941 u32 stripe_index; 6027 u32 stripe_index;
6028 int data_stripes;
5942 int i; 6029 int i;
5943 int ret = 0; 6030 int ret = 0;
5944 int num_stripes; 6031 int num_stripes;
@@ -5951,76 +6038,29 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
5951 int patch_the_first_stripe_for_dev_replace = 0; 6038 int patch_the_first_stripe_for_dev_replace = 0;
5952 u64 physical_to_patch_in_first_stripe = 0; 6039 u64 physical_to_patch_in_first_stripe = 0;
5953 u64 raid56_full_stripe_start = (u64)-1; 6040 u64 raid56_full_stripe_start = (u64)-1;
6041 struct btrfs_io_geometry geom;
6042
6043 ASSERT(bbio_ret);
5954 6044
5955 if (op == BTRFS_MAP_DISCARD) 6045 if (op == BTRFS_MAP_DISCARD)
5956 return __btrfs_map_block_for_discard(fs_info, logical, 6046 return __btrfs_map_block_for_discard(fs_info, logical,
5957 *length, bbio_ret); 6047 *length, bbio_ret);
5958 6048
5959 em = btrfs_get_chunk_map(fs_info, logical, *length); 6049 ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
5960 if (IS_ERR(em)) 6050 if (ret < 0)
5961 return PTR_ERR(em); 6051 return ret;
5962 6052
6053 em = btrfs_get_chunk_map(fs_info, logical, *length);
6054 ASSERT(em);
5963 map = em->map_lookup; 6055 map = em->map_lookup;
5964 offset = logical - em->start;
5965
5966 stripe_len = map->stripe_len;
5967 stripe_nr = offset;
5968 /*
5969 * stripe_nr counts the total number of stripes we have to stride
5970 * to get to this block
5971 */
5972 stripe_nr = div64_u64(stripe_nr, stripe_len);
5973
5974 stripe_offset = stripe_nr * stripe_len;
5975 if (offset < stripe_offset) {
5976 btrfs_crit(fs_info,
5977 "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu",
5978 stripe_offset, offset, em->start, logical,
5979 stripe_len);
5980 free_extent_map(em);
5981 return -EINVAL;
5982 }
5983
5984 /* stripe_offset is the offset of this block in its stripe*/
5985 stripe_offset = offset - stripe_offset;
5986
5987 /* if we're here for raid56, we need to know the stripe aligned start */
5988 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5989 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
5990 raid56_full_stripe_start = offset;
5991 6056
5992 /* allow a write of a full stripe, but make sure we don't 6057 *length = geom.len;
5993 * allow straddling of stripes 6058 offset = geom.offset;
5994 */ 6059 stripe_len = geom.stripe_len;
5995 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, 6060 stripe_nr = geom.stripe_nr;
5996 full_stripe_len); 6061 stripe_offset = geom.stripe_offset;
5997 raid56_full_stripe_start *= full_stripe_len; 6062 raid56_full_stripe_start = geom.raid56_stripe_offset;
5998 } 6063 data_stripes = nr_data_stripes(map);
5999
6000 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
6001 u64 max_len;
6002 /* For writes to RAID[56], allow a full stripeset across all disks.
6003 For other RAID types and for RAID[56] reads, just allow a single
6004 stripe (on a single disk). */
6005 if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6006 (op == BTRFS_MAP_WRITE)) {
6007 max_len = stripe_len * nr_data_stripes(map) -
6008 (offset - raid56_full_stripe_start);
6009 } else {
6010 /* we limit the length of each bio to what fits in a stripe */
6011 max_len = stripe_len - stripe_offset;
6012 }
6013 *length = min_t(u64, em->len - offset, max_len);
6014 } else {
6015 *length = em->len - offset;
6016 }
6017
6018 /*
6019 * This is for when we're called from btrfs_bio_fits_in_stripe and all
6020 * it cares about is the length
6021 */
6022 if (!bbio_ret)
6023 goto out;
6024 6064
6025 down_read(&dev_replace->rwsem); 6065 down_read(&dev_replace->rwsem);
6026 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); 6066 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
@@ -6052,7 +6092,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6052 &stripe_index); 6092 &stripe_index);
6053 if (!need_full_stripe(op)) 6093 if (!need_full_stripe(op))
6054 mirror_num = 1; 6094 mirror_num = 1;
6055 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 6095 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6056 if (need_full_stripe(op)) 6096 if (need_full_stripe(op))
6057 num_stripes = map->num_stripes; 6097 num_stripes = map->num_stripes;
6058 else if (mirror_num) 6098 else if (mirror_num)
@@ -6094,7 +6134,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6094 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { 6134 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
6095 /* push stripe_nr back to the start of the full stripe */ 6135 /* push stripe_nr back to the start of the full stripe */
6096 stripe_nr = div64_u64(raid56_full_stripe_start, 6136 stripe_nr = div64_u64(raid56_full_stripe_start,
6097 stripe_len * nr_data_stripes(map)); 6137 stripe_len * data_stripes);
6098 6138
6099 /* RAID[56] write or recovery. Return all stripes */ 6139 /* RAID[56] write or recovery. Return all stripes */
6100 num_stripes = map->num_stripes; 6140 num_stripes = map->num_stripes;
@@ -6110,10 +6150,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6110 * Mirror #3 is RAID6 Q block. 6150 * Mirror #3 is RAID6 Q block.
6111 */ 6151 */
6112 stripe_nr = div_u64_rem(stripe_nr, 6152 stripe_nr = div_u64_rem(stripe_nr,
6113 nr_data_stripes(map), &stripe_index); 6153 data_stripes, &stripe_index);
6114 if (mirror_num > 1) 6154 if (mirror_num > 1)
6115 stripe_index = nr_data_stripes(map) + 6155 stripe_index = data_stripes + mirror_num - 2;
6116 mirror_num - 2;
6117 6156
6118 /* We distribute the parity blocks across stripes */ 6157 /* We distribute the parity blocks across stripes */
6119 div_u64_rem(stripe_nr + stripe_index, map->num_stripes, 6158 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
@@ -6171,8 +6210,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6171 div_u64_rem(stripe_nr, num_stripes, &rot); 6210 div_u64_rem(stripe_nr, num_stripes, &rot);
6172 6211
6173 /* Fill in the logical address of each stripe */ 6212 /* Fill in the logical address of each stripe */
6174 tmp = stripe_nr * nr_data_stripes(map); 6213 tmp = stripe_nr * data_stripes;
6175 for (i = 0; i < nr_data_stripes(map); i++) 6214 for (i = 0; i < data_stripes; i++)
6176 bbio->raid_map[(i+rot) % num_stripes] = 6215 bbio->raid_map[(i+rot) % num_stripes] =
6177 em->start + (tmp + i) * map->stripe_len; 6216 em->start + (tmp + i) * map->stripe_len;
6178 6217
@@ -6687,7 +6726,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6687 struct btrfs_chunk *chunk) 6726 struct btrfs_chunk *chunk)
6688{ 6727{
6689 struct btrfs_fs_info *fs_info = leaf->fs_info; 6728 struct btrfs_fs_info *fs_info = leaf->fs_info;
6690 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 6729 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
6691 struct map_lookup *map; 6730 struct map_lookup *map;
6692 struct extent_map *em; 6731 struct extent_map *em;
6693 u64 logical; 6732 u64 logical;
@@ -6712,9 +6751,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6712 return ret; 6751 return ret;
6713 } 6752 }
6714 6753
6715 read_lock(&map_tree->map_tree.lock); 6754 read_lock(&map_tree->lock);
6716 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); 6755 em = lookup_extent_mapping(map_tree, logical, 1);
6717 read_unlock(&map_tree->map_tree.lock); 6756 read_unlock(&map_tree->lock);
6718 6757
6719 /* already mapped? */ 6758 /* already mapped? */
6720 if (em && em->start <= logical && em->start + em->len > logical) { 6759 if (em && em->start <= logical && em->start + em->len > logical) {
@@ -6783,9 +6822,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6783 6822
6784 } 6823 }
6785 6824
6786 write_lock(&map_tree->map_tree.lock); 6825 write_lock(&map_tree->lock);
6787 ret = add_extent_mapping(&map_tree->map_tree, em, 0); 6826 ret = add_extent_mapping(map_tree, em, 0);
6788 write_unlock(&map_tree->map_tree.lock); 6827 write_unlock(&map_tree->lock);
6789 if (ret < 0) { 6828 if (ret < 0) {
6790 btrfs_err(fs_info, 6829 btrfs_err(fs_info,
6791 "failed to add chunk map, start=%llu len=%llu: %d", 6830 "failed to add chunk map, start=%llu len=%llu: %d",
@@ -7103,14 +7142,14 @@ out_short_read:
7103bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, 7142bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
7104 struct btrfs_device *failing_dev) 7143 struct btrfs_device *failing_dev)
7105{ 7144{
7106 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 7145 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
7107 struct extent_map *em; 7146 struct extent_map *em;
7108 u64 next_start = 0; 7147 u64 next_start = 0;
7109 bool ret = true; 7148 bool ret = true;
7110 7149
7111 read_lock(&map_tree->map_tree.lock); 7150 read_lock(&map_tree->lock);
7112 em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1); 7151 em = lookup_extent_mapping(map_tree, 0, (u64)-1);
7113 read_unlock(&map_tree->map_tree.lock); 7152 read_unlock(&map_tree->lock);
7114 /* No chunk at all? Return false anyway */ 7153 /* No chunk at all? Return false anyway */
7115 if (!em) { 7154 if (!em) {
7116 ret = false; 7155 ret = false;
@@ -7148,10 +7187,10 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
7148 next_start = extent_map_end(em); 7187 next_start = extent_map_end(em);
7149 free_extent_map(em); 7188 free_extent_map(em);
7150 7189
7151 read_lock(&map_tree->map_tree.lock); 7190 read_lock(&map_tree->lock);
7152 em = lookup_extent_mapping(&map_tree->map_tree, next_start, 7191 em = lookup_extent_mapping(map_tree, next_start,
7153 (u64)(-1) - next_start); 7192 (u64)(-1) - next_start);
7154 read_unlock(&map_tree->map_tree.lock); 7193 read_unlock(&map_tree->lock);
7155 } 7194 }
7156out: 7195out:
7157 return ret; 7196 return ret;
@@ -7600,10 +7639,9 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
7600 */ 7639 */
7601int btrfs_bg_type_to_factor(u64 flags) 7640int btrfs_bg_type_to_factor(u64 flags)
7602{ 7641{
7603 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 7642 const int index = btrfs_bg_flags_to_raid_index(flags);
7604 BTRFS_BLOCK_GROUP_RAID10)) 7643
7605 return 2; 7644 return btrfs_raid_array[index].ncopies;
7606 return 1;
7607} 7645}
7608 7646
7609 7647
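
btrfs_chunk_max_errors() and btrfs_bg_type_to_factor() follow the same refactoring in this patch: translate the block group flags to a raid index once and read the attribute out of btrfs_raid_array instead of open-coding profile checks. A compact sketch of that flags-to-index-to-attribute lookup, with simplified placeholder bits and table contents rather than the real BTRFS_BLOCK_GROUP_* values:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the profile bits and the raid type enum. */
enum { BG_RAID1 = 1 << 0, BG_RAID5 = 1 << 1, BG_RAID6 = 1 << 2 };
enum demo_raid_types { R_RAID1, R_RAID5, R_RAID6, R_SINGLE, R_NR };

struct raid_attr { int tolerated_failures; int ncopies; };

/* Illustrative table in the spirit of btrfs_raid_array. */
static const struct raid_attr demo_raid[R_NR] = {
	[R_RAID1]  = { .tolerated_failures = 1, .ncopies = 2 },
	[R_RAID5]  = { .tolerated_failures = 1, .ncopies = 1 },
	[R_RAID6]  = { .tolerated_failures = 2, .ncopies = 1 },
	[R_SINGLE] = { .tolerated_failures = 0, .ncopies = 1 },
};

/* Analog of btrfs_bg_flags_to_raid_index(): profile bit -> table index. */
static enum demo_raid_types flags_to_index(uint64_t flags)
{
	if (flags & BG_RAID1) return R_RAID1;
	if (flags & BG_RAID5) return R_RAID5;
	if (flags & BG_RAID6) return R_RAID6;
	return R_SINGLE;
}

int main(void)
{
	uint64_t flags = BG_RAID6;
	const struct raid_attr *attr = &demo_raid[flags_to_index(flags)];

	printf("max tolerated failures: %d\n", attr->tolerated_failures); /* chunk_max_errors analog */
	printf("space factor (ncopies): %d\n", attr->ncopies);            /* bg_type_to_factor analog */
	return 0;
}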
@@ -7612,7 +7650,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7612 u64 chunk_offset, u64 devid, 7650 u64 chunk_offset, u64 devid,
7613 u64 physical_offset, u64 physical_len) 7651 u64 physical_offset, u64 physical_len)
7614{ 7652{
7615 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 7653 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
7616 struct extent_map *em; 7654 struct extent_map *em;
7617 struct map_lookup *map; 7655 struct map_lookup *map;
7618 struct btrfs_device *dev; 7656 struct btrfs_device *dev;
@@ -7701,7 +7739,7 @@ out:
7701 7739
7702static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) 7740static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
7703{ 7741{
7704 struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; 7742 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
7705 struct extent_map *em; 7743 struct extent_map *em;
7706 struct rb_node *node; 7744 struct rb_node *node;
7707 int ret = 0; 7745 int ret = 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 136a3eb64604..7f6aa1816409 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,6 +23,21 @@ struct btrfs_pending_bios {
23 struct bio *tail; 23 struct bio *tail;
24}; 24};
25 25
26struct btrfs_io_geometry {
27 /* remaining bytes before crossing a stripe */
28 u64 len;
29 /* offset of logical address in chunk */
30 u64 offset;
31 /* length of single IO stripe */
32 u64 stripe_len;
33 /* number of stripe where address falls */
34 u64 stripe_nr;
35 /* offset of address in stripe */
36 u64 stripe_offset;
37 /* offset of raid56 stripe into the chunk */
38 u64 raid56_stripe_offset;
39};
40
26/* 41/*
27 * Use sequence counter to get consistent device stat data on 42 * Use sequence counter to get consistent device stat data on
28 * 32-bit processors. 43 * 32-bit processors.
@@ -43,8 +58,8 @@ struct btrfs_pending_bios {
43#define BTRFS_DEV_STATE_FLUSH_SENT (4) 58#define BTRFS_DEV_STATE_FLUSH_SENT (4)
44 59
45struct btrfs_device { 60struct btrfs_device {
46 struct list_head dev_list; 61 struct list_head dev_list; /* device_list_mutex */
47 struct list_head dev_alloc_list; 62 struct list_head dev_alloc_list; /* chunk mutex */
48 struct list_head post_commit_list; /* chunk mutex */ 63 struct list_head post_commit_list; /* chunk mutex */
49 struct btrfs_fs_devices *fs_devices; 64 struct btrfs_fs_devices *fs_devices;
50 struct btrfs_fs_info *fs_info; 65 struct btrfs_fs_info *fs_info;
@@ -229,9 +244,14 @@ struct btrfs_fs_devices {
229 * this mutex lock. 244 * this mutex lock.
230 */ 245 */
231 struct mutex device_list_mutex; 246 struct mutex device_list_mutex;
247
248 /* List of all devices, protected by device_list_mutex */
232 struct list_head devices; 249 struct list_head devices;
233 250
234 /* devices not currently being allocated */ 251 /*
252 * Devices which can satisfy space allocation. Protected by
253 * chunk_mutex
254 */
235 struct list_head alloc_list; 255 struct list_head alloc_list;
236 256
237 struct btrfs_fs_devices *seed; 257 struct btrfs_fs_devices *seed;
@@ -336,16 +356,16 @@ struct btrfs_device_info {
336}; 356};
337 357
338struct btrfs_raid_attr { 358struct btrfs_raid_attr {
339 int sub_stripes; /* sub_stripes info for map */ 359 u8 sub_stripes; /* sub_stripes info for map */
340 int dev_stripes; /* stripes per dev */ 360 u8 dev_stripes; /* stripes per dev */
341 int devs_max; /* max devs to use */ 361 u8 devs_max; /* max devs to use */
342 int devs_min; /* min devs needed */ 362 u8 devs_min; /* min devs needed */
343 int tolerated_failures; /* max tolerated fail devs */ 363 u8 tolerated_failures; /* max tolerated fail devs */
344 int devs_increment; /* ndevs has to be a multiple of this */ 364 u8 devs_increment; /* ndevs has to be a multiple of this */
345 int ncopies; /* how many copies to data has */ 365 u8 ncopies; /* how many copies to data has */
346 int nparity; /* number of stripes worth of bytes to store 366 u8 nparity; /* number of stripes worth of bytes to store
347 * parity information */ 367 * parity information */
348 int mindev_error; /* error code if min devs requisite is unmet */ 368 u8 mindev_error; /* error code if min devs requisite is unmet */
349 const char raid_name[8]; /* name of the raid */ 369 const char raid_name[8]; /* name of the raid */
350 u64 bg_flag; /* block group flag of the raid */ 370 u64 bg_flag; /* block group flag of the raid */
351}; 371};
@@ -408,13 +428,14 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
408int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, 428int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
409 u64 logical, u64 *length, 429 u64 logical, u64 *length,
410 struct btrfs_bio **bbio_ret); 430 struct btrfs_bio **bbio_ret);
431int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
432 u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
411int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, 433int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
412 u64 physical, u64 **logical, int *naddrs, int *stripe_len); 434 u64 physical, u64 **logical, int *naddrs, int *stripe_len);
413int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); 435int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
414int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); 436int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
415int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); 437int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
416void btrfs_mapping_init(struct btrfs_mapping_tree *tree); 438void btrfs_mapping_tree_free(struct extent_map_tree *tree);
417void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
418blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, 439blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
419 int mirror_num, int async_submit); 440 int mirror_num, int async_submit);
420int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 441int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
@@ -557,8 +578,6 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
557 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ 578 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
558} 579}
559 580
560const char *get_raid_name(enum btrfs_raid_types type);
561
562void btrfs_commit_device_sizes(struct btrfs_transaction *trans); 581void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
563 582
564struct list_head *btrfs_get_fs_uuids(void); 583struct list_head *btrfs_get_fs_uuids(void);
@@ -568,6 +587,7 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
568 struct btrfs_device *failing_dev); 587 struct btrfs_device *failing_dev);
569 588
570int btrfs_bg_type_to_factor(u64 flags); 589int btrfs_bg_type_to_factor(u64 flags);
590const char *btrfs_bg_type_to_raid_name(u64 flags);
571int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); 591int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
572 592
573#endif 593#endif