author    Chris Mason <clm@fb.com>  2014-12-02 21:42:03 -0500
committer Chris Mason <clm@fb.com>  2014-12-02 21:42:03 -0500
commit    9627aeee3e203e30679549e4962633698a6bf87f (patch)
tree      30ee313a7049bf3fcc17e346df5737e967fd9a95 /fs/btrfs
parent    cb83b7b81698a4abe531e0ba18b9e288b06947ce (diff)
parent    5d3edd8f44aac94de7b16f4c54290e24f5e8c532 (diff)
Merge branch 'raid56-scrub-replace' of git://github.com/miaoxie/linux-btrfs into for-linus
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/ctree.c         14
-rw-r--r--  fs/btrfs/ctree.h          7
-rw-r--r--  fs/btrfs/dev-replace.c    9
-rw-r--r--  fs/btrfs/locking.c       24
-rw-r--r--  fs/btrfs/locking.h        2
-rw-r--r--  fs/btrfs/raid56.c       763
-rw-r--r--  fs/btrfs/raid56.h        16
-rw-r--r--  fs/btrfs/scrub.c        803
-rw-r--r--  fs/btrfs/volumes.c       52
-rw-r--r--  fs/btrfs/volumes.h       14
10 files changed, 1556 insertions, 148 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 817234168a7f..14a72ed14ef7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
80{ 80{
81 int i; 81 int i;
82 82
83#ifdef CONFIG_DEBUG_LOCK_ALLOC
84 /* lockdep really cares that we take all of these spinlocks
85 * in the right order. If any of the locks in the path are not
86 * currently blocking, it is going to complain. So, make really
87 * really sure by forcing the path to blocking before we clear
88 * the path blocking.
89 */
90 if (held) { 83 if (held) {
91 btrfs_set_lock_blocking_rw(held, held_rw); 84 btrfs_set_lock_blocking_rw(held, held_rw);
92 if (held_rw == BTRFS_WRITE_LOCK) 85 if (held_rw == BTRFS_WRITE_LOCK)
@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
95 held_rw = BTRFS_READ_LOCK_BLOCKING; 88 held_rw = BTRFS_READ_LOCK_BLOCKING;
96 } 89 }
97 btrfs_set_path_blocking(p); 90 btrfs_set_path_blocking(p);
98#endif
99 91
100 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { 92 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
101 if (p->nodes[i] && p->locks[i]) { 93 if (p->nodes[i] && p->locks[i]) {
@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
107 } 99 }
108 } 100 }
109 101
110#ifdef CONFIG_DEBUG_LOCK_ALLOC
111 if (held) 102 if (held)
112 btrfs_clear_lock_blocking_rw(held, held_rw); 103 btrfs_clear_lock_blocking_rw(held, held_rw);
113#endif
114} 104}
115 105
116/* this also releases the path */ 106/* this also releases the path */
@@ -2893,7 +2883,7 @@ cow_done:
2893 } 2883 }
2894 p->locks[level] = BTRFS_WRITE_LOCK; 2884 p->locks[level] = BTRFS_WRITE_LOCK;
2895 } else { 2885 } else {
2896 err = btrfs_try_tree_read_lock(b); 2886 err = btrfs_tree_read_lock_atomic(b);
2897 if (!err) { 2887 if (!err) {
2898 btrfs_set_path_blocking(p); 2888 btrfs_set_path_blocking(p);
2899 btrfs_tree_read_lock(b); 2889 btrfs_tree_read_lock(b);
@@ -3025,7 +3015,7 @@ again:
3025 } 3015 }
3026 3016
3027 level = btrfs_header_level(b); 3017 level = btrfs_header_level(b);
3028 err = btrfs_try_tree_read_lock(b); 3018 err = btrfs_tree_read_lock_atomic(b);
3029 if (!err) { 3019 if (!err) {
3030 btrfs_set_path_blocking(p); 3020 btrfs_set_path_blocking(p);
3031 btrfs_tree_read_lock(b); 3021 btrfs_tree_read_lock(b);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d71915e04e92..e6fbbd74b716 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4167,7 +4167,12 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
4167/* dev-replace.c */ 4167/* dev-replace.c */
4168void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); 4168void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
4169void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); 4169void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
4170void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); 4170void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
4171
4172static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
4173{
4174 btrfs_bio_counter_sub(fs_info, 1);
4175}
4171 4176
4172/* reada.c */ 4177/* reada.c */
4173struct reada_control { 4178struct reada_control {
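The ctree.h change above turns btrfs_bio_counter_dec() into a static inline wrapper around the new btrfs_bio_counter_sub(), so a caller that accounts several bios at once can drop the whole count in a single call. Later in this merge the raid56 code uses exactly that: each rbio accumulates references in generic_bio_cnt and releases them in one step when it completes. A minimal sketch of that pairing (the helper names below are illustrative, not taken from the patch):

static void rbio_take_bio_ref(struct btrfs_raid_bio *rbio)
{
	/* taken while a generic bio is in flight; never blocks */
	btrfs_bio_counter_inc_noblocked(rbio->fs_info);
	rbio->generic_bio_cnt++;
}

static void rbio_drop_bio_refs(struct btrfs_raid_bio *rbio)
{
	/* drop everything this rbio accumulated in one call */
	if (rbio->generic_bio_cnt)
		btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
}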
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 3fbd0628620b..ca6a3a3b6b6c 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -316,11 +316,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
316 struct btrfs_device *tgt_device = NULL; 316 struct btrfs_device *tgt_device = NULL;
317 struct btrfs_device *src_device = NULL; 317 struct btrfs_device *src_device = NULL;
318 318
319 if (btrfs_fs_incompat(fs_info, RAID56)) {
320 btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
321 return -EOPNOTSUPP;
322 }
323
324 switch (args->start.cont_reading_from_srcdev_mode) { 319 switch (args->start.cont_reading_from_srcdev_mode) {
325 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: 320 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
326 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: 321 case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
@@ -927,9 +922,9 @@ void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
927 percpu_counter_inc(&fs_info->bio_counter); 922 percpu_counter_inc(&fs_info->bio_counter);
928} 923}
929 924
930void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) 925void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
931{ 926{
932 percpu_counter_dec(&fs_info->bio_counter); 927 percpu_counter_sub(&fs_info->bio_counter, amount);
933 928
934 if (waitqueue_active(&fs_info->replace_wait)) 929 if (waitqueue_active(&fs_info->replace_wait))
935 wake_up(&fs_info->replace_wait); 930 wake_up(&fs_info->replace_wait);
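Two things happen in dev-replace.c: the guard that refused dev_replace on RAID5/6 filesystems is removed (the point of this merge), and the decrement helper becomes btrfs_bio_counter_sub() so it can drop more than one reference per call. The wake_up() on fs_info->replace_wait is kept, so whatever waits for in-flight bios to drain still observes the counter falling. A hedged sketch of such a waiter (the helper name is illustrative; the real blocking logic lives elsewhere in the dev-replace/volumes code):

/* illustrative only: block until every counted bio has completed */
static void wait_for_bio_counter_to_drain(struct btrfs_fs_info *fs_info)
{
	wait_event(fs_info->replace_wait,
		   percpu_counter_sum(&fs_info->bio_counter) == 0);
}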
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5665d2149249..f8229ef1b46d 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -128,6 +128,26 @@ again:
128} 128}
129 129
130/* 130/*
131 * take a spinning read lock.
132 * returns 1 if we get the read lock and 0 if we don't
133 * this won't wait for blocking writers
134 */
135int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
136{
137 if (atomic_read(&eb->blocking_writers))
138 return 0;
139
140 read_lock(&eb->lock);
141 if (atomic_read(&eb->blocking_writers)) {
142 read_unlock(&eb->lock);
143 return 0;
144 }
145 atomic_inc(&eb->read_locks);
146 atomic_inc(&eb->spinning_readers);
147 return 1;
148}
149
150/*
131 * returns 1 if we get the read lock and 0 if we don't 151 * returns 1 if we get the read lock and 0 if we don't
132 * this won't wait for blocking writers 152 * this won't wait for blocking writers
133 */ 153 */
@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
158 atomic_read(&eb->blocking_readers)) 178 atomic_read(&eb->blocking_readers))
159 return 0; 179 return 0;
160 180
161 if (!write_trylock(&eb->lock)) 181 write_lock(&eb->lock);
162 return 0;
163
164 if (atomic_read(&eb->blocking_writers) || 182 if (atomic_read(&eb->blocking_writers) ||
165 atomic_read(&eb->blocking_readers)) { 183 atomic_read(&eb->blocking_readers)) {
166 write_unlock(&eb->lock); 184 write_unlock(&eb->lock);
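locking.c gains btrfs_tree_read_lock_atomic(): unlike btrfs_try_tree_read_lock() it is willing to spin on the rwlock itself, but it still refuses to wait for blocking writers, so it never sleeps. btrfs_try_tree_write_lock() is relaxed the same way (write_lock() instead of write_trylock()). The ctree.c hunks earlier in this diff switch the tree-search path to the new helper with a blocking fallback; a condensed sketch of that pattern (not the literal btrfs_search_slot() code):

static int read_lock_eb_for_search(struct btrfs_path *p,
				   struct extent_buffer *b)
{
	if (btrfs_tree_read_lock_atomic(b))
		return BTRFS_READ_LOCK;		/* spinning lock, no sleep */

	/* a writer went blocking: drop the path to blocking and wait */
	btrfs_set_path_blocking(p);
	btrfs_tree_read_lock(b);
	return BTRFS_READ_LOCK_BLOCKING;
}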
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b81e0e9a4894..c44a9d5f5362 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
35void btrfs_assert_tree_locked(struct extent_buffer *eb); 35void btrfs_assert_tree_locked(struct extent_buffer *eb);
36int btrfs_try_tree_read_lock(struct extent_buffer *eb); 36int btrfs_try_tree_read_lock(struct extent_buffer *eb);
37int btrfs_try_tree_write_lock(struct extent_buffer *eb); 37int btrfs_try_tree_write_lock(struct extent_buffer *eb);
38int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
39
38 40
39static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) 41static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
40{ 42{
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 6a41631cb959..8ab2a17bbba8 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -58,9 +58,23 @@
58 */ 58 */
59#define RBIO_CACHE_READY_BIT 3 59#define RBIO_CACHE_READY_BIT 3
60 60
61/*
62 * bbio and raid_map is managed by the caller, so we shouldn't free
63 * them here. And besides that, all rbios with this flag should not
64 * be cached, because we need raid_map to check the rbios' stripe
65 * is the same or not, but it is very likely that the caller has
66 * free raid_map, so don't cache those rbios.
67 */
68#define RBIO_HOLD_BBIO_MAP_BIT 4
61 69
62#define RBIO_CACHE_SIZE 1024 70#define RBIO_CACHE_SIZE 1024
63 71
72enum btrfs_rbio_ops {
73 BTRFS_RBIO_WRITE = 0,
74 BTRFS_RBIO_READ_REBUILD = 1,
75 BTRFS_RBIO_PARITY_SCRUB = 2,
76};
77
64struct btrfs_raid_bio { 78struct btrfs_raid_bio {
65 struct btrfs_fs_info *fs_info; 79 struct btrfs_fs_info *fs_info;
66 struct btrfs_bio *bbio; 80 struct btrfs_bio *bbio;
@@ -117,13 +131,16 @@ struct btrfs_raid_bio {
117 /* number of data stripes (no p/q) */ 131 /* number of data stripes (no p/q) */
118 int nr_data; 132 int nr_data;
119 133
134 int real_stripes;
135
136 int stripe_npages;
120 /* 137 /*
121 * set if we're doing a parity rebuild 138 * set if we're doing a parity rebuild
122 * for a read from higher up, which is handled 139 * for a read from higher up, which is handled
123 * differently from a parity rebuild as part of 140 * differently from a parity rebuild as part of
124 * rmw 141 * rmw
125 */ 142 */
126 int read_rebuild; 143 enum btrfs_rbio_ops operation;
127 144
128 /* first bad stripe */ 145 /* first bad stripe */
129 int faila; 146 int faila;
@@ -131,6 +148,7 @@ struct btrfs_raid_bio {
131 /* second bad stripe (for raid6 use) */ 148 /* second bad stripe (for raid6 use) */
132 int failb; 149 int failb;
133 150
151 int scrubp;
134 /* 152 /*
135 * number of pages needed to represent the full 153 * number of pages needed to represent the full
136 * stripe 154 * stripe
@@ -144,8 +162,13 @@ struct btrfs_raid_bio {
144 */ 162 */
145 int bio_list_bytes; 163 int bio_list_bytes;
146 164
165 int generic_bio_cnt;
166
147 atomic_t refs; 167 atomic_t refs;
148 168
169 atomic_t stripes_pending;
170
171 atomic_t error;
149 /* 172 /*
150 * these are two arrays of pointers. We allocate the 173 * these are two arrays of pointers. We allocate the
151 * rbio big enough to hold them both and setup their 174 * rbio big enough to hold them both and setup their
@@ -162,6 +185,11 @@ struct btrfs_raid_bio {
162 * here for faster lookup 185 * here for faster lookup
163 */ 186 */
164 struct page **bio_pages; 187 struct page **bio_pages;
188
189 /*
190 * bitmap to record which horizontal stripe has data
191 */
192 unsigned long *dbitmap;
165}; 193};
166 194
167static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); 195static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -176,6 +204,10 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio);
176static void index_rbio_pages(struct btrfs_raid_bio *rbio); 204static void index_rbio_pages(struct btrfs_raid_bio *rbio);
177static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); 205static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
178 206
207static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
208 int need_check);
209static void async_scrub_parity(struct btrfs_raid_bio *rbio);
210
179/* 211/*
180 * the stripe hash table is used for locking, and to collect 212 * the stripe hash table is used for locking, and to collect
181 * bios in hopes of making a full stripe 213 * bios in hopes of making a full stripe
@@ -324,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
324{ 356{
325 bio_list_merge(&dest->bio_list, &victim->bio_list); 357 bio_list_merge(&dest->bio_list, &victim->bio_list);
326 dest->bio_list_bytes += victim->bio_list_bytes; 358 dest->bio_list_bytes += victim->bio_list_bytes;
359 dest->generic_bio_cnt += victim->generic_bio_cnt;
327 bio_list_init(&victim->bio_list); 360 bio_list_init(&victim->bio_list);
328} 361}
329 362
@@ -577,11 +610,20 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
577 cur->raid_map[0]) 610 cur->raid_map[0])
578 return 0; 611 return 0;
579 612
580 /* reads can't merge with writes */ 613 /* we can't merge with different operations */
581 if (last->read_rebuild != 614 if (last->operation != cur->operation)
582 cur->read_rebuild) { 615 return 0;
616 /*
617 * We've need read the full stripe from the drive.
618 * check and repair the parity and write the new results.
619 *
620 * We're not allowed to add any new bios to the
621 * bio list here, anyone else that wants to
622 * change this stripe needs to do their own rmw.
623 */
624 if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
625 cur->operation == BTRFS_RBIO_PARITY_SCRUB)
583 return 0; 626 return 0;
584 }
585 627
586 return 1; 628 return 1;
587} 629}
@@ -601,7 +643,7 @@ static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
601 */ 643 */
602static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) 644static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
603{ 645{
604 if (rbio->nr_data + 1 == rbio->bbio->num_stripes) 646 if (rbio->nr_data + 1 == rbio->real_stripes)
605 return NULL; 647 return NULL;
606 648
607 index += ((rbio->nr_data + 1) * rbio->stripe_len) >> 649 index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
@@ -772,11 +814,14 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
772 spin_unlock(&rbio->bio_list_lock); 814 spin_unlock(&rbio->bio_list_lock);
773 spin_unlock_irqrestore(&h->lock, flags); 815 spin_unlock_irqrestore(&h->lock, flags);
774 816
775 if (next->read_rebuild) 817 if (next->operation == BTRFS_RBIO_READ_REBUILD)
776 async_read_rebuild(next); 818 async_read_rebuild(next);
777 else { 819 else if (next->operation == BTRFS_RBIO_WRITE) {
778 steal_rbio(rbio, next); 820 steal_rbio(rbio, next);
779 async_rmw_stripe(next); 821 async_rmw_stripe(next);
822 } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
823 steal_rbio(rbio, next);
824 async_scrub_parity(next);
780 } 825 }
781 826
782 goto done_nolock; 827 goto done_nolock;
@@ -796,6 +841,21 @@ done_nolock:
796 remove_rbio_from_cache(rbio); 841 remove_rbio_from_cache(rbio);
797} 842}
798 843
844static inline void
845__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
846{
847 if (need) {
848 kfree(raid_map);
849 kfree(bbio);
850 }
851}
852
853static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
854{
855 __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
856 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
857}
858
799static void __free_raid_bio(struct btrfs_raid_bio *rbio) 859static void __free_raid_bio(struct btrfs_raid_bio *rbio)
800{ 860{
801 int i; 861 int i;
@@ -814,8 +874,9 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
814 rbio->stripe_pages[i] = NULL; 874 rbio->stripe_pages[i] = NULL;
815 } 875 }
816 } 876 }
817 kfree(rbio->raid_map); 877
818 kfree(rbio->bbio); 878 free_bbio_and_raid_map(rbio);
879
819 kfree(rbio); 880 kfree(rbio);
820} 881}
821 882
@@ -833,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
833{ 894{
834 struct bio *cur = bio_list_get(&rbio->bio_list); 895 struct bio *cur = bio_list_get(&rbio->bio_list);
835 struct bio *next; 896 struct bio *next;
897
898 if (rbio->generic_bio_cnt)
899 btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
900
836 free_raid_bio(rbio); 901 free_raid_bio(rbio);
837 902
838 while (cur) { 903 while (cur) {
@@ -858,13 +923,13 @@ static void raid_write_end_io(struct bio *bio, int err)
858 923
859 bio_put(bio); 924 bio_put(bio);
860 925
861 if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) 926 if (!atomic_dec_and_test(&rbio->stripes_pending))
862 return; 927 return;
863 928
864 err = 0; 929 err = 0;
865 930
866 /* OK, we have read all the stripes we need to. */ 931 /* OK, we have read all the stripes we need to. */
867 if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) 932 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
868 err = -EIO; 933 err = -EIO;
869 934
870 rbio_orig_end_io(rbio, err, 0); 935 rbio_orig_end_io(rbio, err, 0);
@@ -925,16 +990,16 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
925{ 990{
926 struct btrfs_raid_bio *rbio; 991 struct btrfs_raid_bio *rbio;
927 int nr_data = 0; 992 int nr_data = 0;
928 int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes); 993 int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
994 int num_pages = rbio_nr_pages(stripe_len, real_stripes);
995 int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
929 void *p; 996 void *p;
930 997
931 rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2, 998 rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
999 DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
932 GFP_NOFS); 1000 GFP_NOFS);
933 if (!rbio) { 1001 if (!rbio)
934 kfree(raid_map);
935 kfree(bbio);
936 return ERR_PTR(-ENOMEM); 1002 return ERR_PTR(-ENOMEM);
937 }
938 1003
939 bio_list_init(&rbio->bio_list); 1004 bio_list_init(&rbio->bio_list);
940 INIT_LIST_HEAD(&rbio->plug_list); 1005 INIT_LIST_HEAD(&rbio->plug_list);
@@ -946,9 +1011,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
946 rbio->fs_info = root->fs_info; 1011 rbio->fs_info = root->fs_info;
947 rbio->stripe_len = stripe_len; 1012 rbio->stripe_len = stripe_len;
948 rbio->nr_pages = num_pages; 1013 rbio->nr_pages = num_pages;
1014 rbio->real_stripes = real_stripes;
1015 rbio->stripe_npages = stripe_npages;
949 rbio->faila = -1; 1016 rbio->faila = -1;
950 rbio->failb = -1; 1017 rbio->failb = -1;
951 atomic_set(&rbio->refs, 1); 1018 atomic_set(&rbio->refs, 1);
1019 atomic_set(&rbio->error, 0);
1020 atomic_set(&rbio->stripes_pending, 0);
952 1021
953 /* 1022 /*
954 * the stripe_pages and bio_pages array point to the extra 1023 * the stripe_pages and bio_pages array point to the extra
@@ -957,11 +1026,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
957 p = rbio + 1; 1026 p = rbio + 1;
958 rbio->stripe_pages = p; 1027 rbio->stripe_pages = p;
959 rbio->bio_pages = p + sizeof(struct page *) * num_pages; 1028 rbio->bio_pages = p + sizeof(struct page *) * num_pages;
1029 rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
960 1030
961 if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) 1031 if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE)
962 nr_data = bbio->num_stripes - 2; 1032 nr_data = real_stripes - 2;
963 else 1033 else
964 nr_data = bbio->num_stripes - 1; 1034 nr_data = real_stripes - 1;
965 1035
966 rbio->nr_data = nr_data; 1036 rbio->nr_data = nr_data;
967 return rbio; 1037 return rbio;
@@ -1073,7 +1143,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1073static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) 1143static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
1074{ 1144{
1075 if (rbio->faila >= 0 || rbio->failb >= 0) { 1145 if (rbio->faila >= 0 || rbio->failb >= 0) {
1076 BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1); 1146 BUG_ON(rbio->faila == rbio->real_stripes - 1);
1077 __raid56_parity_recover(rbio); 1147 __raid56_parity_recover(rbio);
1078 } else { 1148 } else {
1079 finish_rmw(rbio); 1149 finish_rmw(rbio);
@@ -1134,7 +1204,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1134static noinline void finish_rmw(struct btrfs_raid_bio *rbio) 1204static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1135{ 1205{
1136 struct btrfs_bio *bbio = rbio->bbio; 1206 struct btrfs_bio *bbio = rbio->bbio;
1137 void *pointers[bbio->num_stripes]; 1207 void *pointers[rbio->real_stripes];
1138 int stripe_len = rbio->stripe_len; 1208 int stripe_len = rbio->stripe_len;
1139 int nr_data = rbio->nr_data; 1209 int nr_data = rbio->nr_data;
1140 int stripe; 1210 int stripe;
@@ -1148,11 +1218,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1148 1218
1149 bio_list_init(&bio_list); 1219 bio_list_init(&bio_list);
1150 1220
1151 if (bbio->num_stripes - rbio->nr_data == 1) { 1221 if (rbio->real_stripes - rbio->nr_data == 1) {
1152 p_stripe = bbio->num_stripes - 1; 1222 p_stripe = rbio->real_stripes - 1;
1153 } else if (bbio->num_stripes - rbio->nr_data == 2) { 1223 } else if (rbio->real_stripes - rbio->nr_data == 2) {
1154 p_stripe = bbio->num_stripes - 2; 1224 p_stripe = rbio->real_stripes - 2;
1155 q_stripe = bbio->num_stripes - 1; 1225 q_stripe = rbio->real_stripes - 1;
1156 } else { 1226 } else {
1157 BUG(); 1227 BUG();
1158 } 1228 }
@@ -1169,7 +1239,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1169 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 1239 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1170 spin_unlock_irq(&rbio->bio_list_lock); 1240 spin_unlock_irq(&rbio->bio_list_lock);
1171 1241
1172 atomic_set(&rbio->bbio->error, 0); 1242 atomic_set(&rbio->error, 0);
1173 1243
1174 /* 1244 /*
1175 * now that we've set rmw_locked, run through the 1245 * now that we've set rmw_locked, run through the
@@ -1209,7 +1279,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1209 SetPageUptodate(p); 1279 SetPageUptodate(p);
1210 pointers[stripe++] = kmap(p); 1280 pointers[stripe++] = kmap(p);
1211 1281
1212 raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, 1282 raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
1213 pointers); 1283 pointers);
1214 } else { 1284 } else {
1215 /* raid5 */ 1285 /* raid5 */
@@ -1218,7 +1288,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1218 } 1288 }
1219 1289
1220 1290
1221 for (stripe = 0; stripe < bbio->num_stripes; stripe++) 1291 for (stripe = 0; stripe < rbio->real_stripes; stripe++)
1222 kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); 1292 kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
1223 } 1293 }
1224 1294
@@ -1227,7 +1297,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1227 * higher layers (the bio_list in our rbio) and our p/q. Ignore 1297 * higher layers (the bio_list in our rbio) and our p/q. Ignore
1228 * everything else. 1298 * everything else.
1229 */ 1299 */
1230 for (stripe = 0; stripe < bbio->num_stripes; stripe++) { 1300 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1231 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { 1301 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1232 struct page *page; 1302 struct page *page;
1233 if (stripe < rbio->nr_data) { 1303 if (stripe < rbio->nr_data) {
@@ -1245,8 +1315,34 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1245 } 1315 }
1246 } 1316 }
1247 1317
1248 atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list)); 1318 if (likely(!bbio->num_tgtdevs))
1249 BUG_ON(atomic_read(&bbio->stripes_pending) == 0); 1319 goto write_data;
1320
1321 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1322 if (!bbio->tgtdev_map[stripe])
1323 continue;
1324
1325 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1326 struct page *page;
1327 if (stripe < rbio->nr_data) {
1328 page = page_in_rbio(rbio, stripe, pagenr, 1);
1329 if (!page)
1330 continue;
1331 } else {
1332 page = rbio_stripe_page(rbio, stripe, pagenr);
1333 }
1334
1335 ret = rbio_add_io_page(rbio, &bio_list, page,
1336 rbio->bbio->tgtdev_map[stripe],
1337 pagenr, rbio->stripe_len);
1338 if (ret)
1339 goto cleanup;
1340 }
1341 }
1342
1343write_data:
1344 atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
1345 BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
1250 1346
1251 while (1) { 1347 while (1) {
1252 bio = bio_list_pop(&bio_list); 1348 bio = bio_list_pop(&bio_list);
@@ -1283,7 +1379,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1283 stripe = &rbio->bbio->stripes[i]; 1379 stripe = &rbio->bbio->stripes[i];
1284 stripe_start = stripe->physical; 1380 stripe_start = stripe->physical;
1285 if (physical >= stripe_start && 1381 if (physical >= stripe_start &&
1286 physical < stripe_start + rbio->stripe_len) { 1382 physical < stripe_start + rbio->stripe_len &&
1383 bio->bi_bdev == stripe->dev->bdev) {
1287 return i; 1384 return i;
1288 } 1385 }
1289 } 1386 }
@@ -1331,11 +1428,11 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
1331 if (rbio->faila == -1) { 1428 if (rbio->faila == -1) {
1332 /* first failure on this rbio */ 1429 /* first failure on this rbio */
1333 rbio->faila = failed; 1430 rbio->faila = failed;
1334 atomic_inc(&rbio->bbio->error); 1431 atomic_inc(&rbio->error);
1335 } else if (rbio->failb == -1) { 1432 } else if (rbio->failb == -1) {
1336 /* second failure on this rbio */ 1433 /* second failure on this rbio */
1337 rbio->failb = failed; 1434 rbio->failb = failed;
1338 atomic_inc(&rbio->bbio->error); 1435 atomic_inc(&rbio->error);
1339 } else { 1436 } else {
1340 ret = -EIO; 1437 ret = -EIO;
1341 } 1438 }
@@ -1394,11 +1491,11 @@ static void raid_rmw_end_io(struct bio *bio, int err)
1394 1491
1395 bio_put(bio); 1492 bio_put(bio);
1396 1493
1397 if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) 1494 if (!atomic_dec_and_test(&rbio->stripes_pending))
1398 return; 1495 return;
1399 1496
1400 err = 0; 1497 err = 0;
1401 if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) 1498 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
1402 goto cleanup; 1499 goto cleanup;
1403 1500
1404 /* 1501 /*
@@ -1439,7 +1536,6 @@ static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1439static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) 1536static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1440{ 1537{
1441 int bios_to_read = 0; 1538 int bios_to_read = 0;
1442 struct btrfs_bio *bbio = rbio->bbio;
1443 struct bio_list bio_list; 1539 struct bio_list bio_list;
1444 int ret; 1540 int ret;
1445 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); 1541 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
@@ -1455,7 +1551,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1455 1551
1456 index_rbio_pages(rbio); 1552 index_rbio_pages(rbio);
1457 1553
1458 atomic_set(&rbio->bbio->error, 0); 1554 atomic_set(&rbio->error, 0);
1459 /* 1555 /*
1460 * build a list of bios to read all the missing parts of this 1556 * build a list of bios to read all the missing parts of this
1461 * stripe 1557 * stripe
@@ -1503,7 +1599,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1503 * the bbio may be freed once we submit the last bio. Make sure 1599 * the bbio may be freed once we submit the last bio. Make sure
1504 * not to touch it after that 1600 * not to touch it after that
1505 */ 1601 */
1506 atomic_set(&bbio->stripes_pending, bios_to_read); 1602 atomic_set(&rbio->stripes_pending, bios_to_read);
1507 while (1) { 1603 while (1) {
1508 bio = bio_list_pop(&bio_list); 1604 bio = bio_list_pop(&bio_list);
1509 if (!bio) 1605 if (!bio)
@@ -1686,19 +1782,30 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1686 struct btrfs_raid_bio *rbio; 1782 struct btrfs_raid_bio *rbio;
1687 struct btrfs_plug_cb *plug = NULL; 1783 struct btrfs_plug_cb *plug = NULL;
1688 struct blk_plug_cb *cb; 1784 struct blk_plug_cb *cb;
1785 int ret;
1689 1786
1690 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1787 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1691 if (IS_ERR(rbio)) 1788 if (IS_ERR(rbio)) {
1789 __free_bbio_and_raid_map(bbio, raid_map, 1);
1692 return PTR_ERR(rbio); 1790 return PTR_ERR(rbio);
1791 }
1693 bio_list_add(&rbio->bio_list, bio); 1792 bio_list_add(&rbio->bio_list, bio);
1694 rbio->bio_list_bytes = bio->bi_iter.bi_size; 1793 rbio->bio_list_bytes = bio->bi_iter.bi_size;
1794 rbio->operation = BTRFS_RBIO_WRITE;
1795
1796 btrfs_bio_counter_inc_noblocked(root->fs_info);
1797 rbio->generic_bio_cnt = 1;
1695 1798
1696 /* 1799 /*
1697 * don't plug on full rbios, just get them out the door 1800 * don't plug on full rbios, just get them out the door
1698 * as quickly as we can 1801 * as quickly as we can
1699 */ 1802 */
1700 if (rbio_is_full(rbio)) 1803 if (rbio_is_full(rbio)) {
1701 return full_stripe_write(rbio); 1804 ret = full_stripe_write(rbio);
1805 if (ret)
1806 btrfs_bio_counter_dec(root->fs_info);
1807 return ret;
1808 }
1702 1809
1703 cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, 1810 cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
1704 sizeof(*plug)); 1811 sizeof(*plug));
@@ -1709,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1709 INIT_LIST_HEAD(&plug->rbio_list); 1816 INIT_LIST_HEAD(&plug->rbio_list);
1710 } 1817 }
1711 list_add_tail(&rbio->plug_list, &plug->rbio_list); 1818 list_add_tail(&rbio->plug_list, &plug->rbio_list);
1819 ret = 0;
1712 } else { 1820 } else {
1713 return __raid56_parity_write(rbio); 1821 ret = __raid56_parity_write(rbio);
1822 if (ret)
1823 btrfs_bio_counter_dec(root->fs_info);
1714 } 1824 }
1715 return 0; 1825 return ret;
1716} 1826}
1717 1827
1718/* 1828/*
@@ -1730,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1730 int err; 1840 int err;
1731 int i; 1841 int i;
1732 1842
1733 pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *), 1843 pointers = kzalloc(rbio->real_stripes * sizeof(void *),
1734 GFP_NOFS); 1844 GFP_NOFS);
1735 if (!pointers) { 1845 if (!pointers) {
1736 err = -ENOMEM; 1846 err = -ENOMEM;
@@ -1740,7 +1850,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1740 faila = rbio->faila; 1850 faila = rbio->faila;
1741 failb = rbio->failb; 1851 failb = rbio->failb;
1742 1852
1743 if (rbio->read_rebuild) { 1853 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
1744 spin_lock_irq(&rbio->bio_list_lock); 1854 spin_lock_irq(&rbio->bio_list_lock);
1745 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 1855 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1746 spin_unlock_irq(&rbio->bio_list_lock); 1856 spin_unlock_irq(&rbio->bio_list_lock);
@@ -1749,15 +1859,23 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1749 index_rbio_pages(rbio); 1859 index_rbio_pages(rbio);
1750 1860
1751 for (pagenr = 0; pagenr < nr_pages; pagenr++) { 1861 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1862 /*
1863 * Now we just use bitmap to mark the horizontal stripes in
1864 * which we have data when doing parity scrub.
1865 */
1866 if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
1867 !test_bit(pagenr, rbio->dbitmap))
1868 continue;
1869
1752 /* setup our array of pointers with pages 1870 /* setup our array of pointers with pages
1753 * from each stripe 1871 * from each stripe
1754 */ 1872 */
1755 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { 1873 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1756 /* 1874 /*
1757 * if we're rebuilding a read, we have to use 1875 * if we're rebuilding a read, we have to use
1758 * pages from the bio list 1876 * pages from the bio list
1759 */ 1877 */
1760 if (rbio->read_rebuild && 1878 if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
1761 (stripe == faila || stripe == failb)) { 1879 (stripe == faila || stripe == failb)) {
1762 page = page_in_rbio(rbio, stripe, pagenr, 0); 1880 page = page_in_rbio(rbio, stripe, pagenr, 0);
1763 } else { 1881 } else {
@@ -1767,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1767 } 1885 }
1768 1886
1769 /* all raid6 handling here */ 1887 /* all raid6 handling here */
1770 if (rbio->raid_map[rbio->bbio->num_stripes - 1] == 1888 if (rbio->raid_map[rbio->real_stripes - 1] ==
1771 RAID6_Q_STRIPE) { 1889 RAID6_Q_STRIPE) {
1772 1890
1773 /* 1891 /*
@@ -1817,10 +1935,10 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1817 } 1935 }
1818 1936
1819 if (rbio->raid_map[failb] == RAID5_P_STRIPE) { 1937 if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
1820 raid6_datap_recov(rbio->bbio->num_stripes, 1938 raid6_datap_recov(rbio->real_stripes,
1821 PAGE_SIZE, faila, pointers); 1939 PAGE_SIZE, faila, pointers);
1822 } else { 1940 } else {
1823 raid6_2data_recov(rbio->bbio->num_stripes, 1941 raid6_2data_recov(rbio->real_stripes,
1824 PAGE_SIZE, faila, failb, 1942 PAGE_SIZE, faila, failb,
1825 pointers); 1943 pointers);
1826 } 1944 }
@@ -1850,7 +1968,7 @@ pstripe:
1850 * know they can be trusted. If this was a read reconstruction, 1968 * know they can be trusted. If this was a read reconstruction,
1851 * other endio functions will fiddle the uptodate bits 1969 * other endio functions will fiddle the uptodate bits
1852 */ 1970 */
1853 if (!rbio->read_rebuild) { 1971 if (rbio->operation == BTRFS_RBIO_WRITE) {
1854 for (i = 0; i < nr_pages; i++) { 1972 for (i = 0; i < nr_pages; i++) {
1855 if (faila != -1) { 1973 if (faila != -1) {
1856 page = rbio_stripe_page(rbio, faila, i); 1974 page = rbio_stripe_page(rbio, faila, i);
@@ -1862,12 +1980,12 @@ pstripe:
1862 } 1980 }
1863 } 1981 }
1864 } 1982 }
1865 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { 1983 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1866 /* 1984 /*
1867 * if we're rebuilding a read, we have to use 1985 * if we're rebuilding a read, we have to use
1868 * pages from the bio list 1986 * pages from the bio list
1869 */ 1987 */
1870 if (rbio->read_rebuild && 1988 if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
1871 (stripe == faila || stripe == failb)) { 1989 (stripe == faila || stripe == failb)) {
1872 page = page_in_rbio(rbio, stripe, pagenr, 0); 1990 page = page_in_rbio(rbio, stripe, pagenr, 0);
1873 } else { 1991 } else {
@@ -1882,9 +2000,9 @@ cleanup:
1882 kfree(pointers); 2000 kfree(pointers);
1883 2001
1884cleanup_io: 2002cleanup_io:
1885 2003 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
1886 if (rbio->read_rebuild) { 2004 if (err == 0 &&
1887 if (err == 0) 2005 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
1888 cache_rbio_pages(rbio); 2006 cache_rbio_pages(rbio);
1889 else 2007 else
1890 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 2008 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -1893,7 +2011,13 @@ cleanup_io:
1893 } else if (err == 0) { 2011 } else if (err == 0) {
1894 rbio->faila = -1; 2012 rbio->faila = -1;
1895 rbio->failb = -1; 2013 rbio->failb = -1;
1896 finish_rmw(rbio); 2014
2015 if (rbio->operation == BTRFS_RBIO_WRITE)
2016 finish_rmw(rbio);
2017 else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
2018 finish_parity_scrub(rbio, 0);
2019 else
2020 BUG();
1897 } else { 2021 } else {
1898 rbio_orig_end_io(rbio, err, 0); 2022 rbio_orig_end_io(rbio, err, 0);
1899 } 2023 }
@@ -1917,10 +2041,10 @@ static void raid_recover_end_io(struct bio *bio, int err)
1917 set_bio_pages_uptodate(bio); 2041 set_bio_pages_uptodate(bio);
1918 bio_put(bio); 2042 bio_put(bio);
1919 2043
1920 if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) 2044 if (!atomic_dec_and_test(&rbio->stripes_pending))
1921 return; 2045 return;
1922 2046
1923 if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) 2047 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
1924 rbio_orig_end_io(rbio, -EIO, 0); 2048 rbio_orig_end_io(rbio, -EIO, 0);
1925 else 2049 else
1926 __raid_recover_end_io(rbio); 2050 __raid_recover_end_io(rbio);
@@ -1937,7 +2061,6 @@ static void raid_recover_end_io(struct bio *bio, int err)
1937static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) 2061static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1938{ 2062{
1939 int bios_to_read = 0; 2063 int bios_to_read = 0;
1940 struct btrfs_bio *bbio = rbio->bbio;
1941 struct bio_list bio_list; 2064 struct bio_list bio_list;
1942 int ret; 2065 int ret;
1943 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); 2066 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
@@ -1951,16 +2074,16 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1951 if (ret) 2074 if (ret)
1952 goto cleanup; 2075 goto cleanup;
1953 2076
1954 atomic_set(&rbio->bbio->error, 0); 2077 atomic_set(&rbio->error, 0);
1955 2078
1956 /* 2079 /*
1957 * read everything that hasn't failed. Thanks to the 2080 * read everything that hasn't failed. Thanks to the
1958 * stripe cache, it is possible that some or all of these 2081 * stripe cache, it is possible that some or all of these
1959 * pages are going to be uptodate. 2082 * pages are going to be uptodate.
1960 */ 2083 */
1961 for (stripe = 0; stripe < bbio->num_stripes; stripe++) { 2084 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1962 if (rbio->faila == stripe || rbio->failb == stripe) { 2085 if (rbio->faila == stripe || rbio->failb == stripe) {
1963 atomic_inc(&rbio->bbio->error); 2086 atomic_inc(&rbio->error);
1964 continue; 2087 continue;
1965 } 2088 }
1966 2089
@@ -1990,7 +2113,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1990 * were up to date, or we might have no bios to read because 2113 * were up to date, or we might have no bios to read because
1991 * the devices were gone. 2114 * the devices were gone.
1992 */ 2115 */
1993 if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) { 2116 if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
1994 __raid_recover_end_io(rbio); 2117 __raid_recover_end_io(rbio);
1995 goto out; 2118 goto out;
1996 } else { 2119 } else {
@@ -2002,7 +2125,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
2002 * the bbio may be freed once we submit the last bio. Make sure 2125 * the bbio may be freed once we submit the last bio. Make sure
2003 * not to touch it after that 2126 * not to touch it after that
2004 */ 2127 */
2005 atomic_set(&bbio->stripes_pending, bios_to_read); 2128 atomic_set(&rbio->stripes_pending, bios_to_read);
2006 while (1) { 2129 while (1) {
2007 bio = bio_list_pop(&bio_list); 2130 bio = bio_list_pop(&bio_list);
2008 if (!bio) 2131 if (!bio)
@@ -2021,7 +2144,7 @@ out:
2021 return 0; 2144 return 0;
2022 2145
2023cleanup: 2146cleanup:
2024 if (rbio->read_rebuild) 2147 if (rbio->operation == BTRFS_RBIO_READ_REBUILD)
2025 rbio_orig_end_io(rbio, -EIO, 0); 2148 rbio_orig_end_io(rbio, -EIO, 0);
2026 return -EIO; 2149 return -EIO;
2027} 2150}
@@ -2034,34 +2157,42 @@ cleanup:
2034 */ 2157 */
2035int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 2158int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2036 struct btrfs_bio *bbio, u64 *raid_map, 2159 struct btrfs_bio *bbio, u64 *raid_map,
2037 u64 stripe_len, int mirror_num) 2160 u64 stripe_len, int mirror_num, int generic_io)
2038{ 2161{
2039 struct btrfs_raid_bio *rbio; 2162 struct btrfs_raid_bio *rbio;
2040 int ret; 2163 int ret;
2041 2164
2042 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2165 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2043 if (IS_ERR(rbio)) 2166 if (IS_ERR(rbio)) {
2167 __free_bbio_and_raid_map(bbio, raid_map, generic_io);
2044 return PTR_ERR(rbio); 2168 return PTR_ERR(rbio);
2169 }
2045 2170
2046 rbio->read_rebuild = 1; 2171 rbio->operation = BTRFS_RBIO_READ_REBUILD;
2047 bio_list_add(&rbio->bio_list, bio); 2172 bio_list_add(&rbio->bio_list, bio);
2048 rbio->bio_list_bytes = bio->bi_iter.bi_size; 2173 rbio->bio_list_bytes = bio->bi_iter.bi_size;
2049 2174
2050 rbio->faila = find_logical_bio_stripe(rbio, bio); 2175 rbio->faila = find_logical_bio_stripe(rbio, bio);
2051 if (rbio->faila == -1) { 2176 if (rbio->faila == -1) {
2052 BUG(); 2177 BUG();
2053 kfree(raid_map); 2178 __free_bbio_and_raid_map(bbio, raid_map, generic_io);
2054 kfree(bbio);
2055 kfree(rbio); 2179 kfree(rbio);
2056 return -EIO; 2180 return -EIO;
2057 } 2181 }
2058 2182
2183 if (generic_io) {
2184 btrfs_bio_counter_inc_noblocked(root->fs_info);
2185 rbio->generic_bio_cnt = 1;
2186 } else {
2187 set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags);
2188 }
2189
2059 /* 2190 /*
2060 * reconstruct from the q stripe if they are 2191 * reconstruct from the q stripe if they are
2061 * asking for mirror 3 2192 * asking for mirror 3
2062 */ 2193 */
2063 if (mirror_num == 3) 2194 if (mirror_num == 3)
2064 rbio->failb = bbio->num_stripes - 2; 2195 rbio->failb = rbio->real_stripes - 2;
2065 2196
2066 ret = lock_stripe_add(rbio); 2197 ret = lock_stripe_add(rbio);
2067 2198
@@ -2098,3 +2229,483 @@ static void read_rebuild_work(struct btrfs_work *work)
2098 rbio = container_of(work, struct btrfs_raid_bio, work); 2229 rbio = container_of(work, struct btrfs_raid_bio, work);
2099 __raid56_parity_recover(rbio); 2230 __raid56_parity_recover(rbio);
2100} 2231}
2232
2233/*
2234 * The following code is used to scrub/replace the parity stripe
2235 *
2236 * Note: We need make sure all the pages that add into the scrub/replace
2237 * raid bio are correct and not be changed during the scrub/replace. That
2238 * is those pages just hold metadata or file data with checksum.
2239 */
2240
2241struct btrfs_raid_bio *
2242raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
2243 struct btrfs_bio *bbio, u64 *raid_map,
2244 u64 stripe_len, struct btrfs_device *scrub_dev,
2245 unsigned long *dbitmap, int stripe_nsectors)
2246{
2247 struct btrfs_raid_bio *rbio;
2248 int i;
2249
2250 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2251 if (IS_ERR(rbio))
2252 return NULL;
2253 bio_list_add(&rbio->bio_list, bio);
2254 /*
2255 * This is a special bio which is used to hold the completion handler
2256 * and make the scrub rbio is similar to the other types
2257 */
2258 ASSERT(!bio->bi_iter.bi_size);
2259 rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
2260
2261 for (i = 0; i < rbio->real_stripes; i++) {
2262 if (bbio->stripes[i].dev == scrub_dev) {
2263 rbio->scrubp = i;
2264 break;
2265 }
2266 }
2267
2268 /* Now we just support the sectorsize equals to page size */
2269 ASSERT(root->sectorsize == PAGE_SIZE);
2270 ASSERT(rbio->stripe_npages == stripe_nsectors);
2271 bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
2272
2273 return rbio;
2274}
2275
2276void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
2277 struct page *page, u64 logical)
2278{
2279 int stripe_offset;
2280 int index;
2281
2282 ASSERT(logical >= rbio->raid_map[0]);
2283 ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] +
2284 rbio->stripe_len * rbio->nr_data);
2285 stripe_offset = (int)(logical - rbio->raid_map[0]);
2286 index = stripe_offset >> PAGE_CACHE_SHIFT;
2287 rbio->bio_pages[index] = page;
2288}
2289
2290/*
2291 * We just scrub the parity that we have correct data on the same horizontal,
2292 * so we needn't allocate all pages for all the stripes.
2293 */
2294static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
2295{
2296 int i;
2297 int bit;
2298 int index;
2299 struct page *page;
2300
2301 for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
2302 for (i = 0; i < rbio->real_stripes; i++) {
2303 index = i * rbio->stripe_npages + bit;
2304 if (rbio->stripe_pages[index])
2305 continue;
2306
2307 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2308 if (!page)
2309 return -ENOMEM;
2310 rbio->stripe_pages[index] = page;
2311 ClearPageUptodate(page);
2312 }
2313 }
2314 return 0;
2315}
2316
2317/*
2318 * end io function used by finish_rmw. When we finally
2319 * get here, we've written a full stripe
2320 */
2321static void raid_write_parity_end_io(struct bio *bio, int err)
2322{
2323 struct btrfs_raid_bio *rbio = bio->bi_private;
2324
2325 if (err)
2326 fail_bio_stripe(rbio, bio);
2327
2328 bio_put(bio);
2329
2330 if (!atomic_dec_and_test(&rbio->stripes_pending))
2331 return;
2332
2333 err = 0;
2334
2335 if (atomic_read(&rbio->error))
2336 err = -EIO;
2337
2338 rbio_orig_end_io(rbio, err, 0);
2339}
2340
2341static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
2342 int need_check)
2343{
2344 struct btrfs_bio *bbio = rbio->bbio;
2345 void *pointers[rbio->real_stripes];
2346 DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
2347 int nr_data = rbio->nr_data;
2348 int stripe;
2349 int pagenr;
2350 int p_stripe = -1;
2351 int q_stripe = -1;
2352 struct page *p_page = NULL;
2353 struct page *q_page = NULL;
2354 struct bio_list bio_list;
2355 struct bio *bio;
2356 int is_replace = 0;
2357 int ret;
2358
2359 bio_list_init(&bio_list);
2360
2361 if (rbio->real_stripes - rbio->nr_data == 1) {
2362 p_stripe = rbio->real_stripes - 1;
2363 } else if (rbio->real_stripes - rbio->nr_data == 2) {
2364 p_stripe = rbio->real_stripes - 2;
2365 q_stripe = rbio->real_stripes - 1;
2366 } else {
2367 BUG();
2368 }
2369
2370 if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
2371 is_replace = 1;
2372 bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
2373 }
2374
2375 /*
2376 * Because the higher layers(scrubber) are unlikely to
2377 * use this area of the disk again soon, so don't cache
2378 * it.
2379 */
2380 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2381
2382 if (!need_check)
2383 goto writeback;
2384
2385 p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2386 if (!p_page)
2387 goto cleanup;
2388 SetPageUptodate(p_page);
2389
2390 if (q_stripe != -1) {
2391 q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2392 if (!q_page) {
2393 __free_page(p_page);
2394 goto cleanup;
2395 }
2396 SetPageUptodate(q_page);
2397 }
2398
2399 atomic_set(&rbio->error, 0);
2400
2401 for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
2402 struct page *p;
2403 void *parity;
2404 /* first collect one page from each data stripe */
2405 for (stripe = 0; stripe < nr_data; stripe++) {
2406 p = page_in_rbio(rbio, stripe, pagenr, 0);
2407 pointers[stripe] = kmap(p);
2408 }
2409
2410 /* then add the parity stripe */
2411 pointers[stripe++] = kmap(p_page);
2412
2413 if (q_stripe != -1) {
2414
2415 /*
2416 * raid6, add the qstripe and call the
2417 * library function to fill in our p/q
2418 */
2419 pointers[stripe++] = kmap(q_page);
2420
2421 raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
2422 pointers);
2423 } else {
2424 /* raid5 */
2425 memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
2426 run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
2427 }
2428
2429 /* Check scrubbing pairty and repair it */
2430 p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2431 parity = kmap(p);
2432 if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE))
2433 memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE);
2434 else
2435 /* Parity is right, needn't writeback */
2436 bitmap_clear(rbio->dbitmap, pagenr, 1);
2437 kunmap(p);
2438
2439 for (stripe = 0; stripe < rbio->real_stripes; stripe++)
2440 kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
2441 }
2442
2443 __free_page(p_page);
2444 if (q_page)
2445 __free_page(q_page);
2446
2447writeback:
2448 /*
2449 * time to start writing. Make bios for everything from the
2450 * higher layers (the bio_list in our rbio) and our p/q. Ignore
2451 * everything else.
2452 */
2453 for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
2454 struct page *page;
2455
2456 page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2457 ret = rbio_add_io_page(rbio, &bio_list,
2458 page, rbio->scrubp, pagenr, rbio->stripe_len);
2459 if (ret)
2460 goto cleanup;
2461 }
2462
2463 if (!is_replace)
2464 goto submit_write;
2465
2466 for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
2467 struct page *page;
2468
2469 page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2470 ret = rbio_add_io_page(rbio, &bio_list, page,
2471 bbio->tgtdev_map[rbio->scrubp],
2472 pagenr, rbio->stripe_len);
2473 if (ret)
2474 goto cleanup;
2475 }
2476
2477submit_write:
2478 nr_data = bio_list_size(&bio_list);
2479 if (!nr_data) {
2480 /* Every parity is right */
2481 rbio_orig_end_io(rbio, 0, 0);
2482 return;
2483 }
2484
2485 atomic_set(&rbio->stripes_pending, nr_data);
2486
2487 while (1) {
2488 bio = bio_list_pop(&bio_list);
2489 if (!bio)
2490 break;
2491
2492 bio->bi_private = rbio;
2493 bio->bi_end_io = raid_write_parity_end_io;
2494 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
2495 submit_bio(WRITE, bio);
2496 }
2497 return;
2498
2499cleanup:
2500 rbio_orig_end_io(rbio, -EIO, 0);
2501}
2502
2503static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
2504{
2505 if (stripe >= 0 && stripe < rbio->nr_data)
2506 return 1;
2507 return 0;
2508}
2509
2510/*
2511 * While we're doing the parity check and repair, we could have errors
2512 * in reading pages off the disk. This checks for errors and if we're
2513 * not able to read the page it'll trigger parity reconstruction. The
2514 * parity scrub will be finished after we've reconstructed the failed
2515 * stripes
2516 */
2517static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
2518{
2519 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
2520 goto cleanup;
2521
2522 if (rbio->faila >= 0 || rbio->failb >= 0) {
2523 int dfail = 0, failp = -1;
2524
2525 if (is_data_stripe(rbio, rbio->faila))
2526 dfail++;
2527 else if (is_parity_stripe(rbio->faila))
2528 failp = rbio->faila;
2529
2530 if (is_data_stripe(rbio, rbio->failb))
2531 dfail++;
2532 else if (is_parity_stripe(rbio->failb))
2533 failp = rbio->failb;
2534
2535 /*
2536 * Because we can not use a scrubbing parity to repair
2537 * the data, so the capability of the repair is declined.
2538 * (In the case of RAID5, we can not repair anything)
2539 */
2540 if (dfail > rbio->bbio->max_errors - 1)
2541 goto cleanup;
2542
2543 /*
2544 * If all data is good, only parity is correctly, just
2545 * repair the parity.
2546 */
2547 if (dfail == 0) {
2548 finish_parity_scrub(rbio, 0);
2549 return;
2550 }
2551
2552 /*
2553 * Here means we got one corrupted data stripe and one
2554 * corrupted parity on RAID6, if the corrupted parity
2555 * is scrubbing parity, luckly, use the other one to repair
2556 * the data, or we can not repair the data stripe.
2557 */
2558 if (failp != rbio->scrubp)
2559 goto cleanup;
2560
2561 __raid_recover_end_io(rbio);
2562 } else {
2563 finish_parity_scrub(rbio, 1);
2564 }
2565 return;
2566
2567cleanup:
2568 rbio_orig_end_io(rbio, -EIO, 0);
2569}
2570
2571/*
2572 * end io for the read phase of the rmw cycle. All the bios here are physical
2573 * stripe bios we've read from the disk so we can recalculate the parity of the
2574 * stripe.
2575 *
2576 * This will usually kick off finish_rmw once all the bios are read in, but it
2577 * may trigger parity reconstruction if we had any errors along the way
2578 */
2579static void raid56_parity_scrub_end_io(struct bio *bio, int err)
2580{
2581 struct btrfs_raid_bio *rbio = bio->bi_private;
2582
2583 if (err)
2584 fail_bio_stripe(rbio, bio);
2585 else
2586 set_bio_pages_uptodate(bio);
2587
2588 bio_put(bio);
2589
2590 if (!atomic_dec_and_test(&rbio->stripes_pending))
2591 return;
2592
2593 /*
2594 * this will normally call finish_rmw to start our write
2595 * but if there are any failed stripes we'll reconstruct
2596 * from parity first
2597 */
2598 validate_rbio_for_parity_scrub(rbio);
2599}
2600
2601static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
2602{
2603 int bios_to_read = 0;
2604 struct bio_list bio_list;
2605 int ret;
2606 int pagenr;
2607 int stripe;
2608 struct bio *bio;
2609
2610 ret = alloc_rbio_essential_pages(rbio);
2611 if (ret)
2612 goto cleanup;
2613
2614 bio_list_init(&bio_list);
2615
2616 atomic_set(&rbio->error, 0);
2617 /*
2618 * build a list of bios to read all the missing parts of this
2619 * stripe
2620 */
2621 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
2622 for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
2623 struct page *page;
2624 /*
2625 * we want to find all the pages missing from
2626 * the rbio and read them from the disk. If
2627 * page_in_rbio finds a page in the bio list
2628 * we don't need to read it off the stripe.
2629 */
2630 page = page_in_rbio(rbio, stripe, pagenr, 1);
2631 if (page)
2632 continue;
2633
2634 page = rbio_stripe_page(rbio, stripe, pagenr);
2635 /*
2636 * the bio cache may have handed us an uptodate
2637 * page. If so, be happy and use it
2638 */
2639 if (PageUptodate(page))
2640 continue;
2641
2642 ret = rbio_add_io_page(rbio, &bio_list, page,
2643 stripe, pagenr, rbio->stripe_len);
2644 if (ret)
2645 goto cleanup;
2646 }
2647 }
2648
2649 bios_to_read = bio_list_size(&bio_list);
2650 if (!bios_to_read) {
2651 /*
2652 * this can happen if others have merged with
2653 * us, it means there is nothing left to read.
2654 * But if there are missing devices it may not be
2655 * safe to do the full stripe write yet.
2656 */
2657 goto finish;
2658 }
2659
2660 /*
2661 * the bbio may be freed once we submit the last bio. Make sure
2662 * not to touch it after that
2663 */
2664 atomic_set(&rbio->stripes_pending, bios_to_read);
2665 while (1) {
2666 bio = bio_list_pop(&bio_list);
2667 if (!bio)
2668 break;
2669
2670 bio->bi_private = rbio;
2671 bio->bi_end_io = raid56_parity_scrub_end_io;
2672
2673 btrfs_bio_wq_end_io(rbio->fs_info, bio,
2674 BTRFS_WQ_ENDIO_RAID56);
2675
2676 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
2677 submit_bio(READ, bio);
2678 }
2679 /* the actual write will happen once the reads are done */
2680 return;
2681
2682cleanup:
2683 rbio_orig_end_io(rbio, -EIO, 0);
2684 return;
2685
2686finish:
2687 validate_rbio_for_parity_scrub(rbio);
2688}
2689
2690static void scrub_parity_work(struct btrfs_work *work)
2691{
2692 struct btrfs_raid_bio *rbio;
2693
2694 rbio = container_of(work, struct btrfs_raid_bio, work);
2695 raid56_parity_scrub_stripe(rbio);
2696}
2697
2698static void async_scrub_parity(struct btrfs_raid_bio *rbio)
2699{
2700 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
2701 scrub_parity_work, NULL, NULL);
2702
2703 btrfs_queue_work(rbio->fs_info->rmw_workers,
2704 &rbio->work);
2705}
2706
2707void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
2708{
2709 if (!lock_stripe_add(rbio))
2710 async_scrub_parity(rbio);
2711}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index ea5d73bfdfbe..31d4a157b5e3 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -39,13 +39,25 @@ static inline int nr_data_stripes(struct map_lookup *map)
39#define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ 39#define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \
40 ((x) == RAID6_Q_STRIPE)) 40 ((x) == RAID6_Q_STRIPE))
41 41
42struct btrfs_raid_bio;
43struct btrfs_device;
44
42int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 45int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
43 struct btrfs_bio *bbio, u64 *raid_map, 46 struct btrfs_bio *bbio, u64 *raid_map,
44 u64 stripe_len, int mirror_num); 47 u64 stripe_len, int mirror_num, int generic_io);
45int raid56_parity_write(struct btrfs_root *root, struct bio *bio, 48int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
46 struct btrfs_bio *bbio, u64 *raid_map, 49 struct btrfs_bio *bbio, u64 *raid_map,
47 u64 stripe_len); 50 u64 stripe_len);
48 51
52struct btrfs_raid_bio *
53raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
54 struct btrfs_bio *bbio, u64 *raid_map,
55 u64 stripe_len, struct btrfs_device *scrub_dev,
56 unsigned long *dbitmap, int stripe_nsectors);
57void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
58 struct page *page, u64 logical);
59void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
60
49int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); 61int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
50void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); 62void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
51#endif 63#endif
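raid56.h now exports the parity-scrub entry points that scrub.c (next in this diff) drives: allocate a scrub rbio for one full stripe, attach the already-checksummed data pages, then submit it so the raid56 code reads the stripe, recomputes P/Q and rewrites only the parity pages that turn out to be wrong (plus the replace target, if any). A rough sketch of a caller, with the bbio/raid_map mapping setup and error handling omitted, and the surrounding names assumed rather than taken from the patch:

static int scrub_one_parity_stripe(struct btrfs_root *root, struct bio *bio,
				   struct btrfs_bio *bbio, u64 *raid_map,
				   u64 stripe_len,
				   struct btrfs_device *scrub_dev,
				   unsigned long *dbitmap, int nsectors,
				   struct page **pages, u64 *logicals,
				   int npages)
{
	struct btrfs_raid_bio *rbio;
	int i;

	/* bio is an empty placeholder that only carries the end_io handler */
	rbio = raid56_parity_alloc_scrub_rbio(root, bio, bbio, raid_map,
					      stripe_len, scrub_dev,
					      dbitmap, nsectors);
	if (!rbio)
		return -ENOMEM;

	/* hand over the data pages whose checksums already verified */
	for (i = 0; i < npages; i++)
		raid56_parity_add_scrub_pages(rbio, pages[i], logicals[i]);

	/* read the stripe, check/repair parity, write back what changed */
	raid56_parity_submit_scrub_rbio(rbio);
	return 0;
}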
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4325bb0111d9..f2bb13a23f86 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -63,10 +63,18 @@ struct scrub_ctx;
63 */ 63 */
64#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ 64#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
65 65
66struct scrub_recover {
67 atomic_t refs;
68 struct btrfs_bio *bbio;
69 u64 *raid_map;
70 u64 map_length;
71};
72
66struct scrub_page { 73struct scrub_page {
67 struct scrub_block *sblock; 74 struct scrub_block *sblock;
68 struct page *page; 75 struct page *page;
69 struct btrfs_device *dev; 76 struct btrfs_device *dev;
77 struct list_head list;
70 u64 flags; /* extent flags */ 78 u64 flags; /* extent flags */
71 u64 generation; 79 u64 generation;
72 u64 logical; 80 u64 logical;
@@ -79,6 +87,8 @@ struct scrub_page {
79 unsigned int io_error:1; 87 unsigned int io_error:1;
80 }; 88 };
81 u8 csum[BTRFS_CSUM_SIZE]; 89 u8 csum[BTRFS_CSUM_SIZE];
90
91 struct scrub_recover *recover;
82}; 92};
83 93
84struct scrub_bio { 94struct scrub_bio {
@@ -105,14 +115,52 @@ struct scrub_block {
105 atomic_t outstanding_pages; 115 atomic_t outstanding_pages;
106 atomic_t ref_count; /* free mem on transition to zero */ 116 atomic_t ref_count; /* free mem on transition to zero */
107 struct scrub_ctx *sctx; 117 struct scrub_ctx *sctx;
118 struct scrub_parity *sparity;
108 struct { 119 struct {
109 unsigned int header_error:1; 120 unsigned int header_error:1;
110 unsigned int checksum_error:1; 121 unsigned int checksum_error:1;
111 unsigned int no_io_error_seen:1; 122 unsigned int no_io_error_seen:1;
112 unsigned int generation_error:1; /* also sets header_error */ 123 unsigned int generation_error:1; /* also sets header_error */
124
 125 /* The following is for the data used to check parity */
 126 /* i.e. data blocks that are covered by a checksum */
127 unsigned int data_corrected:1;
113 }; 128 };
114}; 129};
115 130
 131/* Used for the chunks with parity stripes such as RAID5/6 */
132struct scrub_parity {
133 struct scrub_ctx *sctx;
134
135 struct btrfs_device *scrub_dev;
136
137 u64 logic_start;
138
139 u64 logic_end;
140
141 int nsectors;
142
143 int stripe_len;
144
145 atomic_t ref_count;
146
147 struct list_head spages;
148
149 /* Work of parity check and repair */
150 struct btrfs_work work;
151
152 /* Mark the parity blocks which have data */
153 unsigned long *dbitmap;
154
155 /*
 156 * Mark the parity blocks which have data, but where errors
 157 * happened when reading or checking that data
158 */
159 unsigned long *ebitmap;
160
161 unsigned long bitmap[0];
162};
163
116struct scrub_wr_ctx { 164struct scrub_wr_ctx {
117 struct scrub_bio *wr_curr_bio; 165 struct scrub_bio *wr_curr_bio;
118 struct btrfs_device *tgtdev; 166 struct btrfs_device *tgtdev;
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
196static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 244static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
197 struct scrub_block *sblock, int is_metadata, 245 struct scrub_block *sblock, int is_metadata,
198 int have_csum, u8 *csum, u64 generation, 246 int have_csum, u8 *csum, u64 generation,
199 u16 csum_size); 247 u16 csum_size, int retry_failed_mirror);
200static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, 248static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
201 struct scrub_block *sblock, 249 struct scrub_block *sblock,
202 int is_metadata, int have_csum, 250 int is_metadata, int have_csum,
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock);
218static void scrub_block_put(struct scrub_block *sblock); 266static void scrub_block_put(struct scrub_block *sblock);
219static void scrub_page_get(struct scrub_page *spage); 267static void scrub_page_get(struct scrub_page *spage);
220static void scrub_page_put(struct scrub_page *spage); 268static void scrub_page_put(struct scrub_page *spage);
269static void scrub_parity_get(struct scrub_parity *sparity);
270static void scrub_parity_put(struct scrub_parity *sparity);
221static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, 271static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
222 struct scrub_page *spage); 272 struct scrub_page *spage);
223static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 273static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -790,6 +840,20 @@ out:
790 scrub_pending_trans_workers_dec(sctx); 840 scrub_pending_trans_workers_dec(sctx);
791} 841}
792 842
843static inline void scrub_get_recover(struct scrub_recover *recover)
844{
845 atomic_inc(&recover->refs);
846}
847
848static inline void scrub_put_recover(struct scrub_recover *recover)
849{
850 if (atomic_dec_and_test(&recover->refs)) {
851 kfree(recover->bbio);
852 kfree(recover->raid_map);
853 kfree(recover);
854 }
855}
856
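The scrub_recover added above is shared by every recheck page built from one btrfs_map_sblock() call, so its lifetime is handled with a bare atomic reference count: the final scrub_put_recover() frees the bbio, the raid_map and the wrapper itself. A minimal userspace sketch of the same get/put pattern, using C11 atomics and illustrative names rather than the kernel helpers:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct recover {
    atomic_int refs;
    void *payload;              /* stands in for bbio/raid_map */
};

static struct recover *recover_alloc(void)
{
    struct recover *r = calloc(1, sizeof(*r));

    if (r)
        atomic_init(&r->refs, 1);   /* creator holds the first reference */
    return r;
}

static void recover_get(struct recover *r)
{
    atomic_fetch_add(&r->refs, 1);
}

static void recover_put(struct recover *r)
{
    /* free only when the last reference goes away */
    if (atomic_fetch_sub(&r->refs, 1) == 1) {
        free(r->payload);
        free(r);
    }
}

int main(void)
{
    struct recover *r = recover_alloc();

    if (!r)
        return 1;
    recover_get(r);     /* e.g. a scrub page takes a reference */
    recover_put(r);     /* that page is released */
    recover_put(r);     /* creator's reference; object freed here */
    printf("released\n");
    return 0;
}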
793/* 857/*
794 * scrub_handle_errored_block gets called when either verification of the 858 * scrub_handle_errored_block gets called when either verification of the
795 * pages failed or the bio failed to read, e.g. with EIO. In the latter 859 * pages failed or the bio failed to read, e.g. with EIO. In the latter
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
906 970
907 /* build and submit the bios for the failed mirror, check checksums */ 971 /* build and submit the bios for the failed mirror, check checksums */
908 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, 972 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
909 csum, generation, sctx->csum_size); 973 csum, generation, sctx->csum_size, 1);
910 974
911 if (!sblock_bad->header_error && !sblock_bad->checksum_error && 975 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
912 sblock_bad->no_io_error_seen) { 976 sblock_bad->no_io_error_seen) {
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
920 */ 984 */
921 spin_lock(&sctx->stat_lock); 985 spin_lock(&sctx->stat_lock);
922 sctx->stat.unverified_errors++; 986 sctx->stat.unverified_errors++;
987 sblock_to_check->data_corrected = 1;
923 spin_unlock(&sctx->stat_lock); 988 spin_unlock(&sctx->stat_lock);
924 989
925 if (sctx->is_dev_replace) 990 if (sctx->is_dev_replace)
@@ -1019,7 +1084,7 @@ nodatasum_case:
1019 /* build and submit the bios, check checksums */ 1084 /* build and submit the bios, check checksums */
1020 scrub_recheck_block(fs_info, sblock_other, is_metadata, 1085 scrub_recheck_block(fs_info, sblock_other, is_metadata,
1021 have_csum, csum, generation, 1086 have_csum, csum, generation,
1022 sctx->csum_size); 1087 sctx->csum_size, 0);
1023 1088
1024 if (!sblock_other->header_error && 1089 if (!sblock_other->header_error &&
1025 !sblock_other->checksum_error && 1090 !sblock_other->checksum_error &&
@@ -1169,7 +1234,7 @@ nodatasum_case:
1169 */ 1234 */
1170 scrub_recheck_block(fs_info, sblock_bad, 1235 scrub_recheck_block(fs_info, sblock_bad,
1171 is_metadata, have_csum, csum, 1236 is_metadata, have_csum, csum,
1172 generation, sctx->csum_size); 1237 generation, sctx->csum_size, 1);
1173 if (!sblock_bad->header_error && 1238 if (!sblock_bad->header_error &&
1174 !sblock_bad->checksum_error && 1239 !sblock_bad->checksum_error &&
1175 sblock_bad->no_io_error_seen) 1240 sblock_bad->no_io_error_seen)
@@ -1180,6 +1245,7 @@ nodatasum_case:
1180corrected_error: 1245corrected_error:
1181 spin_lock(&sctx->stat_lock); 1246 spin_lock(&sctx->stat_lock);
1182 sctx->stat.corrected_errors++; 1247 sctx->stat.corrected_errors++;
1248 sblock_to_check->data_corrected = 1;
1183 spin_unlock(&sctx->stat_lock); 1249 spin_unlock(&sctx->stat_lock);
1184 printk_ratelimited_in_rcu(KERN_ERR 1250 printk_ratelimited_in_rcu(KERN_ERR
1185 "BTRFS: fixed up error at logical %llu on dev %s\n", 1251 "BTRFS: fixed up error at logical %llu on dev %s\n",
@@ -1201,11 +1267,18 @@ out:
1201 mirror_index++) { 1267 mirror_index++) {
1202 struct scrub_block *sblock = sblocks_for_recheck + 1268 struct scrub_block *sblock = sblocks_for_recheck +
1203 mirror_index; 1269 mirror_index;
1270 struct scrub_recover *recover;
1204 int page_index; 1271 int page_index;
1205 1272
1206 for (page_index = 0; page_index < sblock->page_count; 1273 for (page_index = 0; page_index < sblock->page_count;
1207 page_index++) { 1274 page_index++) {
1208 sblock->pagev[page_index]->sblock = NULL; 1275 sblock->pagev[page_index]->sblock = NULL;
1276 recover = sblock->pagev[page_index]->recover;
1277 if (recover) {
1278 scrub_put_recover(recover);
1279 sblock->pagev[page_index]->recover =
1280 NULL;
1281 }
1209 scrub_page_put(sblock->pagev[page_index]); 1282 scrub_page_put(sblock->pagev[page_index]);
1210 } 1283 }
1211 } 1284 }
@@ -1215,14 +1288,63 @@ out:
1215 return 0; 1288 return 0;
1216} 1289}
1217 1290
1291static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
1292{
1293 if (raid_map) {
1294 if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
1295 return 3;
1296 else
1297 return 2;
1298 } else {
1299 return (int)bbio->num_stripes;
1300 }
1301}
1302
1303static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
1304 u64 mapped_length,
1305 int nstripes, int mirror,
1306 int *stripe_index,
1307 u64 *stripe_offset)
1308{
1309 int i;
1310
1311 if (raid_map) {
1312 /* RAID5/6 */
1313 for (i = 0; i < nstripes; i++) {
1314 if (raid_map[i] == RAID6_Q_STRIPE ||
1315 raid_map[i] == RAID5_P_STRIPE)
1316 continue;
1317
1318 if (logical >= raid_map[i] &&
1319 logical < raid_map[i] + mapped_length)
1320 break;
1321 }
1322
1323 *stripe_index = i;
1324 *stripe_offset = logical - raid_map[i];
1325 } else {
1326 /* The other RAID type */
1327 *stripe_index = mirror;
1328 *stripe_offset = 0;
1329 }
1330}
1331
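For RAID5/6 the raid_map returned by btrfs_map_sblock() records, per stripe, the logical address that stripe covers (or the P/Q markers for parity stripes), so a mirror number alone no longer says where a page lives on disk; scrub_stripe_index_and_offset() scans the map for the data stripe whose range contains the logical address. A small userspace sketch of that lookup, with made-up marker values and addresses chosen only for illustration:

#include <stdint.h>
#include <stdio.h>

#define P_STRIPE ((uint64_t)-2)   /* illustrative stand-ins for    */
#define Q_STRIPE ((uint64_t)-1)   /* RAID5_P_STRIPE/RAID6_Q_STRIPE */

/* Find the data stripe whose [start, start + mapped_length) range
 * contains 'logical', and the offset into that stripe. */
static void stripe_index_and_offset(uint64_t logical, const uint64_t *raid_map,
                                    uint64_t mapped_length, int nstripes,
                                    int *index, uint64_t *offset)
{
    int i;

    for (i = 0; i < nstripes; i++) {
        if (raid_map[i] == P_STRIPE || raid_map[i] == Q_STRIPE)
            continue;       /* parity stripes hold no logical data */
        if (logical >= raid_map[i] &&
            logical < raid_map[i] + mapped_length)
            break;
    }
    *index = i;
    *offset = logical - raid_map[i];
}

int main(void)
{
    /* two data stripes at logical 0 and 64K, one parity stripe */
    uint64_t raid_map[] = { 0, 65536, P_STRIPE };
    uint64_t offset;
    int index;

    stripe_index_and_offset(69632, raid_map, 65536, 3, &index, &offset);
    printf("stripe %d, offset %llu\n", index,
           (unsigned long long)offset);    /* stripe 1, offset 4096 */
    return 0;
}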
1218static int scrub_setup_recheck_block(struct scrub_ctx *sctx, 1332static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1219 struct btrfs_fs_info *fs_info, 1333 struct btrfs_fs_info *fs_info,
1220 struct scrub_block *original_sblock, 1334 struct scrub_block *original_sblock,
1221 u64 length, u64 logical, 1335 u64 length, u64 logical,
1222 struct scrub_block *sblocks_for_recheck) 1336 struct scrub_block *sblocks_for_recheck)
1223{ 1337{
1338 struct scrub_recover *recover;
1339 struct btrfs_bio *bbio;
1340 u64 *raid_map;
1341 u64 sublen;
1342 u64 mapped_length;
1343 u64 stripe_offset;
1344 int stripe_index;
1224 int page_index; 1345 int page_index;
1225 int mirror_index; 1346 int mirror_index;
1347 int nmirrors;
1226 int ret; 1348 int ret;
1227 1349
1228 /* 1350 /*
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1233 1355
1234 page_index = 0; 1356 page_index = 0;
1235 while (length > 0) { 1357 while (length > 0) {
1236 u64 sublen = min_t(u64, length, PAGE_SIZE); 1358 sublen = min_t(u64, length, PAGE_SIZE);
1237 u64 mapped_length = sublen; 1359 mapped_length = sublen;
1238 struct btrfs_bio *bbio = NULL; 1360 bbio = NULL;
1361 raid_map = NULL;
1239 1362
1240 /* 1363 /*
1241 * with a length of PAGE_SIZE, each returned stripe 1364 * with a length of PAGE_SIZE, each returned stripe
1242 * represents one mirror 1365 * represents one mirror
1243 */ 1366 */
1244 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, 1367 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
1245 &mapped_length, &bbio, 0); 1368 &mapped_length, &bbio, 0, &raid_map);
1246 if (ret || !bbio || mapped_length < sublen) { 1369 if (ret || !bbio || mapped_length < sublen) {
1247 kfree(bbio); 1370 kfree(bbio);
1371 kfree(raid_map);
1248 return -EIO; 1372 return -EIO;
1249 } 1373 }
1250 1374
1375 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1376 if (!recover) {
1377 kfree(bbio);
1378 kfree(raid_map);
1379 return -ENOMEM;
1380 }
1381
1382 atomic_set(&recover->refs, 1);
1383 recover->bbio = bbio;
1384 recover->raid_map = raid_map;
1385 recover->map_length = mapped_length;
1386
1251 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); 1387 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1252 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; 1388
1389 nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
1390 for (mirror_index = 0; mirror_index < nmirrors;
1253 mirror_index++) { 1391 mirror_index++) {
1254 struct scrub_block *sblock; 1392 struct scrub_block *sblock;
1255 struct scrub_page *page; 1393 struct scrub_page *page;
@@ -1265,26 +1403,38 @@ leave_nomem:
1265 spin_lock(&sctx->stat_lock); 1403 spin_lock(&sctx->stat_lock);
1266 sctx->stat.malloc_errors++; 1404 sctx->stat.malloc_errors++;
1267 spin_unlock(&sctx->stat_lock); 1405 spin_unlock(&sctx->stat_lock);
1268 kfree(bbio); 1406 scrub_put_recover(recover);
1269 return -ENOMEM; 1407 return -ENOMEM;
1270 } 1408 }
1271 scrub_page_get(page); 1409 scrub_page_get(page);
1272 sblock->pagev[page_index] = page; 1410 sblock->pagev[page_index] = page;
1273 page->logical = logical; 1411 page->logical = logical;
1274 page->physical = bbio->stripes[mirror_index].physical; 1412
1413 scrub_stripe_index_and_offset(logical, raid_map,
1414 mapped_length,
1415 bbio->num_stripes,
1416 mirror_index,
1417 &stripe_index,
1418 &stripe_offset);
1419 page->physical = bbio->stripes[stripe_index].physical +
1420 stripe_offset;
1421 page->dev = bbio->stripes[stripe_index].dev;
1422
1275 BUG_ON(page_index >= original_sblock->page_count); 1423 BUG_ON(page_index >= original_sblock->page_count);
1276 page->physical_for_dev_replace = 1424 page->physical_for_dev_replace =
1277 original_sblock->pagev[page_index]-> 1425 original_sblock->pagev[page_index]->
1278 physical_for_dev_replace; 1426 physical_for_dev_replace;
1279 /* for missing devices, dev->bdev is NULL */ 1427 /* for missing devices, dev->bdev is NULL */
1280 page->dev = bbio->stripes[mirror_index].dev;
1281 page->mirror_num = mirror_index + 1; 1428 page->mirror_num = mirror_index + 1;
1282 sblock->page_count++; 1429 sblock->page_count++;
1283 page->page = alloc_page(GFP_NOFS); 1430 page->page = alloc_page(GFP_NOFS);
1284 if (!page->page) 1431 if (!page->page)
1285 goto leave_nomem; 1432 goto leave_nomem;
1433
1434 scrub_get_recover(recover);
1435 page->recover = recover;
1286 } 1436 }
1287 kfree(bbio); 1437 scrub_put_recover(recover);
1288 length -= sublen; 1438 length -= sublen;
1289 logical += sublen; 1439 logical += sublen;
1290 page_index++; 1440 page_index++;
@@ -1293,6 +1443,51 @@ leave_nomem:
1293 return 0; 1443 return 0;
1294} 1444}
1295 1445
1446struct scrub_bio_ret {
1447 struct completion event;
1448 int error;
1449};
1450
1451static void scrub_bio_wait_endio(struct bio *bio, int error)
1452{
1453 struct scrub_bio_ret *ret = bio->bi_private;
1454
1455 ret->error = error;
1456 complete(&ret->event);
1457}
1458
1459static inline int scrub_is_page_on_raid56(struct scrub_page *page)
1460{
1461 return page->recover && page->recover->raid_map;
1462}
1463
1464static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1465 struct bio *bio,
1466 struct scrub_page *page)
1467{
1468 struct scrub_bio_ret done;
1469 int ret;
1470
1471 init_completion(&done.event);
1472 done.error = 0;
1473 bio->bi_iter.bi_sector = page->logical >> 9;
1474 bio->bi_private = &done;
1475 bio->bi_end_io = scrub_bio_wait_endio;
1476
1477 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
1478 page->recover->raid_map,
1479 page->recover->map_length,
1480 page->mirror_num, 0);
1481 if (ret)
1482 return ret;
1483
1484 wait_for_completion(&done.event);
1485 if (done.error)
1486 return -EIO;
1487
1488 return 0;
1489}
1490
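scrub_submit_raid56_bio_wait() turns the asynchronous raid56_parity_recover() path into a synchronous read: the bio's end_io callback records the error and signals a completion that the submitter blocks on. The same submit-then-wait shape, sketched in userspace with a pthread condition variable standing in for the kernel completion (all names here are illustrative, not kernel API):

#include <pthread.h>
#include <stdio.h>

struct bio_ret {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    int done;
    int error;
};

/* end_io callback: record the result and wake the waiter */
static void bio_endio(struct bio_ret *ret, int error)
{
    pthread_mutex_lock(&ret->lock);
    ret->error = error;
    ret->done = 1;
    pthread_cond_signal(&ret->cond);
    pthread_mutex_unlock(&ret->lock);
}

/* stands in for the async recovery path completing the "bio" */
static void *async_recover(void *arg)
{
    bio_endio(arg, 0);      /* pretend the read succeeded */
    return NULL;
}

int main(void)
{
    struct bio_ret done;
    pthread_t t;

    pthread_mutex_init(&done.lock, NULL);
    pthread_cond_init(&done.cond, NULL);
    done.done = 0;
    done.error = 0;

    pthread_create(&t, NULL, async_recover, &done);   /* "submit" */

    pthread_mutex_lock(&done.lock);                    /* wait for completion */
    while (!done.done)
        pthread_cond_wait(&done.cond, &done.lock);
    pthread_mutex_unlock(&done.lock);
    pthread_join(t, NULL);

    printf("read %s\n", done.error ? "failed" : "ok");
    return 0;
}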
1296/* 1491/*
1297 * this function will check the on disk data for checksum errors, header 1492 * this function will check the on disk data for checksum errors, header
1298 * errors and read I/O errors. If any I/O errors happen, the exact pages 1493 * errors and read I/O errors. If any I/O errors happen, the exact pages
@@ -1303,7 +1498,7 @@ leave_nomem:
1303static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 1498static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1304 struct scrub_block *sblock, int is_metadata, 1499 struct scrub_block *sblock, int is_metadata,
1305 int have_csum, u8 *csum, u64 generation, 1500 int have_csum, u8 *csum, u64 generation,
1306 u16 csum_size) 1501 u16 csum_size, int retry_failed_mirror)
1307{ 1502{
1308 int page_num; 1503 int page_num;
1309 1504
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1329 continue; 1524 continue;
1330 } 1525 }
1331 bio->bi_bdev = page->dev->bdev; 1526 bio->bi_bdev = page->dev->bdev;
1332 bio->bi_iter.bi_sector = page->physical >> 9;
1333 1527
1334 bio_add_page(bio, page->page, PAGE_SIZE, 0); 1528 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1335 if (btrfsic_submit_bio_wait(READ, bio)) 1529 if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
1336 sblock->no_io_error_seen = 0; 1530 if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
1531 sblock->no_io_error_seen = 0;
1532 } else {
1533 bio->bi_iter.bi_sector = page->physical >> 9;
1534
1535 if (btrfsic_submit_bio_wait(READ, bio))
1536 sblock->no_io_error_seen = 0;
1537 }
1337 1538
1338 bio_put(bio); 1539 bio_put(bio);
1339 } 1540 }
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1486{ 1687{
1487 int page_num; 1688 int page_num;
1488 1689
1690 /*
1691 * This block is used for the check of the parity on the source device,
1692 * so the data needn't be written into the destination device.
1693 */
1694 if (sblock->sparity)
1695 return;
1696
1489 for (page_num = 0; page_num < sblock->page_count; page_num++) { 1697 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1490 int ret; 1698 int ret;
1491 1699
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock)
1867 if (atomic_dec_and_test(&sblock->ref_count)) { 2075 if (atomic_dec_and_test(&sblock->ref_count)) {
1868 int i; 2076 int i;
1869 2077
2078 if (sblock->sparity)
2079 scrub_parity_put(sblock->sparity);
2080
1870 for (i = 0; i < sblock->page_count; i++) 2081 for (i = 0; i < sblock->page_count; i++)
1871 scrub_page_put(sblock->pagev[i]); 2082 scrub_page_put(sblock->pagev[i]);
1872 kfree(sblock); 2083 kfree(sblock);
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
2124 scrub_pending_bio_dec(sctx); 2335 scrub_pending_bio_dec(sctx);
2125} 2336}
2126 2337
2338static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2339 unsigned long *bitmap,
2340 u64 start, u64 len)
2341{
2342 int offset;
2343 int nsectors;
2344 int sectorsize = sparity->sctx->dev_root->sectorsize;
2345
2346 if (len >= sparity->stripe_len) {
2347 bitmap_set(bitmap, 0, sparity->nsectors);
2348 return;
2349 }
2350
2351 start -= sparity->logic_start;
2352 offset = (int)do_div(start, sparity->stripe_len);
2353 offset /= sectorsize;
2354 nsectors = (int)len / sectorsize;
2355
2356 if (offset + nsectors <= sparity->nsectors) {
2357 bitmap_set(bitmap, offset, nsectors);
2358 return;
2359 }
2360
2361 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2362 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2363}
2364
2365static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2366 u64 start, u64 len)
2367{
2368 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2369}
2370
2371static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2372 u64 start, u64 len)
2373{
2374 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2375}
2376
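__scrub_mark_bitmap() translates a logical byte range into sector bits of the current stripe: the start is reduced modulo stripe_len, converted to a sector index, and if the range runs past the end of the stripe the marking wraps around to bit 0. A userspace sketch of the same arithmetic with a plain byte-per-sector "bitmap"; the 64K stripe and 4K sector values are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define STRIPE_LEN   65536u
#define SECTORSIZE    4096u
#define NSECTORS     (STRIPE_LEN / SECTORSIZE)

/* mark sectors of [start, start + len), wrapping past the stripe end */
static void mark_sectors(unsigned char *bitmap, uint64_t logic_start,
                         uint64_t start, uint64_t len)
{
    unsigned int offset, nsectors, first;

    if (len >= STRIPE_LEN) {
        memset(bitmap, 1, NSECTORS);
        return;
    }

    offset = (unsigned int)((start - logic_start) % STRIPE_LEN) / SECTORSIZE;
    nsectors = (unsigned int)(len / SECTORSIZE);

    if (offset + nsectors <= NSECTORS) {
        memset(bitmap + offset, 1, nsectors);
        return;
    }

    /* range crosses the end of the stripe: set the tail, then wrap */
    first = NSECTORS - offset;
    memset(bitmap + offset, 1, first);
    memset(bitmap, 1, nsectors - first);
}

int main(void)
{
    unsigned char bitmap[NSECTORS] = { 0 };
    unsigned int i;

    /* 16K starting 8K before the stripe end: sectors 14,15 then 0,1 */
    mark_sectors(bitmap, 0, STRIPE_LEN - 8192, 16384);
    for (i = 0; i < NSECTORS; i++)
        if (bitmap[i])
            printf("sector %u marked\n", i);
    return 0;
}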
2127static void scrub_block_complete(struct scrub_block *sblock) 2377static void scrub_block_complete(struct scrub_block *sblock)
2128{ 2378{
2379 int corrupted = 0;
2380
2129 if (!sblock->no_io_error_seen) { 2381 if (!sblock->no_io_error_seen) {
2382 corrupted = 1;
2130 scrub_handle_errored_block(sblock); 2383 scrub_handle_errored_block(sblock);
2131 } else { 2384 } else {
2132 /* 2385 /*
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock)
2134 * dev replace case, otherwise write here in dev replace 2387 * dev replace case, otherwise write here in dev replace
2135 * case. 2388 * case.
2136 */ 2389 */
2137 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) 2390 corrupted = scrub_checksum(sblock);
2391 if (!corrupted && sblock->sctx->is_dev_replace)
2138 scrub_write_block_to_dev_replace(sblock); 2392 scrub_write_block_to_dev_replace(sblock);
2139 } 2393 }
2394
2395 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2396 u64 start = sblock->pagev[0]->logical;
2397 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2398 PAGE_SIZE;
2399
2400 scrub_parity_mark_sectors_error(sblock->sparity,
2401 start, end - start);
2402 }
2140} 2403}
2141 2404
2142static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, 2405static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -2228,6 +2491,132 @@ behind_scrub_pages:
2228 return 0; 2491 return 0;
2229} 2492}
2230 2493
2494static int scrub_pages_for_parity(struct scrub_parity *sparity,
2495 u64 logical, u64 len,
2496 u64 physical, struct btrfs_device *dev,
2497 u64 flags, u64 gen, int mirror_num, u8 *csum)
2498{
2499 struct scrub_ctx *sctx = sparity->sctx;
2500 struct scrub_block *sblock;
2501 int index;
2502
2503 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
2504 if (!sblock) {
2505 spin_lock(&sctx->stat_lock);
2506 sctx->stat.malloc_errors++;
2507 spin_unlock(&sctx->stat_lock);
2508 return -ENOMEM;
2509 }
2510
2511 /* one ref inside this function, plus one for each page added to
2512 * a bio later on */
2513 atomic_set(&sblock->ref_count, 1);
2514 sblock->sctx = sctx;
2515 sblock->no_io_error_seen = 1;
2516 sblock->sparity = sparity;
2517 scrub_parity_get(sparity);
2518
2519 for (index = 0; len > 0; index++) {
2520 struct scrub_page *spage;
2521 u64 l = min_t(u64, len, PAGE_SIZE);
2522
2523 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2524 if (!spage) {
2525leave_nomem:
2526 spin_lock(&sctx->stat_lock);
2527 sctx->stat.malloc_errors++;
2528 spin_unlock(&sctx->stat_lock);
2529 scrub_block_put(sblock);
2530 return -ENOMEM;
2531 }
2532 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2533 /* For scrub block */
2534 scrub_page_get(spage);
2535 sblock->pagev[index] = spage;
2536 /* For scrub parity */
2537 scrub_page_get(spage);
2538 list_add_tail(&spage->list, &sparity->spages);
2539 spage->sblock = sblock;
2540 spage->dev = dev;
2541 spage->flags = flags;
2542 spage->generation = gen;
2543 spage->logical = logical;
2544 spage->physical = physical;
2545 spage->mirror_num = mirror_num;
2546 if (csum) {
2547 spage->have_csum = 1;
2548 memcpy(spage->csum, csum, sctx->csum_size);
2549 } else {
2550 spage->have_csum = 0;
2551 }
2552 sblock->page_count++;
2553 spage->page = alloc_page(GFP_NOFS);
2554 if (!spage->page)
2555 goto leave_nomem;
2556 len -= l;
2557 logical += l;
2558 physical += l;
2559 }
2560
2561 WARN_ON(sblock->page_count == 0);
2562 for (index = 0; index < sblock->page_count; index++) {
2563 struct scrub_page *spage = sblock->pagev[index];
2564 int ret;
2565
2566 ret = scrub_add_page_to_rd_bio(sctx, spage);
2567 if (ret) {
2568 scrub_block_put(sblock);
2569 return ret;
2570 }
2571 }
2572
2573 /* last one frees, either here or in bio completion for last page */
2574 scrub_block_put(sblock);
2575 return 0;
2576}
2577
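scrub_pages_for_parity() above walks the extent in at-most-page-sized steps, advancing the logical and physical cursors together so each scrub_page records both where its data sits on disk and where it sits in the logical address space. The loop shape, reduced to a userspace sketch with an assumed 4K page size and illustrative addresses:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096u

static uint64_t min_u64(uint64_t a, uint64_t b)
{
    return a < b ? a : b;
}

int main(void)
{
    uint64_t logical = 1048576;    /* illustrative extent start (1M)  */
    uint64_t physical = 2097152;   /* illustrative on-disk start (2M) */
    uint64_t len = 10000;          /* not page aligned on purpose     */

    while (len > 0) {
        uint64_t l = min_u64(len, PAGE_SZ);

        /* one scrub_page would be filled in here */
        printf("page: logical=%llu physical=%llu len=%llu\n",
               (unsigned long long)logical,
               (unsigned long long)physical,
               (unsigned long long)l);

        len -= l;
        logical += l;
        physical += l;
    }
    return 0;
}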
2578static int scrub_extent_for_parity(struct scrub_parity *sparity,
2579 u64 logical, u64 len,
2580 u64 physical, struct btrfs_device *dev,
2581 u64 flags, u64 gen, int mirror_num)
2582{
2583 struct scrub_ctx *sctx = sparity->sctx;
2584 int ret;
2585 u8 csum[BTRFS_CSUM_SIZE];
2586 u32 blocksize;
2587
2588 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2589 blocksize = sctx->sectorsize;
2590 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2591 blocksize = sctx->nodesize;
2592 } else {
2593 blocksize = sctx->sectorsize;
2594 WARN_ON(1);
2595 }
2596
2597 while (len) {
2598 u64 l = min_t(u64, len, blocksize);
2599 int have_csum = 0;
2600
2601 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2602 /* push csums to sbio */
2603 have_csum = scrub_find_csum(sctx, logical, l, csum);
2604 if (have_csum == 0)
2605 goto skip;
2606 }
2607 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2608 flags, gen, mirror_num,
2609 have_csum ? csum : NULL);
2610skip:
2611 if (ret)
2612 return ret;
2613 len -= l;
2614 logical += l;
2615 physical += l;
2616 }
2617 return 0;
2618}
2619
2231/* 2620/*
2232 * Given a physical address, this will calculate its 2621
2233 * logical offset. If this is a parity stripe, it will return 2622
@@ -2236,7 +2625,8 @@ behind_scrub_pages:
2236 * return 0 if it is a data stripe, 1 means parity stripe. 2625 * return 0 if it is a data stripe, 1 means parity stripe.
2237 */ 2626 */
2238static int get_raid56_logic_offset(u64 physical, int num, 2627static int get_raid56_logic_offset(u64 physical, int num,
2239 struct map_lookup *map, u64 *offset) 2628 struct map_lookup *map, u64 *offset,
2629 u64 *stripe_start)
2240{ 2630{
2241 int i; 2631 int i;
2242 int j = 0; 2632 int j = 0;
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num,
2247 2637
2248 last_offset = (physical - map->stripes[num].physical) * 2638 last_offset = (physical - map->stripes[num].physical) *
2249 nr_data_stripes(map); 2639 nr_data_stripes(map);
2640 if (stripe_start)
2641 *stripe_start = last_offset;
2642
2250 *offset = last_offset; 2643 *offset = last_offset;
2251 for (i = 0; i < nr_data_stripes(map); i++) { 2644 for (i = 0; i < nr_data_stripes(map); i++) {
2252 *offset = last_offset + i * map->stripe_len; 2645 *offset = last_offset + i * map->stripe_len;
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num,
2269 return 1; 2662 return 1;
2270} 2663}
2271 2664
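Worked example for the last_offset computation shown above, with assumed numbers: on a chunk with three data stripes and a 64K stripe_len, a physical address 128K past map->stripes[num].physical gives last_offset = 128K * 3 = 384K, the chunk-relative logical start of the full stripe that contains this device stripe. The new stripe_start argument hands exactly that value back, so scrub_stripe() can compute the full-stripe range (stripe_logical to stripe_logical + increment - 1) and pass it to scrub_raid56_parity() whenever the device holds parity at that position.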
2665static void scrub_free_parity(struct scrub_parity *sparity)
2666{
2667 struct scrub_ctx *sctx = sparity->sctx;
2668 struct scrub_page *curr, *next;
2669 int nbits;
2670
2671 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2672 if (nbits) {
2673 spin_lock(&sctx->stat_lock);
2674 sctx->stat.read_errors += nbits;
2675 sctx->stat.uncorrectable_errors += nbits;
2676 spin_unlock(&sctx->stat_lock);
2677 }
2678
2679 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2680 list_del_init(&curr->list);
2681 scrub_page_put(curr);
2682 }
2683
2684 kfree(sparity);
2685}
2686
2687static void scrub_parity_bio_endio(struct bio *bio, int error)
2688{
2689 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2690 struct scrub_ctx *sctx = sparity->sctx;
2691
2692 if (error)
2693 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2694 sparity->nsectors);
2695
2696 scrub_free_parity(sparity);
2697 scrub_pending_bio_dec(sctx);
2698 bio_put(bio);
2699}
2700
2701static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2702{
2703 struct scrub_ctx *sctx = sparity->sctx;
2704 struct bio *bio;
2705 struct btrfs_raid_bio *rbio;
2706 struct scrub_page *spage;
2707 struct btrfs_bio *bbio = NULL;
2708 u64 *raid_map = NULL;
2709 u64 length;
2710 int ret;
2711
2712 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2713 sparity->nsectors))
2714 goto out;
2715
2716 length = sparity->logic_end - sparity->logic_start + 1;
2717 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2718 sparity->logic_start,
2719 &length, &bbio, 0, &raid_map);
2720 if (ret || !bbio || !raid_map)
2721 goto bbio_out;
2722
2723 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2724 if (!bio)
2725 goto bbio_out;
2726
2727 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2728 bio->bi_private = sparity;
2729 bio->bi_end_io = scrub_parity_bio_endio;
2730
2731 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2732 raid_map, length,
2733 sparity->scrub_dev,
2734 sparity->dbitmap,
2735 sparity->nsectors);
2736 if (!rbio)
2737 goto rbio_out;
2738
2739 list_for_each_entry(spage, &sparity->spages, list)
2740 raid56_parity_add_scrub_pages(rbio, spage->page,
2741 spage->logical);
2742
2743 scrub_pending_bio_inc(sctx);
2744 raid56_parity_submit_scrub_rbio(rbio);
2745 return;
2746
2747rbio_out:
2748 bio_put(bio);
2749bbio_out:
2750 kfree(bbio);
2751 kfree(raid_map);
2752 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2753 sparity->nsectors);
2754 spin_lock(&sctx->stat_lock);
2755 sctx->stat.malloc_errors++;
2756 spin_unlock(&sctx->stat_lock);
2757out:
2758 scrub_free_parity(sparity);
2759}
2760
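Before building the scrub rbio, scrub_parity_check_and_repair() above drops every sector that already failed (ebitmap) from the set of sectors that carry data (dbitmap); if nothing is left there is no parity worth verifying and the structure is simply freed. The bitmap_andnot() step, shown on a single plain word in a userspace sketch (values are illustrative):

#include <stdio.h>

int main(void)
{
    unsigned long dbitmap = 0x0f;    /* sectors 0-3 carry data     */
    unsigned long ebitmap = 0x05;    /* sectors 0 and 2 had errors */
    unsigned long remaining;

    /* dbitmap &= ~ebitmap, as bitmap_andnot() does word by word */
    remaining = dbitmap & ~ebitmap;

    if (!remaining)
        printf("nothing left to check, free the parity context\n");
    else
        printf("check parity for sector mask 0x%lx\n", remaining);
    return 0;
}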
2761static inline int scrub_calc_parity_bitmap_len(int nsectors)
2762{
2763 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
2764}
2765
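Worked example: with a 64K stripe_len and 4K sectors, nsectors is 16, so on a 64-bit build scrub_calc_parity_bitmap_len() returns DIV_ROUND_UP(16, 64) * (64 / 8) = 8 bytes per bitmap; scrub_raid56_parity() below allocates 2 * bitmap_len extra bytes in one go and points dbitmap at the first half and ebitmap at the second.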
2766static void scrub_parity_get(struct scrub_parity *sparity)
2767{
2768 atomic_inc(&sparity->ref_count);
2769}
2770
2771static void scrub_parity_put(struct scrub_parity *sparity)
2772{
2773 if (!atomic_dec_and_test(&sparity->ref_count))
2774 return;
2775
2776 scrub_parity_check_and_repair(sparity);
2777}
2778
2779static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2780 struct map_lookup *map,
2781 struct btrfs_device *sdev,
2782 struct btrfs_path *path,
2783 u64 logic_start,
2784 u64 logic_end)
2785{
2786 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2787 struct btrfs_root *root = fs_info->extent_root;
2788 struct btrfs_root *csum_root = fs_info->csum_root;
2789 struct btrfs_extent_item *extent;
2790 u64 flags;
2791 int ret;
2792 int slot;
2793 struct extent_buffer *l;
2794 struct btrfs_key key;
2795 u64 generation;
2796 u64 extent_logical;
2797 u64 extent_physical;
2798 u64 extent_len;
2799 struct btrfs_device *extent_dev;
2800 struct scrub_parity *sparity;
2801 int nsectors;
2802 int bitmap_len;
2803 int extent_mirror_num;
2804 int stop_loop = 0;
2805
2806 nsectors = map->stripe_len / root->sectorsize;
2807 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2808 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2809 GFP_NOFS);
2810 if (!sparity) {
2811 spin_lock(&sctx->stat_lock);
2812 sctx->stat.malloc_errors++;
2813 spin_unlock(&sctx->stat_lock);
2814 return -ENOMEM;
2815 }
2816
2817 sparity->stripe_len = map->stripe_len;
2818 sparity->nsectors = nsectors;
2819 sparity->sctx = sctx;
2820 sparity->scrub_dev = sdev;
2821 sparity->logic_start = logic_start;
2822 sparity->logic_end = logic_end;
2823 atomic_set(&sparity->ref_count, 1);
2824 INIT_LIST_HEAD(&sparity->spages);
2825 sparity->dbitmap = sparity->bitmap;
2826 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2827
2828 ret = 0;
2829 while (logic_start < logic_end) {
2830 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2831 key.type = BTRFS_METADATA_ITEM_KEY;
2832 else
2833 key.type = BTRFS_EXTENT_ITEM_KEY;
2834 key.objectid = logic_start;
2835 key.offset = (u64)-1;
2836
2837 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2838 if (ret < 0)
2839 goto out;
2840
2841 if (ret > 0) {
2842 ret = btrfs_previous_extent_item(root, path, 0);
2843 if (ret < 0)
2844 goto out;
2845 if (ret > 0) {
2846 btrfs_release_path(path);
2847 ret = btrfs_search_slot(NULL, root, &key,
2848 path, 0, 0);
2849 if (ret < 0)
2850 goto out;
2851 }
2852 }
2853
2854 stop_loop = 0;
2855 while (1) {
2856 u64 bytes;
2857
2858 l = path->nodes[0];
2859 slot = path->slots[0];
2860 if (slot >= btrfs_header_nritems(l)) {
2861 ret = btrfs_next_leaf(root, path);
2862 if (ret == 0)
2863 continue;
2864 if (ret < 0)
2865 goto out;
2866
2867 stop_loop = 1;
2868 break;
2869 }
2870 btrfs_item_key_to_cpu(l, &key, slot);
2871
2872 if (key.type == BTRFS_METADATA_ITEM_KEY)
2873 bytes = root->nodesize;
2874 else
2875 bytes = key.offset;
2876
2877 if (key.objectid + bytes <= logic_start)
2878 goto next;
2879
2880 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2881 key.type != BTRFS_METADATA_ITEM_KEY)
2882 goto next;
2883
2884 if (key.objectid > logic_end) {
2885 stop_loop = 1;
2886 break;
2887 }
2888
2889 while (key.objectid >= logic_start + map->stripe_len)
2890 logic_start += map->stripe_len;
2891
2892 extent = btrfs_item_ptr(l, slot,
2893 struct btrfs_extent_item);
2894 flags = btrfs_extent_flags(l, extent);
2895 generation = btrfs_extent_generation(l, extent);
2896
2897 if (key.objectid < logic_start &&
2898 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2899 btrfs_err(fs_info,
2900 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2901 key.objectid, logic_start);
2902 goto next;
2903 }
2904again:
2905 extent_logical = key.objectid;
2906 extent_len = bytes;
2907
2908 if (extent_logical < logic_start) {
2909 extent_len -= logic_start - extent_logical;
2910 extent_logical = logic_start;
2911 }
2912
2913 if (extent_logical + extent_len >
2914 logic_start + map->stripe_len)
2915 extent_len = logic_start + map->stripe_len -
2916 extent_logical;
2917
2918 scrub_parity_mark_sectors_data(sparity, extent_logical,
2919 extent_len);
2920
2921 scrub_remap_extent(fs_info, extent_logical,
2922 extent_len, &extent_physical,
2923 &extent_dev,
2924 &extent_mirror_num);
2925
2926 ret = btrfs_lookup_csums_range(csum_root,
2927 extent_logical,
2928 extent_logical + extent_len - 1,
2929 &sctx->csum_list, 1);
2930 if (ret)
2931 goto out;
2932
2933 ret = scrub_extent_for_parity(sparity, extent_logical,
2934 extent_len,
2935 extent_physical,
2936 extent_dev, flags,
2937 generation,
2938 extent_mirror_num);
2939 if (ret)
2940 goto out;
2941
2942 scrub_free_csums(sctx);
2943 if (extent_logical + extent_len <
2944 key.objectid + bytes) {
2945 logic_start += map->stripe_len;
2946
2947 if (logic_start >= logic_end) {
2948 stop_loop = 1;
2949 break;
2950 }
2951
2952 if (logic_start < key.objectid + bytes) {
2953 cond_resched();
2954 goto again;
2955 }
2956 }
2957next:
2958 path->slots[0]++;
2959 }
2960
2961 btrfs_release_path(path);
2962
2963 if (stop_loop)
2964 break;
2965
2966 logic_start += map->stripe_len;
2967 }
2968out:
2969 if (ret < 0)
2970 scrub_parity_mark_sectors_error(sparity, logic_start,
2971 logic_end - logic_start + 1);
2972 scrub_parity_put(sparity);
2973 scrub_submit(sctx);
2974 mutex_lock(&sctx->wr_ctx.wr_lock);
2975 scrub_wr_submit(sctx);
2976 mutex_unlock(&sctx->wr_ctx.wr_lock);
2977
2978 btrfs_release_path(path);
2979 return ret < 0 ? ret : 0;
2980}
2981
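Inside scrub_raid56_parity() above, each extent found in the tree is clipped to the stripe currently being scrubbed before its sectors are marked in dbitmap: the front is trimmed if the extent starts before logic_start and the tail if it runs past logic_start + stripe_len. The clipping arithmetic in isolation, as a userspace sketch with example values:

#include <stdint.h>
#include <stdio.h>

/* clip [extent_logical, extent_logical + extent_len) to the stripe
 * window [logic_start, logic_start + stripe_len) */
static void clip_to_stripe(uint64_t logic_start, uint64_t stripe_len,
                           uint64_t *extent_logical, uint64_t *extent_len)
{
    if (*extent_logical < logic_start) {
        *extent_len -= logic_start - *extent_logical;
        *extent_logical = logic_start;
    }

    if (*extent_logical + *extent_len > logic_start + stripe_len)
        *extent_len = logic_start + stripe_len - *extent_logical;
}

int main(void)
{
    /* extent 48K..176K against a 64K stripe window starting at 64K */
    uint64_t logical = 49152, len = 131072;

    clip_to_stripe(65536, 65536, &logical, &len);
    printf("clipped to logical=%llu len=%llu\n",
           (unsigned long long)logical,
           (unsigned long long)len);     /* 65536 and 65536 */
    return 0;
}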
2272static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, 2982static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2273 struct map_lookup *map, 2983 struct map_lookup *map,
2274 struct btrfs_device *scrub_dev, 2984 struct btrfs_device *scrub_dev,
2275 int num, u64 base, u64 length, 2985 int num, u64 base, u64 length,
2276 int is_dev_replace) 2986 int is_dev_replace)
2277{ 2987{
2278 struct btrfs_path *path; 2988 struct btrfs_path *path, *ppath;
2279 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; 2989 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2280 struct btrfs_root *root = fs_info->extent_root; 2990 struct btrfs_root *root = fs_info->extent_root;
2281 struct btrfs_root *csum_root = fs_info->csum_root; 2991 struct btrfs_root *csum_root = fs_info->csum_root;
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2302 u64 extent_logical; 3012 u64 extent_logical;
2303 u64 extent_physical; 3013 u64 extent_physical;
2304 u64 extent_len; 3014 u64 extent_len;
3015 u64 stripe_logical;
3016 u64 stripe_end;
2305 struct btrfs_device *extent_dev; 3017 struct btrfs_device *extent_dev;
2306 int extent_mirror_num; 3018 int extent_mirror_num;
2307 int stop_loop = 0; 3019 int stop_loop = 0;
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2327 mirror_num = num % map->num_stripes + 1; 3039 mirror_num = num % map->num_stripes + 1;
2328 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3040 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2329 BTRFS_BLOCK_GROUP_RAID6)) { 3041 BTRFS_BLOCK_GROUP_RAID6)) {
2330 get_raid56_logic_offset(physical, num, map, &offset); 3042 get_raid56_logic_offset(physical, num, map, &offset, NULL);
2331 increment = map->stripe_len * nr_data_stripes(map); 3043 increment = map->stripe_len * nr_data_stripes(map);
2332 mirror_num = 1; 3044 mirror_num = 1;
2333 } else { 3045 } else {
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2339 if (!path) 3051 if (!path)
2340 return -ENOMEM; 3052 return -ENOMEM;
2341 3053
3054 ppath = btrfs_alloc_path();
3055 if (!ppath) {
3056 btrfs_free_path(ppath);
3057 return -ENOMEM;
3058 }
3059
2342 /* 3060 /*
2343 * work on commit root. The related disk blocks are static as 3061 * work on commit root. The related disk blocks are static as
2344 * long as COW is applied. This means, it is save to rewrite 3062 * long as COW is applied. This means, it is save to rewrite
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2357 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3075 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2358 BTRFS_BLOCK_GROUP_RAID6)) { 3076 BTRFS_BLOCK_GROUP_RAID6)) {
2359 get_raid56_logic_offset(physical_end, num, 3077 get_raid56_logic_offset(physical_end, num,
2360 map, &logic_end); 3078 map, &logic_end, NULL);
2361 logic_end += base; 3079 logic_end += base;
2362 } else { 3080 } else {
2363 logic_end = logical + increment * nstripes; 3081 logic_end = logical + increment * nstripes;
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2404 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3122 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2405 BTRFS_BLOCK_GROUP_RAID6)) { 3123 BTRFS_BLOCK_GROUP_RAID6)) {
2406 ret = get_raid56_logic_offset(physical, num, 3124 ret = get_raid56_logic_offset(physical, num,
2407 map, &logical); 3125 map, &logical, &stripe_logical);
2408 logical += base; 3126 logical += base;
2409 if (ret) 3127 if (ret) {
3128 stripe_logical += base;
3129 stripe_end = stripe_logical + increment - 1;
3130 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3131 ppath, stripe_logical,
3132 stripe_end);
3133 if (ret)
3134 goto out;
2410 goto skip; 3135 goto skip;
3136 }
2411 } 3137 }
2412 /* 3138 /*
2413 * canceled? 3139 * canceled?
@@ -2558,13 +3284,25 @@ again:
2558 * loop until we find next data stripe 3284 * loop until we find next data stripe
2559 * or we have finished all stripes. 3285 * or we have finished all stripes.
2560 */ 3286 */
2561 do { 3287loop:
2562 physical += map->stripe_len; 3288 physical += map->stripe_len;
2563 ret = get_raid56_logic_offset( 3289 ret = get_raid56_logic_offset(physical,
2564 physical, num, 3290 num, map, &logical,
2565 map, &logical); 3291 &stripe_logical);
2566 logical += base; 3292 logical += base;
2567 } while (physical < physical_end && ret); 3293
3294 if (ret && physical < physical_end) {
3295 stripe_logical += base;
3296 stripe_end = stripe_logical +
3297 increment - 1;
3298 ret = scrub_raid56_parity(sctx,
3299 map, scrub_dev, ppath,
3300 stripe_logical,
3301 stripe_end);
3302 if (ret)
3303 goto out;
3304 goto loop;
3305 }
2568 } else { 3306 } else {
2569 physical += map->stripe_len; 3307 physical += map->stripe_len;
2570 logical += increment; 3308 logical += increment;
@@ -2605,6 +3343,7 @@ out:
2605 3343
2606 blk_finish_plug(&plug); 3344 blk_finish_plug(&plug);
2607 btrfs_free_path(path); 3345 btrfs_free_path(path);
3346 btrfs_free_path(ppath);
2608 return ret < 0 ? ret : 0; 3347 return ret < 0 ? ret : 0;
2609} 3348}
2610 3349
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ff2b35114972..0144790e296e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4879,13 +4879,15 @@ static inline int parity_smaller(u64 a, u64 b)
4879static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) 4879static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
4880{ 4880{
4881 struct btrfs_bio_stripe s; 4881 struct btrfs_bio_stripe s;
4882 int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
4882 int i; 4883 int i;
4883 u64 l; 4884 u64 l;
4884 int again = 1; 4885 int again = 1;
4886 int m;
4885 4887
4886 while (again) { 4888 while (again) {
4887 again = 0; 4889 again = 0;
4888 for (i = 0; i < bbio->num_stripes - 1; i++) { 4890 for (i = 0; i < real_stripes - 1; i++) {
4889 if (parity_smaller(raid_map[i], raid_map[i+1])) { 4891 if (parity_smaller(raid_map[i], raid_map[i+1])) {
4890 s = bbio->stripes[i]; 4892 s = bbio->stripes[i];
4891 l = raid_map[i]; 4893 l = raid_map[i];
@@ -4893,6 +4895,14 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map)
4893 raid_map[i] = raid_map[i+1]; 4895 raid_map[i] = raid_map[i+1];
4894 bbio->stripes[i+1] = s; 4896 bbio->stripes[i+1] = s;
4895 raid_map[i+1] = l; 4897 raid_map[i+1] = l;
4898
4899 if (bbio->tgtdev_map) {
4900 m = bbio->tgtdev_map[i];
4901 bbio->tgtdev_map[i] =
4902 bbio->tgtdev_map[i + 1];
4903 bbio->tgtdev_map[i + 1] = m;
4904 }
4905
4896 again = 1; 4906 again = 1;
4897 } 4907 }
4898 } 4908 }
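sort_parity_stripes() now has to keep three arrays consistent while it bubbles the parity stripes to the end: the stripe array, the raid_map, and the new tgtdev_map that records which target-device stripe mirrors each source stripe. The swap-in-lockstep idea in a userspace sketch; the ordering rule here is a plain ascending sort, the map is swapped unconditionally, and all values are illustrative:

#include <stdint.h>
#include <stdio.h>

#define NSTRIPES 4

int main(void)
{
    /* parallel arrays: the sort key, plus two payloads that must stay
     * attached to their key (like stripes[] and tgtdev_map[]) */
    uint64_t raid_map[NSTRIPES] = { 196608, 65536, (uint64_t)-2, 131072 };
    char     stripe[NSTRIPES]   = { 'c', 'a', 'P', 'b' };
    int      tgtdev[NSTRIPES]   = { 12, 10, 13, 11 };
    int i, again = 1;

    while (again) {
        again = 0;
        for (i = 0; i < NSTRIPES - 1; i++) {
            if (raid_map[i] > raid_map[i + 1]) {
                uint64_t l = raid_map[i];
                char s = stripe[i];
                int m = tgtdev[i];

                /* swap all three entries together */
                raid_map[i] = raid_map[i + 1];
                stripe[i] = stripe[i + 1];
                tgtdev[i] = tgtdev[i + 1];
                raid_map[i + 1] = l;
                stripe[i + 1] = s;
                tgtdev[i + 1] = m;
                again = 1;
            }
        }
    }

    for (i = 0; i < NSTRIPES; i++)
        printf("%c -> tgtdev %d (raid_map %llu)\n", stripe[i], tgtdev[i],
               (unsigned long long)raid_map[i]);
    return 0;
}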
@@ -4921,6 +4931,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4921 int ret = 0; 4931 int ret = 0;
4922 int num_stripes; 4932 int num_stripes;
4923 int max_errors = 0; 4933 int max_errors = 0;
4934 int tgtdev_indexes = 0;
4924 struct btrfs_bio *bbio = NULL; 4935 struct btrfs_bio *bbio = NULL;
4925 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 4936 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
4926 int dev_replace_is_ongoing = 0; 4937 int dev_replace_is_ongoing = 0;
@@ -5159,15 +5170,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5159 BTRFS_BLOCK_GROUP_RAID6)) { 5170 BTRFS_BLOCK_GROUP_RAID6)) {
5160 u64 tmp; 5171 u64 tmp;
5161 5172
5162 if (bbio_ret && ((rw & REQ_WRITE) || mirror_num > 1) 5173 if (raid_map_ret &&
5163 && raid_map_ret) { 5174 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5175 mirror_num > 1)) {
5164 int i, rot; 5176 int i, rot;
5165 5177
5166 /* push stripe_nr back to the start of the full stripe */ 5178 /* push stripe_nr back to the start of the full stripe */
5167 stripe_nr = raid56_full_stripe_start; 5179 stripe_nr = raid56_full_stripe_start;
5168 do_div(stripe_nr, stripe_len); 5180 do_div(stripe_nr, stripe_len * nr_data_stripes(map));
5169
5170 stripe_index = do_div(stripe_nr, nr_data_stripes(map));
5171 5181
5172 /* RAID[56] write or recovery. Return all stripes */ 5182 /* RAID[56] write or recovery. Return all stripes */
5173 num_stripes = map->num_stripes; 5183 num_stripes = map->num_stripes;
@@ -5233,14 +5243,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5233 num_alloc_stripes <<= 1; 5243 num_alloc_stripes <<= 1;
5234 if (rw & REQ_GET_READ_MIRRORS) 5244 if (rw & REQ_GET_READ_MIRRORS)
5235 num_alloc_stripes++; 5245 num_alloc_stripes++;
5246 tgtdev_indexes = num_stripes;
5236 } 5247 }
5237 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); 5248
5249 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes),
5250 GFP_NOFS);
5238 if (!bbio) { 5251 if (!bbio) {
5239 kfree(raid_map); 5252 kfree(raid_map);
5240 ret = -ENOMEM; 5253 ret = -ENOMEM;
5241 goto out; 5254 goto out;
5242 } 5255 }
5243 atomic_set(&bbio->error, 0); 5256 atomic_set(&bbio->error, 0);
5257 if (dev_replace_is_ongoing)
5258 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
5244 5259
5245 if (rw & REQ_DISCARD) { 5260 if (rw & REQ_DISCARD) {
5246 int factor = 0; 5261 int factor = 0;
@@ -5325,6 +5340,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5325 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) 5340 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
5326 max_errors = btrfs_chunk_max_errors(map); 5341 max_errors = btrfs_chunk_max_errors(map);
5327 5342
5343 tgtdev_indexes = 0;
5328 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && 5344 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
5329 dev_replace->tgtdev != NULL) { 5345 dev_replace->tgtdev != NULL) {
5330 int index_where_to_add; 5346 int index_where_to_add;
@@ -5353,8 +5369,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5353 new->physical = old->physical; 5369 new->physical = old->physical;
5354 new->length = old->length; 5370 new->length = old->length;
5355 new->dev = dev_replace->tgtdev; 5371 new->dev = dev_replace->tgtdev;
5372 bbio->tgtdev_map[i] = index_where_to_add;
5356 index_where_to_add++; 5373 index_where_to_add++;
5357 max_errors++; 5374 max_errors++;
5375 tgtdev_indexes++;
5358 } 5376 }
5359 } 5377 }
5360 num_stripes = index_where_to_add; 5378 num_stripes = index_where_to_add;
@@ -5400,7 +5418,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5400 tgtdev_stripe->length = 5418 tgtdev_stripe->length =
5401 bbio->stripes[index_srcdev].length; 5419 bbio->stripes[index_srcdev].length;
5402 tgtdev_stripe->dev = dev_replace->tgtdev; 5420 tgtdev_stripe->dev = dev_replace->tgtdev;
5421 bbio->tgtdev_map[index_srcdev] = num_stripes;
5403 5422
5423 tgtdev_indexes++;
5404 num_stripes++; 5424 num_stripes++;
5405 } 5425 }
5406 } 5426 }
@@ -5410,6 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5410 bbio->num_stripes = num_stripes; 5430 bbio->num_stripes = num_stripes;
5411 bbio->max_errors = max_errors; 5431 bbio->max_errors = max_errors;
5412 bbio->mirror_num = mirror_num; 5432 bbio->mirror_num = mirror_num;
5433 bbio->num_tgtdevs = tgtdev_indexes;
5413 5434
5414 /* 5435 /*
5415 * this is the case that REQ_READ && dev_replace_is_ongoing && 5436 * this is the case that REQ_READ && dev_replace_is_ongoing &&
@@ -5441,6 +5462,16 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5441 mirror_num, NULL); 5462 mirror_num, NULL);
5442} 5463}
5443 5464
5465/* For Scrub/replace */
5466int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
5467 u64 logical, u64 *length,
5468 struct btrfs_bio **bbio_ret, int mirror_num,
5469 u64 **raid_map_ret)
5470{
5471 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
5472 mirror_num, raid_map_ret);
5473}
5474
5444int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 5475int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5445 u64 chunk_start, u64 physical, u64 devid, 5476 u64 chunk_start, u64 physical, u64 devid,
5446 u64 **logical, int *naddrs, int *stripe_len) 5477 u64 **logical, int *naddrs, int *stripe_len)
@@ -5810,12 +5841,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5810 } else { 5841 } else {
5811 ret = raid56_parity_recover(root, bio, bbio, 5842 ret = raid56_parity_recover(root, bio, bbio,
5812 raid_map, map_length, 5843 raid_map, map_length,
5813 mirror_num); 5844 mirror_num, 1);
5814 } 5845 }
5815 /* 5846
5816 * FIXME, replace dosen't support raid56 yet, please fix
5817 * it in the future.
5818 */
5819 btrfs_bio_counter_dec(root->fs_info); 5847 btrfs_bio_counter_dec(root->fs_info);
5820 return ret; 5848 return ret;
5821 } 5849 }
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 637bcfadadb2..d6fe73c0f4a2 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -292,7 +292,7 @@ struct btrfs_bio_stripe {
292struct btrfs_bio; 292struct btrfs_bio;
293typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); 293typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
294 294
295#define BTRFS_BIO_ORIG_BIO_SUBMITTED 0x1 295#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0)
296 296
297struct btrfs_bio { 297struct btrfs_bio {
298 atomic_t stripes_pending; 298 atomic_t stripes_pending;
@@ -305,6 +305,8 @@ struct btrfs_bio {
305 int max_errors; 305 int max_errors;
306 int num_stripes; 306 int num_stripes;
307 int mirror_num; 307 int mirror_num;
308 int num_tgtdevs;
309 int *tgtdev_map;
308 struct btrfs_bio_stripe stripes[]; 310 struct btrfs_bio_stripe stripes[];
309}; 311};
310 312
@@ -387,12 +389,18 @@ struct btrfs_balance_control {
387int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, 389int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
388 u64 end, u64 *length); 390 u64 end, u64 *length);
389 391
390#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ 392#define btrfs_bio_size(total_stripes, real_stripes) \
391 (sizeof(struct btrfs_bio_stripe) * (n))) 393 (sizeof(struct btrfs_bio) + \
394 (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \
395 (sizeof(int) * (real_stripes)))
392 396
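btrfs_bio_size() now reserves room for one int per real stripe behind the stripe array, and __btrfs_map_block() points tgtdev_map at that tail with (int *)(bbio->stripes + num_alloc_stripes). A userspace sketch of that single-allocation layout; the struct and field names are stand-ins, not the btrfs definitions:

#include <stdio.h>
#include <stdlib.h>

struct stripe {
    unsigned long long physical;
};

struct bio_map {
    int num_stripes;
    int *tgtdev_map;            /* points into the same allocation */
    struct stripe stripes[];    /* flexible array member */
};

#define BIO_MAP_SIZE(total, real) \
    (sizeof(struct bio_map) + \
     sizeof(struct stripe) * (total) + \
     sizeof(int) * (real))

int main(void)
{
    int total = 3, real = 3, i;
    struct bio_map *bm = calloc(1, BIO_MAP_SIZE(total, real));

    if (!bm)
        return 1;

    bm->num_stripes = total;
    /* the int map lives right after the last stripe slot */
    bm->tgtdev_map = (int *)(bm->stripes + total);

    for (i = 0; i < real; i++)
        bm->tgtdev_map[i] = i;    /* identity mapping, just for show */

    printf("allocation is %zu bytes for %d stripes + %d map entries\n",
           BIO_MAP_SIZE(total, real), total, real);
    free(bm);
    return 0;
}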
393int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 397int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
394 u64 logical, u64 *length, 398 u64 logical, u64 *length,
395 struct btrfs_bio **bbio_ret, int mirror_num); 399 struct btrfs_bio **bbio_ret, int mirror_num);
400int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
401 u64 logical, u64 *length,
402 struct btrfs_bio **bbio_ret, int mirror_num,
403 u64 **raid_map_ret);
396int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, 404int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
397 u64 chunk_start, u64 physical, u64 devid, 405 u64 chunk_start, u64 physical, u64 devid,
398 u64 **logical, int *naddrs, int *stripe_len); 406 u64 **logical, int *naddrs, int *stripe_len);