path: root/fs/btrfs/raid56.c
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r--  fs/btrfs/raid56.c  763
 1 file changed, 687 insertions(+), 76 deletions(-)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 6a41631cb959..8ab2a17bbba8 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -58,9 +58,23 @@
58 */ 58 */
59#define RBIO_CACHE_READY_BIT 3 59#define RBIO_CACHE_READY_BIT 3
60 60
61/*
62 * bbio and raid_map are managed by the caller, so we shouldn't free
63 * them here. Besides that, rbios with this flag must not be cached,
64 * because we need raid_map to check whether two rbios belong to the
65 * same stripe, and it is very likely that the caller has already
66 * freed raid_map, so don't cache those rbios.
67 */
68#define RBIO_HOLD_BBIO_MAP_BIT 4
61 69
62#define RBIO_CACHE_SIZE 1024 70#define RBIO_CACHE_SIZE 1024
63 71
72enum btrfs_rbio_ops {
73 BTRFS_RBIO_WRITE = 0,
74 BTRFS_RBIO_READ_REBUILD = 1,
75 BTRFS_RBIO_PARITY_SCRUB = 2,
76};
77
64struct btrfs_raid_bio { 78struct btrfs_raid_bio {
65 struct btrfs_fs_info *fs_info; 79 struct btrfs_fs_info *fs_info;
66 struct btrfs_bio *bbio; 80 struct btrfs_bio *bbio;
@@ -117,13 +131,16 @@ struct btrfs_raid_bio {
117 /* number of data stripes (no p/q) */ 131 /* number of data stripes (no p/q) */
118 int nr_data; 132 int nr_data;
119 133
134 int real_stripes;
135
136 int stripe_npages;
120 /* 137 /*
121 * set if we're doing a parity rebuild 138 * set if we're doing a parity rebuild
122 * for a read from higher up, which is handled 139 * for a read from higher up, which is handled
123 * differently from a parity rebuild as part of 140 * differently from a parity rebuild as part of
124 * rmw 141 * rmw
125 */ 142 */
126 int read_rebuild; 143 enum btrfs_rbio_ops operation;
127 144
128 /* first bad stripe */ 145 /* first bad stripe */
129 int faila; 146 int faila;
@@ -131,6 +148,7 @@ struct btrfs_raid_bio {
131 /* second bad stripe (for raid6 use) */ 148 /* second bad stripe (for raid6 use) */
132 int failb; 149 int failb;
133 150
151 int scrubp;
134 /* 152 /*
135 * number of pages needed to represent the full 153 * number of pages needed to represent the full
136 * stripe 154 * stripe
@@ -144,8 +162,13 @@ struct btrfs_raid_bio {
144 */ 162 */
145 int bio_list_bytes; 163 int bio_list_bytes;
146 164
165 int generic_bio_cnt;
166
147 atomic_t refs; 167 atomic_t refs;
148 168
169 atomic_t stripes_pending;
170
171 atomic_t error;
149 /* 172 /*
150 * these are two arrays of pointers. We allocate the 173 * these are two arrays of pointers. We allocate the
151 * rbio big enough to hold them both and setup their 174 * rbio big enough to hold them both and setup their
@@ -162,6 +185,11 @@ struct btrfs_raid_bio {
162 * here for faster lookup 185 * here for faster lookup
163 */ 186 */
164 struct page **bio_pages; 187 struct page **bio_pages;
188
189 /*
190 * bitmap to record which horizontal stripes have data
191 */
192 unsigned long *dbitmap;
165}; 193};
166 194
167static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); 195static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -176,6 +204,10 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio);
176static void index_rbio_pages(struct btrfs_raid_bio *rbio); 204static void index_rbio_pages(struct btrfs_raid_bio *rbio);
177static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); 205static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
178 206
207static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
208 int need_check);
209static void async_scrub_parity(struct btrfs_raid_bio *rbio);
210
179/* 211/*
180 * the stripe hash table is used for locking, and to collect 212 * the stripe hash table is used for locking, and to collect
181 * bios in hopes of making a full stripe 213 * bios in hopes of making a full stripe
@@ -324,6 +356,7 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
324{ 356{
325 bio_list_merge(&dest->bio_list, &victim->bio_list); 357 bio_list_merge(&dest->bio_list, &victim->bio_list);
326 dest->bio_list_bytes += victim->bio_list_bytes; 358 dest->bio_list_bytes += victim->bio_list_bytes;
359 dest->generic_bio_cnt += victim->generic_bio_cnt;
327 bio_list_init(&victim->bio_list); 360 bio_list_init(&victim->bio_list);
328} 361}
329 362
@@ -577,11 +610,20 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
577 cur->raid_map[0]) 610 cur->raid_map[0])
578 return 0; 611 return 0;
579 612
580 /* reads can't merge with writes */ 613 /* we can't merge with different operations */
581 if (last->read_rebuild != 614 if (last->operation != cur->operation)
582 cur->read_rebuild) { 615 return 0;
616 /*
617 * A parity scrub reads the full stripe from the drive,
618 * checks and repairs the parity and writes the new results.
619 *
620 * We're not allowed to add any new bios to the
621 * bio list here; anyone else that wants to
622 * change this stripe needs to do their own rmw.
623 */
624 if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
625 cur->operation == BTRFS_RBIO_PARITY_SCRUB)
583 return 0; 626 return 0;
584 }
585 627
586 return 1; 628 return 1;
587} 629}
@@ -601,7 +643,7 @@ static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
601 */ 643 */
602static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) 644static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
603{ 645{
604 if (rbio->nr_data + 1 == rbio->bbio->num_stripes) 646 if (rbio->nr_data + 1 == rbio->real_stripes)
605 return NULL; 647 return NULL;
606 648
607 index += ((rbio->nr_data + 1) * rbio->stripe_len) >> 649 index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
@@ -772,11 +814,14 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
772 spin_unlock(&rbio->bio_list_lock); 814 spin_unlock(&rbio->bio_list_lock);
773 spin_unlock_irqrestore(&h->lock, flags); 815 spin_unlock_irqrestore(&h->lock, flags);
774 816
775 if (next->read_rebuild) 817 if (next->operation == BTRFS_RBIO_READ_REBUILD)
776 async_read_rebuild(next); 818 async_read_rebuild(next);
777 else { 819 else if (next->operation == BTRFS_RBIO_WRITE) {
778 steal_rbio(rbio, next); 820 steal_rbio(rbio, next);
779 async_rmw_stripe(next); 821 async_rmw_stripe(next);
822 } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
823 steal_rbio(rbio, next);
824 async_scrub_parity(next);
780 } 825 }
781 826
782 goto done_nolock; 827 goto done_nolock;
@@ -796,6 +841,21 @@ done_nolock:
796 remove_rbio_from_cache(rbio); 841 remove_rbio_from_cache(rbio);
797} 842}
798 843
844static inline void
845__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
846{
847 if (need) {
848 kfree(raid_map);
849 kfree(bbio);
850 }
851}
852
853static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
854{
855 __free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
856 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
857}
858
799static void __free_raid_bio(struct btrfs_raid_bio *rbio) 859static void __free_raid_bio(struct btrfs_raid_bio *rbio)
800{ 860{
801 int i; 861 int i;
@@ -814,8 +874,9 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
814 rbio->stripe_pages[i] = NULL; 874 rbio->stripe_pages[i] = NULL;
815 } 875 }
816 } 876 }
817 kfree(rbio->raid_map); 877
818 kfree(rbio->bbio); 878 free_bbio_and_raid_map(rbio);
879
819 kfree(rbio); 880 kfree(rbio);
820} 881}
821 882
@@ -833,6 +894,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
833{ 894{
834 struct bio *cur = bio_list_get(&rbio->bio_list); 895 struct bio *cur = bio_list_get(&rbio->bio_list);
835 struct bio *next; 896 struct bio *next;
897
898 if (rbio->generic_bio_cnt)
899 btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
900
836 free_raid_bio(rbio); 901 free_raid_bio(rbio);
837 902
838 while (cur) { 903 while (cur) {
@@ -858,13 +923,13 @@ static void raid_write_end_io(struct bio *bio, int err)
858 923
859 bio_put(bio); 924 bio_put(bio);
860 925
861 if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) 926 if (!atomic_dec_and_test(&rbio->stripes_pending))
862 return; 927 return;
863 928
864 err = 0; 929 err = 0;
865 930
866 /* OK, we have read all the stripes we need to. */ 931 /* OK, we have read all the stripes we need to. */
867 if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) 932 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
868 err = -EIO; 933 err = -EIO;
869 934
870 rbio_orig_end_io(rbio, err, 0); 935 rbio_orig_end_io(rbio, err, 0);
@@ -925,16 +990,16 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
925{ 990{
926 struct btrfs_raid_bio *rbio; 991 struct btrfs_raid_bio *rbio;
927 int nr_data = 0; 992 int nr_data = 0;
928 int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes); 993 int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
994 int num_pages = rbio_nr_pages(stripe_len, real_stripes);
995 int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
929 void *p; 996 void *p;
930 997
931 rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2, 998 rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
999 DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
932 GFP_NOFS); 1000 GFP_NOFS);
933 if (!rbio) { 1001 if (!rbio)
934 kfree(raid_map);
935 kfree(bbio);
936 return ERR_PTR(-ENOMEM); 1002 return ERR_PTR(-ENOMEM);
937 }
938 1003
939 bio_list_init(&rbio->bio_list); 1004 bio_list_init(&rbio->bio_list);
940 INIT_LIST_HEAD(&rbio->plug_list); 1005 INIT_LIST_HEAD(&rbio->plug_list);
@@ -946,9 +1011,13 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
946 rbio->fs_info = root->fs_info; 1011 rbio->fs_info = root->fs_info;
947 rbio->stripe_len = stripe_len; 1012 rbio->stripe_len = stripe_len;
948 rbio->nr_pages = num_pages; 1013 rbio->nr_pages = num_pages;
1014 rbio->real_stripes = real_stripes;
1015 rbio->stripe_npages = stripe_npages;
949 rbio->faila = -1; 1016 rbio->faila = -1;
950 rbio->failb = -1; 1017 rbio->failb = -1;
951 atomic_set(&rbio->refs, 1); 1018 atomic_set(&rbio->refs, 1);
1019 atomic_set(&rbio->error, 0);
1020 atomic_set(&rbio->stripes_pending, 0);
952 1021
953 /* 1022 /*
954 * the stripe_pages and bio_pages array point to the extra 1023 * the stripe_pages and bio_pages array point to the extra
@@ -957,11 +1026,12 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
957 p = rbio + 1; 1026 p = rbio + 1;
958 rbio->stripe_pages = p; 1027 rbio->stripe_pages = p;
959 rbio->bio_pages = p + sizeof(struct page *) * num_pages; 1028 rbio->bio_pages = p + sizeof(struct page *) * num_pages;
1029 rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
960 1030
961 if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE) 1031 if (raid_map[real_stripes - 1] == RAID6_Q_STRIPE)
962 nr_data = bbio->num_stripes - 2; 1032 nr_data = real_stripes - 2;
963 else 1033 else
964 nr_data = bbio->num_stripes - 1; 1034 nr_data = real_stripes - 1;
965 1035
966 rbio->nr_data = nr_data; 1036 rbio->nr_data = nr_data;
967 return rbio; 1037 return rbio;
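
The hunk above folds the two page-pointer arrays and the new dbitmap into the single allocation that already carries the rbio. A minimal userspace sketch of that trailing-memory layout, with purely illustrative names (fake_page stands in for struct page, calloc for kzalloc; the bitmap is rounded up to whole longs here to keep word-sized accesses safe):

#include <limits.h>
#include <stdlib.h>

struct fake_page;			/* stand-in for struct page */

struct fake_rbio {
	int nr_pages;			/* pages across all stripes      */
	int stripe_npages;		/* pages per single stripe       */
	struct fake_page **stripe_pages;
	struct fake_page **bio_pages;
	unsigned long *dbitmap;		/* one bit per horizontal stripe */
	/* the three regions above live directly after this struct */
};

static struct fake_rbio *alloc_fake_rbio(int num_pages, int stripe_npages)
{
	size_t longs = (stripe_npages + CHAR_BIT * sizeof(long) - 1) /
		       (CHAR_BIT * sizeof(long));
	struct fake_rbio *rbio;
	char *p;

	/* one zeroed allocation: struct + two pointer arrays + bitmap */
	rbio = calloc(1, sizeof(*rbio) +
			 2 * num_pages * sizeof(struct fake_page *) +
			 longs * sizeof(long));
	if (!rbio)
		return NULL;

	rbio->nr_pages = num_pages;
	rbio->stripe_npages = stripe_npages;

	/* carve the trailing memory into the three regions */
	p = (char *)(rbio + 1);
	rbio->stripe_pages = (struct fake_page **)p;
	rbio->bio_pages = (struct fake_page **)
			  (p + num_pages * sizeof(struct fake_page *));
	rbio->dbitmap = (unsigned long *)
			(p + 2 * num_pages * sizeof(struct fake_page *));
	return rbio;
}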
@@ -1073,7 +1143,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1073static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) 1143static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
1074{ 1144{
1075 if (rbio->faila >= 0 || rbio->failb >= 0) { 1145 if (rbio->faila >= 0 || rbio->failb >= 0) {
1076 BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1); 1146 BUG_ON(rbio->faila == rbio->real_stripes - 1);
1077 __raid56_parity_recover(rbio); 1147 __raid56_parity_recover(rbio);
1078 } else { 1148 } else {
1079 finish_rmw(rbio); 1149 finish_rmw(rbio);
@@ -1134,7 +1204,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1134static noinline void finish_rmw(struct btrfs_raid_bio *rbio) 1204static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1135{ 1205{
1136 struct btrfs_bio *bbio = rbio->bbio; 1206 struct btrfs_bio *bbio = rbio->bbio;
1137 void *pointers[bbio->num_stripes]; 1207 void *pointers[rbio->real_stripes];
1138 int stripe_len = rbio->stripe_len; 1208 int stripe_len = rbio->stripe_len;
1139 int nr_data = rbio->nr_data; 1209 int nr_data = rbio->nr_data;
1140 int stripe; 1210 int stripe;
@@ -1148,11 +1218,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1148 1218
1149 bio_list_init(&bio_list); 1219 bio_list_init(&bio_list);
1150 1220
1151 if (bbio->num_stripes - rbio->nr_data == 1) { 1221 if (rbio->real_stripes - rbio->nr_data == 1) {
1152 p_stripe = bbio->num_stripes - 1; 1222 p_stripe = rbio->real_stripes - 1;
1153 } else if (bbio->num_stripes - rbio->nr_data == 2) { 1223 } else if (rbio->real_stripes - rbio->nr_data == 2) {
1154 p_stripe = bbio->num_stripes - 2; 1224 p_stripe = rbio->real_stripes - 2;
1155 q_stripe = bbio->num_stripes - 1; 1225 q_stripe = rbio->real_stripes - 1;
1156 } else { 1226 } else {
1157 BUG(); 1227 BUG();
1158 } 1228 }
@@ -1169,7 +1239,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1169 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 1239 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1170 spin_unlock_irq(&rbio->bio_list_lock); 1240 spin_unlock_irq(&rbio->bio_list_lock);
1171 1241
1172 atomic_set(&rbio->bbio->error, 0); 1242 atomic_set(&rbio->error, 0);
1173 1243
1174 /* 1244 /*
1175 * now that we've set rmw_locked, run through the 1245 * now that we've set rmw_locked, run through the
@@ -1209,7 +1279,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1209 SetPageUptodate(p); 1279 SetPageUptodate(p);
1210 pointers[stripe++] = kmap(p); 1280 pointers[stripe++] = kmap(p);
1211 1281
1212 raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE, 1282 raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
1213 pointers); 1283 pointers);
1214 } else { 1284 } else {
1215 /* raid5 */ 1285 /* raid5 */
@@ -1218,7 +1288,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1218 } 1288 }
1219 1289
1220 1290
1221 for (stripe = 0; stripe < bbio->num_stripes; stripe++) 1291 for (stripe = 0; stripe < rbio->real_stripes; stripe++)
1222 kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); 1292 kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
1223 } 1293 }
1224 1294
@@ -1227,7 +1297,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1227 * higher layers (the bio_list in our rbio) and our p/q. Ignore 1297 * higher layers (the bio_list in our rbio) and our p/q. Ignore
1228 * everything else. 1298 * everything else.
1229 */ 1299 */
1230 for (stripe = 0; stripe < bbio->num_stripes; stripe++) { 1300 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1231 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) { 1301 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1232 struct page *page; 1302 struct page *page;
1233 if (stripe < rbio->nr_data) { 1303 if (stripe < rbio->nr_data) {
@@ -1245,8 +1315,34 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1245 } 1315 }
1246 } 1316 }
1247 1317
1248 atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list)); 1318 if (likely(!bbio->num_tgtdevs))
1249 BUG_ON(atomic_read(&bbio->stripes_pending) == 0); 1319 goto write_data;
1320
1321 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1322 if (!bbio->tgtdev_map[stripe])
1323 continue;
1324
1325 for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
1326 struct page *page;
1327 if (stripe < rbio->nr_data) {
1328 page = page_in_rbio(rbio, stripe, pagenr, 1);
1329 if (!page)
1330 continue;
1331 } else {
1332 page = rbio_stripe_page(rbio, stripe, pagenr);
1333 }
1334
1335 ret = rbio_add_io_page(rbio, &bio_list, page,
1336 rbio->bbio->tgtdev_map[stripe],
1337 pagenr, rbio->stripe_len);
1338 if (ret)
1339 goto cleanup;
1340 }
1341 }
1342
1343write_data:
1344 atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
1345 BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
1250 1346
1251 while (1) { 1347 while (1) {
1252 bio = bio_list_pop(&bio_list); 1348 bio = bio_list_pop(&bio_list);
@@ -1283,7 +1379,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1283 stripe = &rbio->bbio->stripes[i]; 1379 stripe = &rbio->bbio->stripes[i];
1284 stripe_start = stripe->physical; 1380 stripe_start = stripe->physical;
1285 if (physical >= stripe_start && 1381 if (physical >= stripe_start &&
1286 physical < stripe_start + rbio->stripe_len) { 1382 physical < stripe_start + rbio->stripe_len &&
1383 bio->bi_bdev == stripe->dev->bdev) {
1287 return i; 1384 return i;
1288 } 1385 }
1289 } 1386 }
@@ -1331,11 +1428,11 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
1331 if (rbio->faila == -1) { 1428 if (rbio->faila == -1) {
1332 /* first failure on this rbio */ 1429 /* first failure on this rbio */
1333 rbio->faila = failed; 1430 rbio->faila = failed;
1334 atomic_inc(&rbio->bbio->error); 1431 atomic_inc(&rbio->error);
1335 } else if (rbio->failb == -1) { 1432 } else if (rbio->failb == -1) {
1336 /* second failure on this rbio */ 1433 /* second failure on this rbio */
1337 rbio->failb = failed; 1434 rbio->failb = failed;
1338 atomic_inc(&rbio->bbio->error); 1435 atomic_inc(&rbio->error);
1339 } else { 1436 } else {
1340 ret = -EIO; 1437 ret = -EIO;
1341 } 1438 }
@@ -1394,11 +1491,11 @@ static void raid_rmw_end_io(struct bio *bio, int err)
1394 1491
1395 bio_put(bio); 1492 bio_put(bio);
1396 1493
1397 if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) 1494 if (!atomic_dec_and_test(&rbio->stripes_pending))
1398 return; 1495 return;
1399 1496
1400 err = 0; 1497 err = 0;
1401 if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) 1498 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
1402 goto cleanup; 1499 goto cleanup;
1403 1500
1404 /* 1501 /*
@@ -1439,7 +1536,6 @@ static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1439static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) 1536static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1440{ 1537{
1441 int bios_to_read = 0; 1538 int bios_to_read = 0;
1442 struct btrfs_bio *bbio = rbio->bbio;
1443 struct bio_list bio_list; 1539 struct bio_list bio_list;
1444 int ret; 1540 int ret;
1445 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); 1541 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
@@ -1455,7 +1551,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1455 1551
1456 index_rbio_pages(rbio); 1552 index_rbio_pages(rbio);
1457 1553
1458 atomic_set(&rbio->bbio->error, 0); 1554 atomic_set(&rbio->error, 0);
1459 /* 1555 /*
1460 * build a list of bios to read all the missing parts of this 1556 * build a list of bios to read all the missing parts of this
1461 * stripe 1557 * stripe
@@ -1503,7 +1599,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1503 * the bbio may be freed once we submit the last bio. Make sure 1599 * the bbio may be freed once we submit the last bio. Make sure
1504 * not to touch it after that 1600 * not to touch it after that
1505 */ 1601 */
1506 atomic_set(&bbio->stripes_pending, bios_to_read); 1602 atomic_set(&rbio->stripes_pending, bios_to_read);
1507 while (1) { 1603 while (1) {
1508 bio = bio_list_pop(&bio_list); 1604 bio = bio_list_pop(&bio_list);
1509 if (!bio) 1605 if (!bio)
@@ -1686,19 +1782,30 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1686 struct btrfs_raid_bio *rbio; 1782 struct btrfs_raid_bio *rbio;
1687 struct btrfs_plug_cb *plug = NULL; 1783 struct btrfs_plug_cb *plug = NULL;
1688 struct blk_plug_cb *cb; 1784 struct blk_plug_cb *cb;
1785 int ret;
1689 1786
1690 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1787 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1691 if (IS_ERR(rbio)) 1788 if (IS_ERR(rbio)) {
1789 __free_bbio_and_raid_map(bbio, raid_map, 1);
1692 return PTR_ERR(rbio); 1790 return PTR_ERR(rbio);
1791 }
1693 bio_list_add(&rbio->bio_list, bio); 1792 bio_list_add(&rbio->bio_list, bio);
1694 rbio->bio_list_bytes = bio->bi_iter.bi_size; 1793 rbio->bio_list_bytes = bio->bi_iter.bi_size;
1794 rbio->operation = BTRFS_RBIO_WRITE;
1795
1796 btrfs_bio_counter_inc_noblocked(root->fs_info);
1797 rbio->generic_bio_cnt = 1;
1695 1798
1696 /* 1799 /*
1697 * don't plug on full rbios, just get them out the door 1800 * don't plug on full rbios, just get them out the door
1698 * as quickly as we can 1801 * as quickly as we can
1699 */ 1802 */
1700 if (rbio_is_full(rbio)) 1803 if (rbio_is_full(rbio)) {
1701 return full_stripe_write(rbio); 1804 ret = full_stripe_write(rbio);
1805 if (ret)
1806 btrfs_bio_counter_dec(root->fs_info);
1807 return ret;
1808 }
1702 1809
1703 cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info, 1810 cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
1704 sizeof(*plug)); 1811 sizeof(*plug));
@@ -1709,10 +1816,13 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1709 INIT_LIST_HEAD(&plug->rbio_list); 1816 INIT_LIST_HEAD(&plug->rbio_list);
1710 } 1817 }
1711 list_add_tail(&rbio->plug_list, &plug->rbio_list); 1818 list_add_tail(&rbio->plug_list, &plug->rbio_list);
1819 ret = 0;
1712 } else { 1820 } else {
1713 return __raid56_parity_write(rbio); 1821 ret = __raid56_parity_write(rbio);
1822 if (ret)
1823 btrfs_bio_counter_dec(root->fs_info);
1714 } 1824 }
1715 return 0; 1825 return ret;
1716} 1826}
1717 1827
1718/* 1828/*
@@ -1730,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1730 int err; 1840 int err;
1731 int i; 1841 int i;
1732 1842
1733 pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *), 1843 pointers = kzalloc(rbio->real_stripes * sizeof(void *),
1734 GFP_NOFS); 1844 GFP_NOFS);
1735 if (!pointers) { 1845 if (!pointers) {
1736 err = -ENOMEM; 1846 err = -ENOMEM;
@@ -1740,7 +1850,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1740 faila = rbio->faila; 1850 faila = rbio->faila;
1741 failb = rbio->failb; 1851 failb = rbio->failb;
1742 1852
1743 if (rbio->read_rebuild) { 1853 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
1744 spin_lock_irq(&rbio->bio_list_lock); 1854 spin_lock_irq(&rbio->bio_list_lock);
1745 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 1855 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1746 spin_unlock_irq(&rbio->bio_list_lock); 1856 spin_unlock_irq(&rbio->bio_list_lock);
@@ -1749,15 +1859,23 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1749 index_rbio_pages(rbio); 1859 index_rbio_pages(rbio);
1750 1860
1751 for (pagenr = 0; pagenr < nr_pages; pagenr++) { 1861 for (pagenr = 0; pagenr < nr_pages; pagenr++) {
1862 /*
1863 * Now we just use the bitmap to mark the horizontal stripes
1864 * in which we have data when doing a parity scrub.
1865 */
1866 if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
1867 !test_bit(pagenr, rbio->dbitmap))
1868 continue;
1869
1752 /* setup our array of pointers with pages 1870 /* setup our array of pointers with pages
1753 * from each stripe 1871 * from each stripe
1754 */ 1872 */
1755 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { 1873 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1756 /* 1874 /*
1757 * if we're rebuilding a read, we have to use 1875 * if we're rebuilding a read, we have to use
1758 * pages from the bio list 1876 * pages from the bio list
1759 */ 1877 */
1760 if (rbio->read_rebuild && 1878 if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
1761 (stripe == faila || stripe == failb)) { 1879 (stripe == faila || stripe == failb)) {
1762 page = page_in_rbio(rbio, stripe, pagenr, 0); 1880 page = page_in_rbio(rbio, stripe, pagenr, 0);
1763 } else { 1881 } else {
@@ -1767,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1767 } 1885 }
1768 1886
1769 /* all raid6 handling here */ 1887 /* all raid6 handling here */
1770 if (rbio->raid_map[rbio->bbio->num_stripes - 1] == 1888 if (rbio->raid_map[rbio->real_stripes - 1] ==
1771 RAID6_Q_STRIPE) { 1889 RAID6_Q_STRIPE) {
1772 1890
1773 /* 1891 /*
@@ -1817,10 +1935,10 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1817 } 1935 }
1818 1936
1819 if (rbio->raid_map[failb] == RAID5_P_STRIPE) { 1937 if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
1820 raid6_datap_recov(rbio->bbio->num_stripes, 1938 raid6_datap_recov(rbio->real_stripes,
1821 PAGE_SIZE, faila, pointers); 1939 PAGE_SIZE, faila, pointers);
1822 } else { 1940 } else {
1823 raid6_2data_recov(rbio->bbio->num_stripes, 1941 raid6_2data_recov(rbio->real_stripes,
1824 PAGE_SIZE, faila, failb, 1942 PAGE_SIZE, faila, failb,
1825 pointers); 1943 pointers);
1826 } 1944 }
@@ -1850,7 +1968,7 @@ pstripe:
1850 * know they can be trusted. If this was a read reconstruction, 1968 * know they can be trusted. If this was a read reconstruction,
1851 * other endio functions will fiddle the uptodate bits 1969 * other endio functions will fiddle the uptodate bits
1852 */ 1970 */
1853 if (!rbio->read_rebuild) { 1971 if (rbio->operation == BTRFS_RBIO_WRITE) {
1854 for (i = 0; i < nr_pages; i++) { 1972 for (i = 0; i < nr_pages; i++) {
1855 if (faila != -1) { 1973 if (faila != -1) {
1856 page = rbio_stripe_page(rbio, faila, i); 1974 page = rbio_stripe_page(rbio, faila, i);
@@ -1862,12 +1980,12 @@ pstripe:
1862 } 1980 }
1863 } 1981 }
1864 } 1982 }
1865 for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) { 1983 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1866 /* 1984 /*
1867 * if we're rebuilding a read, we have to use 1985 * if we're rebuilding a read, we have to use
1868 * pages from the bio list 1986 * pages from the bio list
1869 */ 1987 */
1870 if (rbio->read_rebuild && 1988 if (rbio->operation == BTRFS_RBIO_READ_REBUILD &&
1871 (stripe == faila || stripe == failb)) { 1989 (stripe == faila || stripe == failb)) {
1872 page = page_in_rbio(rbio, stripe, pagenr, 0); 1990 page = page_in_rbio(rbio, stripe, pagenr, 0);
1873 } else { 1991 } else {
@@ -1882,9 +2000,9 @@ cleanup:
1882 kfree(pointers); 2000 kfree(pointers);
1883 2001
1884cleanup_io: 2002cleanup_io:
1885 2003 if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
1886 if (rbio->read_rebuild) { 2004 if (err == 0 &&
1887 if (err == 0) 2005 !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
1888 cache_rbio_pages(rbio); 2006 cache_rbio_pages(rbio);
1889 else 2007 else
1890 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 2008 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -1893,7 +2011,13 @@ cleanup_io:
1893 } else if (err == 0) { 2011 } else if (err == 0) {
1894 rbio->faila = -1; 2012 rbio->faila = -1;
1895 rbio->failb = -1; 2013 rbio->failb = -1;
1896 finish_rmw(rbio); 2014
2015 if (rbio->operation == BTRFS_RBIO_WRITE)
2016 finish_rmw(rbio);
2017 else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
2018 finish_parity_scrub(rbio, 0);
2019 else
2020 BUG();
1897 } else { 2021 } else {
1898 rbio_orig_end_io(rbio, err, 0); 2022 rbio_orig_end_io(rbio, err, 0);
1899 } 2023 }
@@ -1917,10 +2041,10 @@ static void raid_recover_end_io(struct bio *bio, int err)
1917 set_bio_pages_uptodate(bio); 2041 set_bio_pages_uptodate(bio);
1918 bio_put(bio); 2042 bio_put(bio);
1919 2043
1920 if (!atomic_dec_and_test(&rbio->bbio->stripes_pending)) 2044 if (!atomic_dec_and_test(&rbio->stripes_pending))
1921 return; 2045 return;
1922 2046
1923 if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors) 2047 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
1924 rbio_orig_end_io(rbio, -EIO, 0); 2048 rbio_orig_end_io(rbio, -EIO, 0);
1925 else 2049 else
1926 __raid_recover_end_io(rbio); 2050 __raid_recover_end_io(rbio);
@@ -1937,7 +2061,6 @@ static void raid_recover_end_io(struct bio *bio, int err)
1937static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) 2061static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1938{ 2062{
1939 int bios_to_read = 0; 2063 int bios_to_read = 0;
1940 struct btrfs_bio *bbio = rbio->bbio;
1941 struct bio_list bio_list; 2064 struct bio_list bio_list;
1942 int ret; 2065 int ret;
1943 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); 2066 int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
@@ -1951,16 +2074,16 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1951 if (ret) 2074 if (ret)
1952 goto cleanup; 2075 goto cleanup;
1953 2076
1954 atomic_set(&rbio->bbio->error, 0); 2077 atomic_set(&rbio->error, 0);
1955 2078
1956 /* 2079 /*
1957 * read everything that hasn't failed. Thanks to the 2080 * read everything that hasn't failed. Thanks to the
1958 * stripe cache, it is possible that some or all of these 2081 * stripe cache, it is possible that some or all of these
1959 * pages are going to be uptodate. 2082 * pages are going to be uptodate.
1960 */ 2083 */
1961 for (stripe = 0; stripe < bbio->num_stripes; stripe++) { 2084 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1962 if (rbio->faila == stripe || rbio->failb == stripe) { 2085 if (rbio->faila == stripe || rbio->failb == stripe) {
1963 atomic_inc(&rbio->bbio->error); 2086 atomic_inc(&rbio->error);
1964 continue; 2087 continue;
1965 } 2088 }
1966 2089
@@ -1990,7 +2113,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
1990 * were up to date, or we might have no bios to read because 2113 * were up to date, or we might have no bios to read because
1991 * the devices were gone. 2114 * the devices were gone.
1992 */ 2115 */
1993 if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) { 2116 if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
1994 __raid_recover_end_io(rbio); 2117 __raid_recover_end_io(rbio);
1995 goto out; 2118 goto out;
1996 } else { 2119 } else {
@@ -2002,7 +2125,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
2002 * the bbio may be freed once we submit the last bio. Make sure 2125 * the bbio may be freed once we submit the last bio. Make sure
2003 * not to touch it after that 2126 * not to touch it after that
2004 */ 2127 */
2005 atomic_set(&bbio->stripes_pending, bios_to_read); 2128 atomic_set(&rbio->stripes_pending, bios_to_read);
2006 while (1) { 2129 while (1) {
2007 bio = bio_list_pop(&bio_list); 2130 bio = bio_list_pop(&bio_list);
2008 if (!bio) 2131 if (!bio)
@@ -2021,7 +2144,7 @@ out:
2021 return 0; 2144 return 0;
2022 2145
2023cleanup: 2146cleanup:
2024 if (rbio->read_rebuild) 2147 if (rbio->operation == BTRFS_RBIO_READ_REBUILD)
2025 rbio_orig_end_io(rbio, -EIO, 0); 2148 rbio_orig_end_io(rbio, -EIO, 0);
2026 return -EIO; 2149 return -EIO;
2027} 2150}
@@ -2034,34 +2157,42 @@ cleanup:
2034 */ 2157 */
2035int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, 2158int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2036 struct btrfs_bio *bbio, u64 *raid_map, 2159 struct btrfs_bio *bbio, u64 *raid_map,
2037 u64 stripe_len, int mirror_num) 2160 u64 stripe_len, int mirror_num, int generic_io)
2038{ 2161{
2039 struct btrfs_raid_bio *rbio; 2162 struct btrfs_raid_bio *rbio;
2040 int ret; 2163 int ret;
2041 2164
2042 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2165 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2043 if (IS_ERR(rbio)) 2166 if (IS_ERR(rbio)) {
2167 __free_bbio_and_raid_map(bbio, raid_map, generic_io);
2044 return PTR_ERR(rbio); 2168 return PTR_ERR(rbio);
2169 }
2045 2170
2046 rbio->read_rebuild = 1; 2171 rbio->operation = BTRFS_RBIO_READ_REBUILD;
2047 bio_list_add(&rbio->bio_list, bio); 2172 bio_list_add(&rbio->bio_list, bio);
2048 rbio->bio_list_bytes = bio->bi_iter.bi_size; 2173 rbio->bio_list_bytes = bio->bi_iter.bi_size;
2049 2174
2050 rbio->faila = find_logical_bio_stripe(rbio, bio); 2175 rbio->faila = find_logical_bio_stripe(rbio, bio);
2051 if (rbio->faila == -1) { 2176 if (rbio->faila == -1) {
2052 BUG(); 2177 BUG();
2053 kfree(raid_map); 2178 __free_bbio_and_raid_map(bbio, raid_map, generic_io);
2054 kfree(bbio);
2055 kfree(rbio); 2179 kfree(rbio);
2056 return -EIO; 2180 return -EIO;
2057 } 2181 }
2058 2182
2183 if (generic_io) {
2184 btrfs_bio_counter_inc_noblocked(root->fs_info);
2185 rbio->generic_bio_cnt = 1;
2186 } else {
2187 set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags);
2188 }
2189
2059 /* 2190 /*
2060 * reconstruct from the q stripe if they are 2191 * reconstruct from the q stripe if they are
2061 * asking for mirror 3 2192 * asking for mirror 3
2062 */ 2193 */
2063 if (mirror_num == 3) 2194 if (mirror_num == 3)
2064 rbio->failb = bbio->num_stripes - 2; 2195 rbio->failb = rbio->real_stripes - 2;
2065 2196
2066 ret = lock_stripe_add(rbio); 2197 ret = lock_stripe_add(rbio);
2067 2198
@@ -2098,3 +2229,483 @@ static void read_rebuild_work(struct btrfs_work *work)
2098 rbio = container_of(work, struct btrfs_raid_bio, work); 2229 rbio = container_of(work, struct btrfs_raid_bio, work);
2099 __raid56_parity_recover(rbio); 2230 __raid56_parity_recover(rbio);
2100} 2231}
2232
2233/*
2234 * The following code is used to scrub/replace the parity stripe
2235 *
2236 * Note: We need to make sure that all the pages added to the scrub/replace
2237 * raid bio are correct and are not changed during the scrub/replace. That
2238 * is, those pages only hold metadata or file data with a checksum.
2239 */
2240
2241struct btrfs_raid_bio *
2242raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
2243 struct btrfs_bio *bbio, u64 *raid_map,
2244 u64 stripe_len, struct btrfs_device *scrub_dev,
2245 unsigned long *dbitmap, int stripe_nsectors)
2246{
2247 struct btrfs_raid_bio *rbio;
2248 int i;
2249
2250 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2251 if (IS_ERR(rbio))
2252 return NULL;
2253 bio_list_add(&rbio->bio_list, bio);
2254 /*
2255 * This is a special bio which is used to hold the completion handler
2256 * and make the scrub rbio similar to the other types
2257 */
2258 ASSERT(!bio->bi_iter.bi_size);
2259 rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
2260
2261 for (i = 0; i < rbio->real_stripes; i++) {
2262 if (bbio->stripes[i].dev == scrub_dev) {
2263 rbio->scrubp = i;
2264 break;
2265 }
2266 }
2267
2268 /* For now we only support the case where sectorsize equals page size */
2269 ASSERT(root->sectorsize == PAGE_SIZE);
2270 ASSERT(rbio->stripe_npages == stripe_nsectors);
2271 bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
2272
2273 return rbio;
2274}
2275
2276void raid56_parity_add_scrub_pages(struct btrfs_raid_bio *rbio,
2277 struct page *page, u64 logical)
2278{
2279 int stripe_offset;
2280 int index;
2281
2282 ASSERT(logical >= rbio->raid_map[0]);
2283 ASSERT(logical + PAGE_SIZE <= rbio->raid_map[0] +
2284 rbio->stripe_len * rbio->nr_data);
2285 stripe_offset = (int)(logical - rbio->raid_map[0]);
2286 index = stripe_offset >> PAGE_CACHE_SHIFT;
2287 rbio->bio_pages[index] = page;
2288}
2289
2290/*
2291 * We only scrub the parity for the horizontal stripes where we have
2292 * correct data, so we needn't allocate pages for all the stripes.
2293 */
2294static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
2295{
2296 int i;
2297 int bit;
2298 int index;
2299 struct page *page;
2300
2301 for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
2302 for (i = 0; i < rbio->real_stripes; i++) {
2303 index = i * rbio->stripe_npages + bit;
2304 if (rbio->stripe_pages[index])
2305 continue;
2306
2307 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2308 if (!page)
2309 return -ENOMEM;
2310 rbio->stripe_pages[index] = page;
2311 ClearPageUptodate(page);
2312 }
2313 }
2314 return 0;
2315}
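
A userspace sketch of the same idea, assuming a flat pages[] array indexed as stripe * stripe_npages + bit, with an open-coded bit test standing in for the kernel's for_each_set_bit(); the names are illustrative only:

#include <limits.h>
#include <stdlib.h>

#define ULONG_BITS (CHAR_BIT * sizeof(unsigned long))

static int test_bit_ul(const unsigned long *map, int bit)
{
	return (int)((map[bit / ULONG_BITS] >> (bit % ULONG_BITS)) & 1);
}

/*
 * Allocate backing pages only for the horizontal stripes marked in
 * dbitmap, and only where no page exists yet.
 */
static int alloc_essential(void **pages, const unsigned long *dbitmap,
			   int real_stripes, int stripe_npages,
			   size_t page_size)
{
	int stripe, bit;

	for (bit = 0; bit < stripe_npages; bit++) {
		if (!test_bit_ul(dbitmap, bit))
			continue;
		for (stripe = 0; stripe < real_stripes; stripe++) {
			int index = stripe * stripe_npages + bit;

			if (pages[index])
				continue;
			pages[index] = calloc(1, page_size);
			if (!pages[index])
				return -1;	/* -ENOMEM in the kernel */
		}
	}
	return 0;
}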
2316
2317/*
2318 * end io function used by finish_parity_scrub. When we finally
2319 * get here, we've written out the scrubbed parity for the stripe
2320 */
2321static void raid_write_parity_end_io(struct bio *bio, int err)
2322{
2323 struct btrfs_raid_bio *rbio = bio->bi_private;
2324
2325 if (err)
2326 fail_bio_stripe(rbio, bio);
2327
2328 bio_put(bio);
2329
2330 if (!atomic_dec_and_test(&rbio->stripes_pending))
2331 return;
2332
2333 err = 0;
2334
2335 if (atomic_read(&rbio->error))
2336 err = -EIO;
2337
2338 rbio_orig_end_io(rbio, err, 0);
2339}
2340
2341static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
2342 int need_check)
2343{
2344 struct btrfs_bio *bbio = rbio->bbio;
2345 void *pointers[rbio->real_stripes];
2346 DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
2347 int nr_data = rbio->nr_data;
2348 int stripe;
2349 int pagenr;
2350 int p_stripe = -1;
2351 int q_stripe = -1;
2352 struct page *p_page = NULL;
2353 struct page *q_page = NULL;
2354 struct bio_list bio_list;
2355 struct bio *bio;
2356 int is_replace = 0;
2357 int ret;
2358
2359 bio_list_init(&bio_list);
2360
2361 if (rbio->real_stripes - rbio->nr_data == 1) {
2362 p_stripe = rbio->real_stripes - 1;
2363 } else if (rbio->real_stripes - rbio->nr_data == 2) {
2364 p_stripe = rbio->real_stripes - 2;
2365 q_stripe = rbio->real_stripes - 1;
2366 } else {
2367 BUG();
2368 }
2369
2370 if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
2371 is_replace = 1;
2372 bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
2373 }
2374
2375 /*
2376 * The higher layers (the scrubber) are unlikely to
2377 * use this area of the disk again soon, so don't
2378 * cache it.
2379 */
2380 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2381
2382 if (!need_check)
2383 goto writeback;
2384
2385 p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2386 if (!p_page)
2387 goto cleanup;
2388 SetPageUptodate(p_page);
2389
2390 if (q_stripe != -1) {
2391 q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
2392 if (!q_page) {
2393 __free_page(p_page);
2394 goto cleanup;
2395 }
2396 SetPageUptodate(q_page);
2397 }
2398
2399 atomic_set(&rbio->error, 0);
2400
2401 for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
2402 struct page *p;
2403 void *parity;
2404 /* first collect one page from each data stripe */
2405 for (stripe = 0; stripe < nr_data; stripe++) {
2406 p = page_in_rbio(rbio, stripe, pagenr, 0);
2407 pointers[stripe] = kmap(p);
2408 }
2409
2410 /* then add the parity stripe */
2411 pointers[stripe++] = kmap(p_page);
2412
2413 if (q_stripe != -1) {
2414
2415 /*
2416 * raid6, add the qstripe and call the
2417 * library function to fill in our p/q
2418 */
2419 pointers[stripe++] = kmap(q_page);
2420
2421 raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
2422 pointers);
2423 } else {
2424 /* raid5 */
2425 memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
2426 run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
2427 }
2428
2429 /* Check the scrubbed parity and repair it if it is wrong */
2430 p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2431 parity = kmap(p);
2432 if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE))
2433 memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE);
2434 else
2435 /* Parity is right, no need to write it back */
2436 bitmap_clear(rbio->dbitmap, pagenr, 1);
2437 kunmap(p);
2438
2439 for (stripe = 0; stripe < rbio->real_stripes; stripe++)
2440 kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
2441 }
2442
2443 __free_page(p_page);
2444 if (q_page)
2445 __free_page(q_page);
2446
2447writeback:
2448 /*
2449 * time to start writing. Make bios for the parity pages of the
2450 * stripe we are scrubbing that actually changed. Ignore
2451 * everything else.
2452 */
2453 for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
2454 struct page *page;
2455
2456 page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2457 ret = rbio_add_io_page(rbio, &bio_list,
2458 page, rbio->scrubp, pagenr, rbio->stripe_len);
2459 if (ret)
2460 goto cleanup;
2461 }
2462
2463 if (!is_replace)
2464 goto submit_write;
2465
2466 for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
2467 struct page *page;
2468
2469 page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
2470 ret = rbio_add_io_page(rbio, &bio_list, page,
2471 bbio->tgtdev_map[rbio->scrubp],
2472 pagenr, rbio->stripe_len);
2473 if (ret)
2474 goto cleanup;
2475 }
2476
2477submit_write:
2478 nr_data = bio_list_size(&bio_list);
2479 if (!nr_data) {
2480 /* Every parity page was right, nothing to write back */
2481 rbio_orig_end_io(rbio, 0, 0);
2482 return;
2483 }
2484
2485 atomic_set(&rbio->stripes_pending, nr_data);
2486
2487 while (1) {
2488 bio = bio_list_pop(&bio_list);
2489 if (!bio)
2490 break;
2491
2492 bio->bi_private = rbio;
2493 bio->bi_end_io = raid_write_parity_end_io;
2494 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
2495 submit_bio(WRITE, bio);
2496 }
2497 return;
2498
2499cleanup:
2500 rbio_orig_end_io(rbio, -EIO, 0);
2501}
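
For the RAID5 branch of the check-and-repair loop above, here is a self-contained userspace sketch of the recompute/compare/repair step on one horizontal stripe. The names and the fixed page size are assumptions of the sketch, not btrfs API:

#include <stdbool.h>
#include <string.h>

#define SKETCH_PAGE_SIZE 4096

/* XOR all nr_data data pages (nr_data >= 1) together into 'parity' */
static void gen_raid5_parity(unsigned char *parity,
			     unsigned char * const *data, int nr_data)
{
	int i;
	size_t off;

	memcpy(parity, data[0], SKETCH_PAGE_SIZE);
	for (i = 1; i < nr_data; i++)
		for (off = 0; off < SKETCH_PAGE_SIZE; off++)
			parity[off] ^= data[i][off];
}

/* returns true when the on-disk parity was wrong and has been repaired */
static bool scrub_one_parity_page(unsigned char *ondisk_parity,
				  unsigned char * const *data, int nr_data)
{
	unsigned char computed[SKETCH_PAGE_SIZE];

	gen_raid5_parity(computed, data, nr_data);
	if (!memcmp(ondisk_parity, computed, SKETCH_PAGE_SIZE))
		return false;	/* parity is right, nothing to write back */

	memcpy(ondisk_parity, computed, SKETCH_PAGE_SIZE);
	return true;		/* caller queues this page for writeback */
}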
2502
2503static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
2504{
2505 if (stripe >= 0 && stripe < rbio->nr_data)
2506 return 1;
2507 return 0;
2508}
2509
2510/*
2511 * While we're doing the parity check and repair, we could have errors
2512 * in reading pages off the disk. This checks for errors and if we're
2513 * not able to read the page it'll trigger parity reconstruction. The
2514 * parity scrub will be finished after we've reconstructed the failed
2515 * stripes
2516 */
2517static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
2518{
2519 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
2520 goto cleanup;
2521
2522 if (rbio->faila >= 0 || rbio->failb >= 0) {
2523 int dfail = 0, failp = -1;
2524
2525 if (is_data_stripe(rbio, rbio->faila))
2526 dfail++;
2527 else if (is_parity_stripe(rbio->faila))
2528 failp = rbio->faila;
2529
2530 if (is_data_stripe(rbio, rbio->failb))
2531 dfail++;
2532 else if (is_parity_stripe(rbio->failb))
2533 failp = rbio->failb;
2534
2535 /*
2536 * Because we cannot use the parity that is being scrubbed
2537 * to repair data, our repair capability is reduced by one.
2538 * (In the RAID5 case, we cannot repair anything.)
2539 */
2540 if (dfail > rbio->bbio->max_errors - 1)
2541 goto cleanup;
2542
2543 /*
2544 * If all the data is good and only the parity is bad,
2545 * just repair the parity.
2546 */
2547 if (dfail == 0) {
2548 finish_parity_scrub(rbio, 0);
2549 return;
2550 }
2551
2552 /*
2553 * Getting here means we have one corrupted data stripe and
2554 * one corrupted parity on RAID6. If the corrupted parity is
2555 * the one being scrubbed, we can use the other one to repair
2556 * the data; otherwise we cannot repair the data stripe.
2557 */
2558 if (failp != rbio->scrubp)
2559 goto cleanup;
2560
2561 __raid_recover_end_io(rbio);
2562 } else {
2563 finish_parity_scrub(rbio, 1);
2564 }
2565 return;
2566
2567cleanup:
2568 rbio_orig_end_io(rbio, -EIO, 0);
2569}
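
The decision flow above, restated as a pure function for readability. This is an illustrative rewrite, not kernel code; it assumes stripe indices below nr_data are data and everything else is parity, and mirrors the checks above one-for-one:

enum scrub_action {
	SCRUB_ABORT,		/* too many failures, end the rbio with -EIO      */
	SCRUB_REPAIR_PARITY,	/* only parity failed: just rewrite it            */
	SCRUB_RECOVER_FIRST,	/* rebuild the failed data, then finish the scrub */
	SCRUB_CHECK_PARITY,	/* nothing failed: verify parity before writing   */
};

static enum scrub_action classify_scrub(int faila, int failb, int nr_data,
					int scrubp, int max_errors)
{
	int dfail = 0, failp = -1;

	if (faila < 0 && failb < 0)
		return SCRUB_CHECK_PARITY;

	if (faila >= 0 && faila < nr_data)
		dfail++;
	else if (faila >= 0)
		failp = faila;

	if (failb >= 0 && failb < nr_data)
		dfail++;
	else if (failb >= 0)
		failp = failb;

	/* a parity stripe that is itself being scrubbed cannot repair data */
	if (dfail > max_errors - 1)
		return SCRUB_ABORT;

	if (dfail == 0)
		return SCRUB_REPAIR_PARITY;

	/* we can only rebuild data when the failed parity is the scrubbed one */
	if (failp != scrubp)
		return SCRUB_ABORT;

	return SCRUB_RECOVER_FIRST;
}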
2570
2571/*
2572 * end io for the read phase of the scrub cycle. All the bios here are physical
2573 * stripe bios we've read from the disk so we can recalculate the parity of the
2574 * stripe.
2575 *
2576 * This will usually kick off finish_parity_scrub once all the bios are read in, but it
2577 * may trigger parity reconstruction if we had any errors along the way
2578 */
2579static void raid56_parity_scrub_end_io(struct bio *bio, int err)
2580{
2581 struct btrfs_raid_bio *rbio = bio->bi_private;
2582
2583 if (err)
2584 fail_bio_stripe(rbio, bio);
2585 else
2586 set_bio_pages_uptodate(bio);
2587
2588 bio_put(bio);
2589
2590 if (!atomic_dec_and_test(&rbio->stripes_pending))
2591 return;
2592
2593 /*
2594 * this will normally call finish_parity_scrub to start our write
2595 * but if there are any failed stripes we'll reconstruct
2596 * from parity first
2597 */
2598 validate_rbio_for_parity_scrub(rbio);
2599}
2600
2601static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
2602{
2603 int bios_to_read = 0;
2604 struct bio_list bio_list;
2605 int ret;
2606 int pagenr;
2607 int stripe;
2608 struct bio *bio;
2609
2610 ret = alloc_rbio_essential_pages(rbio);
2611 if (ret)
2612 goto cleanup;
2613
2614 bio_list_init(&bio_list);
2615
2616 atomic_set(&rbio->error, 0);
2617 /*
2618 * build a list of bios to read all the missing parts of this
2619 * stripe
2620 */
2621 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
2622 for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
2623 struct page *page;
2624 /*
2625 * we want to find all the pages missing from
2626 * the rbio and read them from the disk. If
2627 * page_in_rbio finds a page in the bio list
2628 * we don't need to read it off the stripe.
2629 */
2630 page = page_in_rbio(rbio, stripe, pagenr, 1);
2631 if (page)
2632 continue;
2633
2634 page = rbio_stripe_page(rbio, stripe, pagenr);
2635 /*
2636 * the bio cache may have handed us an uptodate
2637 * page. If so, be happy and use it
2638 */
2639 if (PageUptodate(page))
2640 continue;
2641
2642 ret = rbio_add_io_page(rbio, &bio_list, page,
2643 stripe, pagenr, rbio->stripe_len);
2644 if (ret)
2645 goto cleanup;
2646 }
2647 }
2648
2649 bios_to_read = bio_list_size(&bio_list);
2650 if (!bios_to_read) {
2651 /*
2652 * this can happen if others have merged with
2653 * us, it means there is nothing left to read.
2654 * But if there are missing devices it may not be
2655 * safe to do the full stripe write yet.
2656 */
2657 goto finish;
2658 }
2659
2660 /*
2661 * the bbio may be freed once we submit the last bio. Make sure
2662 * not to touch it after that
2663 */
2664 atomic_set(&rbio->stripes_pending, bios_to_read);
2665 while (1) {
2666 bio = bio_list_pop(&bio_list);
2667 if (!bio)
2668 break;
2669
2670 bio->bi_private = rbio;
2671 bio->bi_end_io = raid56_parity_scrub_end_io;
2672
2673 btrfs_bio_wq_end_io(rbio->fs_info, bio,
2674 BTRFS_WQ_ENDIO_RAID56);
2675
2676 BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
2677 submit_bio(READ, bio);
2678 }
2679 /* the actual write will happen once the reads are done */
2680 return;
2681
2682cleanup:
2683 rbio_orig_end_io(rbio, -EIO, 0);
2684 return;
2685
2686finish:
2687 validate_rbio_for_parity_scrub(rbio);
2688}
2689
2690static void scrub_parity_work(struct btrfs_work *work)
2691{
2692 struct btrfs_raid_bio *rbio;
2693
2694 rbio = container_of(work, struct btrfs_raid_bio, work);
2695 raid56_parity_scrub_stripe(rbio);
2696}
2697
2698static void async_scrub_parity(struct btrfs_raid_bio *rbio)
2699{
2700 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
2701 scrub_parity_work, NULL, NULL);
2702
2703 btrfs_queue_work(rbio->fs_info->rmw_workers,
2704 &rbio->work);
2705}
2706
2707void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
2708{
2709 if (!lock_stripe_add(rbio))
2710 async_scrub_parity(rbio);
2711}