Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--  fs/btrfs/scrub.c  309
1 file changed, 141 insertions(+), 168 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index e427cb7ee12c..ec57687c9a4d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -66,7 +66,6 @@ struct scrub_ctx;
 struct scrub_recover {
 	atomic_t		refs;
 	struct btrfs_bio	*bbio;
-	u64			*raid_map;
 	u64			map_length;
 };
 
@@ -80,7 +79,7 @@ struct scrub_page {
 	u64			logical;
 	u64			physical;
 	u64			physical_for_dev_replace;
-	atomic_t		ref_count;
+	atomic_t		refs;
 	struct {
 		unsigned int	mirror_num:8;
 		unsigned int	have_csum:1;
@@ -113,7 +112,7 @@ struct scrub_block {
 	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
-	atomic_t		ref_count; /* free mem on transition to zero */
+	atomic_t		refs; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
 	struct scrub_parity	*sparity;
 	struct {
@@ -142,7 +141,7 @@ struct scrub_parity {
 
 	int			stripe_len;
 
-	atomic_t		ref_count;
+	atomic_t		refs;
 
 	struct list_head	spages;
 
@@ -194,6 +193,15 @@ struct scrub_ctx {
 	 */
 	struct btrfs_scrub_progress stat;
 	spinlock_t		stat_lock;
+
+	/*
+	 * Use a ref counter to avoid use-after-free issues. Scrub workers
+	 * decrement bios_in_flight and workers_pending and then do a wakeup
+	 * on the list_wait wait queue. We must ensure the main scrub task
+	 * doesn't free the scrub context before or while the workers are
+	 * doing the wakeup() call.
+	 */
+	atomic_t		refs;
 };
 
 struct scrub_fixup_nodatasum {
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-				     struct btrfs_fs_info *fs_info,
-				     struct scrub_block *original_sblock,
-				     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck);
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 				struct scrub_block *sblock, int is_metadata,
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 					 const u8 *csum, u64 generation,
 					 u16 csum_size);
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-					     struct scrub_block *sblock_good,
-					     int force_write);
+					     struct scrub_block *sblock_good);
 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write);
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_put_ctx(struct scrub_ctx *sctx);
 
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
+	atomic_inc(&sctx->refs);
 	atomic_inc(&sctx->bios_in_flight);
 }
 
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
 {
 	atomic_dec(&sctx->bios_in_flight);
 	wake_up(&sctx->list_wait);
+	scrub_put_ctx(sctx);
 }
 
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
 {
 	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
 
+	atomic_inc(&sctx->refs);
 	/*
 	 * increment scrubs_running to prevent cancel requests from
 	 * completing as long as a worker is running. we must also
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
 	atomic_dec(&sctx->workers_pending);
 	wake_up(&fs_info->scrub_pause_wait);
 	wake_up(&sctx->list_wait);
+	scrub_put_ctx(sctx);
 }
 
 static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 	kfree(sctx);
 }
 
+static void scrub_put_ctx(struct scrub_ctx *sctx)
+{
+	if (atomic_dec_and_test(&sctx->refs))
+		scrub_free_ctx(sctx);
+}
+
 static noinline_for_stack
 struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 {
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
 	if (!sctx)
 		goto nomem;
+	atomic_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
 	sctx->pages_per_rd_bio = pages_per_rd_bio;
 	sctx->curr = -1;
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	struct inode_fs_paths *ipath = NULL;
 	struct btrfs_root *local_root;
 	struct btrfs_key root_key;
+	struct btrfs_key key;
 
 	root_key.objectid = root;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 		goto err;
 	}
 
-	ret = inode_item_info(inum, 0, local_root, swarn->path);
+	/*
+	 * this makes the path point to (inum INODE_ITEM ioff)
+	 */
+	key.objectid = inum;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
 	if (ret) {
 		btrfs_release_path(swarn->path);
 		goto err;
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
 static inline void scrub_put_recover(struct scrub_recover *recover)
 {
 	if (atomic_dec_and_test(&recover->refs)) {
-		kfree(recover->bbio);
-		kfree(recover->raid_map);
+		btrfs_put_bbio(recover->bbio);
 		kfree(recover);
 	}
 }
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	}
 
 	/* setup the context, map the logical blocks and alloc the pages */
-	ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
-					logical, sblocks_for_recheck);
+	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
 	if (ret) {
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.read_errors++;
@@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	if (!is_metadata && !have_csum) {
 		struct scrub_fixup_nodatasum *fixup_nodatasum;
 
-nodatasum_case:
 		WARN_ON(sctx->is_dev_replace);
 
+nodatasum_case:
+
 		/*
 		 * !is_metadata and !have_csum, this means that the data
 		 * might not be COW'ed, that it might be modified
@@ -1091,76 +1114,20 @@ nodatasum_case:
 		    sblock_other->no_io_error_seen) {
 			if (sctx->is_dev_replace) {
 				scrub_write_block_to_dev_replace(sblock_other);
+				goto corrected_error;
 			} else {
-				int force_write = is_metadata || have_csum;
-
 				ret = scrub_repair_block_from_good_copy(
-						sblock_bad, sblock_other,
-						force_write);
+						sblock_bad, sblock_other);
+				if (!ret)
+					goto corrected_error;
 			}
-			if (0 == ret)
-				goto corrected_error;
 		}
 	}
 
-	/*
-	 * for dev_replace, pick good pages and write to the target device.
-	 */
-	if (sctx->is_dev_replace) {
-		success = 1;
-		for (page_num = 0; page_num < sblock_bad->page_count;
-		     page_num++) {
-			int sub_success;
-
-			sub_success = 0;
-			for (mirror_index = 0;
-			     mirror_index < BTRFS_MAX_MIRRORS &&
-			     sblocks_for_recheck[mirror_index].page_count > 0;
-			     mirror_index++) {
-				struct scrub_block *sblock_other =
-					sblocks_for_recheck + mirror_index;
-				struct scrub_page *page_other =
-					sblock_other->pagev[page_num];
-
-				if (!page_other->io_error) {
-					ret = scrub_write_page_to_dev_replace(
-							sblock_other, page_num);
-					if (ret == 0) {
-						/* succeeded for this page */
-						sub_success = 1;
-						break;
-					} else {
-						btrfs_dev_replace_stats_inc(
-							&sctx->dev_root->
-							fs_info->dev_replace.
-							num_write_errors);
-					}
-				}
-			}
-
-			if (!sub_success) {
-				/*
-				 * did not find a mirror to fetch the page
-				 * from. scrub_write_page_to_dev_replace()
-				 * handles this case (page->io_error), by
-				 * filling the block with zeros before
-				 * submitting the write request
-				 */
-				success = 0;
-				ret = scrub_write_page_to_dev_replace(
-						sblock_bad, page_num);
-				if (ret)
-					btrfs_dev_replace_stats_inc(
-						&sctx->dev_root->fs_info->
-						dev_replace.num_write_errors);
-			}
-		}
-
-		goto out;
-	}
+	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
+		goto did_not_correct_error;
 
 	/*
-	 * for regular scrub, repair those pages that are errored.
 	 * In case of I/O errors in the area that is supposed to be
 	 * repaired, continue by picking good copies of those pages.
 	 * Select the good pages from mirrors to rewrite bad pages from
@@ -1184,44 +1151,64 @@ nodatasum_case:
 	 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
 	 * area are unreadable.
 	 */
-
-	/* can only fix I/O errors from here on */
-	if (sblock_bad->no_io_error_seen)
-		goto did_not_correct_error;
-
 	success = 1;
-	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
+	for (page_num = 0; page_num < sblock_bad->page_count;
+	     page_num++) {
 		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+		struct scrub_block *sblock_other = NULL;
 
-		if (!page_bad->io_error)
+		/* skip no-io-error page in scrub */
+		if (!page_bad->io_error && !sctx->is_dev_replace)
 			continue;
 
-		for (mirror_index = 0;
-		     mirror_index < BTRFS_MAX_MIRRORS &&
-		     sblocks_for_recheck[mirror_index].page_count > 0;
-		     mirror_index++) {
-			struct scrub_block *sblock_other = sblocks_for_recheck +
-					mirror_index;
-			struct scrub_page *page_other = sblock_other->pagev[
-					page_num];
-
-			if (!page_other->io_error) {
-				ret = scrub_repair_page_from_good_copy(
-					sblock_bad, sblock_other, page_num, 0);
-				if (0 == ret) {
-					page_bad->io_error = 0;
-					break; /* succeeded for this page */
+		/* try to find no-io-error page in mirrors */
+		if (page_bad->io_error) {
+			for (mirror_index = 0;
+			     mirror_index < BTRFS_MAX_MIRRORS &&
+			     sblocks_for_recheck[mirror_index].page_count > 0;
+			     mirror_index++) {
+				if (!sblocks_for_recheck[mirror_index].
+				    pagev[page_num]->io_error) {
+					sblock_other = sblocks_for_recheck +
+						       mirror_index;
+					break;
 				}
 			}
+			if (!sblock_other)
+				success = 0;
 		}
 
-		if (page_bad->io_error) {
-			/* did not find a mirror to copy the page from */
-			success = 0;
+		if (sctx->is_dev_replace) {
+			/*
+			 * did not find a mirror to fetch the page
+			 * from. scrub_write_page_to_dev_replace()
+			 * handles this case (page->io_error), by
+			 * filling the block with zeros before
+			 * submitting the write request
+			 */
+			if (!sblock_other)
+				sblock_other = sblock_bad;
+
+			if (scrub_write_page_to_dev_replace(sblock_other,
+							    page_num) != 0) {
+				btrfs_dev_replace_stats_inc(
+					&sctx->dev_root->
+					fs_info->dev_replace.
+					num_write_errors);
+				success = 0;
+			}
+		} else if (sblock_other) {
+			ret = scrub_repair_page_from_good_copy(sblock_bad,
+							       sblock_other,
+							       page_num, 0);
+			if (0 == ret)
+				page_bad->io_error = 0;
+			else
+				success = 0;
 		}
 	}
 
-	if (success) {
+	if (success && !sctx->is_dev_replace) {
 		if (is_metadata || have_csum) {
 			/*
 			 * need to verify the checksum now that all
@@ -1288,19 +1275,18 @@ out:
 	return 0;
 }
 
-static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
 {
-	if (raid_map) {
-		if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
-			return 3;
-		else
-			return 2;
-	} else {
+	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+		return 2;
+	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+		return 3;
+	else
 		return (int)bbio->num_stripes;
-	}
 }
 
-static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
+						 u64 *raid_map,
 						 u64 mapped_length,
 						 int nstripes, int mirror,
 						 int *stripe_index,
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
 {
 	int i;
 
-	if (raid_map) {
+	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		/* RAID5/6 */
 		for (i = 0; i < nstripes; i++) {
 			if (raid_map[i] == RAID6_Q_STRIPE ||
@@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
 	}
 }
 
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-				     struct btrfs_fs_info *fs_info,
-				     struct scrub_block *original_sblock,
-				     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck)
 {
+	struct scrub_ctx *sctx = original_sblock->sctx;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	u64 length = original_sblock->page_count * PAGE_SIZE;
+	u64 logical = original_sblock->pagev[0]->logical;
 	struct scrub_recover *recover;
 	struct btrfs_bio *bbio;
-	u64 *raid_map;
 	u64 sublen;
 	u64 mapped_length;
 	u64 stripe_offset;
 	int stripe_index;
-	int page_index;
+	int page_index = 0;
 	int mirror_index;
 	int nmirrors;
 	int ret;
 
 	/*
-	 * note: the two members ref_count and outstanding_pages
+	 * note: the two members refs and outstanding_pages
 	 * are not used (and not set) in the blocks that are used for
 	 * the recheck procedure
 	 */
 
-	page_index = 0;
 	while (length > 0) {
 		sublen = min_t(u64, length, PAGE_SIZE);
 		mapped_length = sublen;
 		bbio = NULL;
-		raid_map = NULL;
 
 		/*
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
 		ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
-				       &mapped_length, &bbio, 0, &raid_map);
+				       &mapped_length, &bbio, 0, 1);
 		if (ret || !bbio || mapped_length < sublen) {
-			kfree(bbio);
-			kfree(raid_map);
+			btrfs_put_bbio(bbio);
 			return -EIO;
 		}
 
 		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
 		if (!recover) {
-			kfree(bbio);
-			kfree(raid_map);
+			btrfs_put_bbio(bbio);
 			return -ENOMEM;
 		}
 
 		atomic_set(&recover->refs, 1);
 		recover->bbio = bbio;
-		recover->raid_map = raid_map;
 		recover->map_length = mapped_length;
 
 		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
 
-		nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
+
 		for (mirror_index = 0; mirror_index < nmirrors;
 		     mirror_index++) {
 			struct scrub_block *sblock;
 			struct scrub_page *page;
 
-			if (mirror_index >= BTRFS_MAX_MIRRORS)
-				continue;
-
 			sblock = sblocks_for_recheck + mirror_index;
 			sblock->sctx = sctx;
 			page = kzalloc(sizeof(*page), GFP_NOFS);
@@ -1410,9 +1389,12 @@ leave_nomem:
 			sblock->pagev[page_index] = page;
 			page->logical = logical;
 
-			scrub_stripe_index_and_offset(logical, raid_map,
+			scrub_stripe_index_and_offset(logical,
+						      bbio->map_type,
+						      bbio->raid_map,
 						      mapped_length,
-						      bbio->num_stripes,
+						      bbio->num_stripes -
+						      bbio->num_tgtdevs,
 						      mirror_index,
 						      &stripe_index,
 						      &stripe_offset);
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)
 
 static inline int scrub_is_page_on_raid56(struct scrub_page *page)
 {
-	return page->recover && page->recover->raid_map;
+	return page->recover &&
+	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
 }
 
 static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	bio->bi_end_io = scrub_bio_wait_endio;
 
 	ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
-				    page->recover->raid_map,
 				    page->recover->map_length,
 				    page->mirror_num, 0);
 	if (ret)
@@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 }
 
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-					     struct scrub_block *sblock_good,
-					     int force_write)
+					     struct scrub_block *sblock_good)
 {
 	int page_num;
 	int ret = 0;
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
 
 		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
 							   sblock_good,
-							   page_num,
-							   force_write);
+							   page_num, 1);
 		if (ret_sub)
 			ret = ret_sub;
 	}
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 
 static void scrub_block_get(struct scrub_block *sblock)
 {
-	atomic_inc(&sblock->ref_count);
+	atomic_inc(&sblock->refs);
 }
 
 static void scrub_block_put(struct scrub_block *sblock)
 {
-	if (atomic_dec_and_test(&sblock->ref_count)) {
+	if (atomic_dec_and_test(&sblock->refs)) {
 		int i;
 
 		if (sblock->sparity)
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)
 
 static void scrub_page_get(struct scrub_page *spage)
 {
-	atomic_inc(&spage->ref_count);
+	atomic_inc(&spage->refs);
 }
 
 static void scrub_page_put(struct scrub_page *spage)
 {
-	if (atomic_dec_and_test(&spage->ref_count)) {
+	if (atomic_dec_and_test(&spage->refs)) {
 		if (spage->page)
 			__free_page(spage->page);
 		kfree(spage);
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->ref_count, 1);
+	atomic_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->ref_count, 1);
+	atomic_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 	sblock->sparity = sparity;
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	struct btrfs_raid_bio *rbio;
 	struct scrub_page *spage;
 	struct btrfs_bio *bbio = NULL;
-	u64 *raid_map = NULL;
 	u64 length;
 	int ret;
 
@@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	length = sparity->logic_end - sparity->logic_start + 1;
 	ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
 			       sparity->logic_start,
-			       &length, &bbio, 0, &raid_map);
-	if (ret || !bbio || !raid_map)
+			       &length, &bbio, 0, 1);
+	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;
 
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	bio->bi_end_io = scrub_parity_bio_endio;
 
 	rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
-					      raid_map, length,
-					      sparity->scrub_dev,
+					      length, sparity->scrub_dev,
 					      sparity->dbitmap,
 					      sparity->nsectors);
 	if (!rbio)
@@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 rbio_out:
 	bio_put(bio);
 bbio_out:
-	kfree(bbio);
-	kfree(raid_map);
+	btrfs_put_bbio(bbio);
 	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 		  sparity->nsectors);
 	spin_lock(&sctx->stat_lock);
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
 
 static void scrub_parity_get(struct scrub_parity *sparity)
 {
-	atomic_inc(&sparity->ref_count);
+	atomic_inc(&sparity->refs);
 }
 
 static void scrub_parity_put(struct scrub_parity *sparity)
 {
-	if (!atomic_dec_and_test(&sparity->ref_count))
+	if (!atomic_dec_and_test(&sparity->refs))
 		return;
 
 	scrub_parity_check_and_repair(sparity);
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	sparity->scrub_dev = sdev;
 	sparity->logic_start = logic_start;
 	sparity->logic_end = logic_end;
-	atomic_set(&sparity->ref_count, 1);
+	atomic_set(&sparity->refs, 1);
 	INIT_LIST_HEAD(&sparity->spages);
 	sparity->dbitmap = sparity->bitmap;
 	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 		increment = map->stripe_len;
 		mirror_num = num % map->num_stripes + 1;
-	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				BTRFS_BLOCK_GROUP_RAID6)) {
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical, num, map, &offset, NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
 		mirror_num = 1;
@@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	 */
 	logical = base + offset;
 	physical_end = physical + nstripes * map->stripe_len;
-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-			 BTRFS_BLOCK_GROUP_RAID6)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical_end, num,
 					map, &logic_end, NULL);
 		logic_end += base;
@@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	ret = 0;
 	while (physical < physical_end) {
 		/* for raid56, we skip parity stripe */
-		if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				BTRFS_BLOCK_GROUP_RAID6)) {
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 			ret = get_raid56_logic_offset(physical, num,
 					map, &logical, &stripe_logical);
 			logical += base;
@@ -3280,8 +3254,7 @@ again:
 			scrub_free_csums(sctx);
 			if (extent_logical + extent_len <
 			    key.objectid + bytes) {
-				if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-					BTRFS_BLOCK_GROUP_RAID6)) {
+				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 					/*
 					 * loop until we find next data stripe
 					 * or we have finished all stripes.
@@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	scrub_workers_put(fs_info);
 	mutex_unlock(&fs_info->scrub_lock);
 
-	scrub_free_ctx(sctx);
+	scrub_put_ctx(sctx);
 
 	return ret;
 }
@@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
 			      &mapped_length, &bbio, 0);
 	if (ret || !bbio || mapped_length < extent_len ||
 	    !bbio->stripes[0].dev->bdev) {
-		kfree(bbio);
+		btrfs_put_bbio(bbio);
 		return;
 	}
 
 	*extent_physical = bbio->stripes[0].physical;
 	*extent_mirror_num = bbio->mirror_num;
 	*extent_dev = bbio->stripes[0].dev;
-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 }
 
 static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,