Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--	fs/btrfs/scrub.c	309
1 file changed, 141 insertions(+), 168 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index e427cb7ee12c..ec57687c9a4d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -66,7 +66,6 @@ struct scrub_ctx;
 struct scrub_recover {
 	atomic_t		refs;
 	struct btrfs_bio	*bbio;
-	u64			*raid_map;
 	u64			map_length;
 };
 
@@ -80,7 +79,7 @@ struct scrub_page {
 	u64			logical;
 	u64			physical;
 	u64			physical_for_dev_replace;
-	atomic_t		ref_count;
+	atomic_t		refs;
 	struct {
 		unsigned int	mirror_num:8;
 		unsigned int	have_csum:1;
@@ -113,7 +112,7 @@ struct scrub_block {
 	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
-	atomic_t		ref_count; /* free mem on transition to zero */
+	atomic_t		refs; /* free mem on transition to zero */
 	struct scrub_ctx	*sctx;
 	struct scrub_parity	*sparity;
 	struct {
@@ -142,7 +141,7 @@ struct scrub_parity {
 
 	int			stripe_len;
 
-	atomic_t		ref_count;
+	atomic_t		refs;
 
 	struct list_head	spages;
 
@@ -194,6 +193,15 @@ struct scrub_ctx {
 	 */
 	struct btrfs_scrub_progress stat;
 	spinlock_t		stat_lock;
+
+	/*
+	 * Use a ref counter to avoid use-after-free issues. Scrub workers
+	 * decrement bios_in_flight and workers_pending and then do a wakeup
+	 * on the list_wait wait queue. We must ensure the main scrub task
+	 * doesn't free the scrub context before or while the workers are
+	 * doing the wakeup() call.
+	 */
+	atomic_t		refs;
 };
 
 struct scrub_fixup_nodatasum {
@@ -236,10 +244,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
 static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
 static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-				     struct btrfs_fs_info *fs_info,
-				     struct scrub_block *original_sblock,
-				     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck);
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 				struct scrub_block *sblock, int is_metadata,
@@ -251,8 +256,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 					 const u8 *csum, u64 generation,
 					 u16 csum_size);
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-					     struct scrub_block *sblock_good,
-					     int force_write);
+					     struct scrub_block *sblock_good);
 static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write);
@@ -302,10 +306,12 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 static void copy_nocow_pages_worker(struct btrfs_work *work);
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_put_ctx(struct scrub_ctx *sctx);
 
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
 {
+	atomic_inc(&sctx->refs);
 	atomic_inc(&sctx->bios_in_flight);
 }
 
@@ -313,6 +319,7 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
 {
 	atomic_dec(&sctx->bios_in_flight);
 	wake_up(&sctx->list_wait);
+	scrub_put_ctx(sctx);
 }
 
 static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -346,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
 {
 	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
 
+	atomic_inc(&sctx->refs);
 	/*
 	 * increment scrubs_running to prevent cancel requests from
 	 * completing as long as a worker is running. we must also
@@ -388,6 +396,7 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
 	atomic_dec(&sctx->workers_pending);
 	wake_up(&fs_info->scrub_pause_wait);
 	wake_up(&sctx->list_wait);
+	scrub_put_ctx(sctx);
 }
 
 static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -433,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
 	kfree(sctx);
 }
 
+static void scrub_put_ctx(struct scrub_ctx *sctx)
+{
+	if (atomic_dec_and_test(&sctx->refs))
+		scrub_free_ctx(sctx);
+}
+
 static noinline_for_stack
 struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 {
@@ -457,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
 	if (!sctx)
 		goto nomem;
+	atomic_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
 	sctx->pages_per_rd_bio = pages_per_rd_bio;
 	sctx->curr = -1;
@@ -520,6 +536,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	struct inode_fs_paths *ipath = NULL;
 	struct btrfs_root *local_root;
 	struct btrfs_key root_key;
+	struct btrfs_key key;
 
 	root_key.objectid = root;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -530,7 +547,14 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 		goto err;
 	}
 
-	ret = inode_item_info(inum, 0, local_root, swarn->path);
+	/*
+	 * this makes the path point to (inum INODE_ITEM ioff)
+	 */
+	key.objectid = inum;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
 	if (ret) {
 		btrfs_release_path(swarn->path);
 		goto err;
@@ -848,8 +872,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover)
 static inline void scrub_put_recover(struct scrub_recover *recover)
 {
 	if (atomic_dec_and_test(&recover->refs)) {
-		kfree(recover->bbio);
-		kfree(recover->raid_map);
+		btrfs_put_bbio(recover->bbio);
 		kfree(recover);
 	}
 }
@@ -955,8 +978,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	}
 
 	/* setup the context, map the logical blocks and alloc the pages */
-	ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
-					logical, sblocks_for_recheck);
+	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
 	if (ret) {
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.read_errors++;
@@ -1030,9 +1052,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	if (!is_metadata && !have_csum) {
 		struct scrub_fixup_nodatasum *fixup_nodatasum;
 
-nodatasum_case:
 		WARN_ON(sctx->is_dev_replace);
 
+nodatasum_case:
+
 		/*
 		 * !is_metadata and !have_csum, this means that the data
 		 * might not be COW'ed, that it might be modified
@@ -1091,76 +1114,20 @@ nodatasum_case:
 		    sblock_other->no_io_error_seen) {
 			if (sctx->is_dev_replace) {
 				scrub_write_block_to_dev_replace(sblock_other);
+				goto corrected_error;
 			} else {
-				int force_write = is_metadata || have_csum;
-
 				ret = scrub_repair_block_from_good_copy(
-						sblock_bad, sblock_other,
-						force_write);
+						sblock_bad, sblock_other);
+				if (!ret)
+					goto corrected_error;
 			}
-			if (0 == ret)
-				goto corrected_error;
 		}
 	}
 
-	/*
-	 * for dev_replace, pick good pages and write to the target device.
-	 */
-	if (sctx->is_dev_replace) {
-		success = 1;
-		for (page_num = 0; page_num < sblock_bad->page_count;
-		     page_num++) {
-			int sub_success;
-
-			sub_success = 0;
-			for (mirror_index = 0;
-			     mirror_index < BTRFS_MAX_MIRRORS &&
-			     sblocks_for_recheck[mirror_index].page_count > 0;
-			     mirror_index++) {
-				struct scrub_block *sblock_other =
-					sblocks_for_recheck + mirror_index;
-				struct scrub_page *page_other =
-					sblock_other->pagev[page_num];
-
-				if (!page_other->io_error) {
-					ret = scrub_write_page_to_dev_replace(
-							sblock_other, page_num);
-					if (ret == 0) {
-						/* succeeded for this page */
-						sub_success = 1;
-						break;
-					} else {
-						btrfs_dev_replace_stats_inc(
-							&sctx->dev_root->
-							fs_info->dev_replace.
-							num_write_errors);
-					}
-				}
-			}
-
-			if (!sub_success) {
-				/*
-				 * did not find a mirror to fetch the page
-				 * from. scrub_write_page_to_dev_replace()
-				 * handles this case (page->io_error), by
-				 * filling the block with zeros before
-				 * submitting the write request
-				 */
-				success = 0;
-				ret = scrub_write_page_to_dev_replace(
-						sblock_bad, page_num);
-				if (ret)
-					btrfs_dev_replace_stats_inc(
-						&sctx->dev_root->fs_info->
-						dev_replace.num_write_errors);
-			}
-		}
-
-		goto out;
-	}
+	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
+		goto did_not_correct_error;
 
 	/*
-	 * for regular scrub, repair those pages that are errored.
 	 * In case of I/O errors in the area that is supposed to be
 	 * repaired, continue by picking good copies of those pages.
 	 * Select the good pages from mirrors to rewrite bad pages from
@@ -1184,44 +1151,64 @@ nodatasum_case:
 	 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
 	 * area are unreadable.
 	 */
-
-	/* can only fix I/O errors from here on */
-	if (sblock_bad->no_io_error_seen)
-		goto did_not_correct_error;
-
 	success = 1;
-	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
+	for (page_num = 0; page_num < sblock_bad->page_count;
+	     page_num++) {
 		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+		struct scrub_block *sblock_other = NULL;
 
-		if (!page_bad->io_error)
+		/* skip no-io-error page in scrub */
+		if (!page_bad->io_error && !sctx->is_dev_replace)
 			continue;
 
-		for (mirror_index = 0;
-		     mirror_index < BTRFS_MAX_MIRRORS &&
-		     sblocks_for_recheck[mirror_index].page_count > 0;
-		     mirror_index++) {
-			struct scrub_block *sblock_other = sblocks_for_recheck +
-							   mirror_index;
-			struct scrub_page *page_other = sblock_other->pagev[
-							page_num];
-
-			if (!page_other->io_error) {
-				ret = scrub_repair_page_from_good_copy(
-					sblock_bad, sblock_other, page_num, 0);
-				if (0 == ret) {
-					page_bad->io_error = 0;
-					break; /* succeeded for this page */
+		/* try to find no-io-error page in mirrors */
+		if (page_bad->io_error) {
+			for (mirror_index = 0;
+			     mirror_index < BTRFS_MAX_MIRRORS &&
+			     sblocks_for_recheck[mirror_index].page_count > 0;
+			     mirror_index++) {
+				if (!sblocks_for_recheck[mirror_index].
+				    pagev[page_num]->io_error) {
+					sblock_other = sblocks_for_recheck +
+						       mirror_index;
+					break;
 				}
 			}
+			if (!sblock_other)
+				success = 0;
 		}
 
-		if (page_bad->io_error) {
-			/* did not find a mirror to copy the page from */
-			success = 0;
+		if (sctx->is_dev_replace) {
+			/*
+			 * did not find a mirror to fetch the page
+			 * from. scrub_write_page_to_dev_replace()
+			 * handles this case (page->io_error), by
+			 * filling the block with zeros before
+			 * submitting the write request
+			 */
+			if (!sblock_other)
+				sblock_other = sblock_bad;
+
+			if (scrub_write_page_to_dev_replace(sblock_other,
+							    page_num) != 0) {
+				btrfs_dev_replace_stats_inc(
+					&sctx->dev_root->
+					fs_info->dev_replace.
+					num_write_errors);
+				success = 0;
+			}
+		} else if (sblock_other) {
+			ret = scrub_repair_page_from_good_copy(sblock_bad,
+							       sblock_other,
+							       page_num, 0);
+			if (0 == ret)
+				page_bad->io_error = 0;
+			else
+				success = 0;
 		}
 	}
 
-	if (success) {
+	if (success && !sctx->is_dev_replace) {
 		if (is_metadata || have_csum) {
 			/*
 			 * need to verify the checksum now that all
@@ -1288,19 +1275,18 @@ out:
 	return 0;
 }
 
-static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
 {
-	if (raid_map) {
-		if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
-			return 3;
-		else
-			return 2;
-	} else {
+	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+		return 2;
+	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+		return 3;
+	else
 		return (int)bbio->num_stripes;
-	}
 }
 
-static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
+						 u64 *raid_map,
 						 u64 mapped_length,
 						 int nstripes, int mirror,
 						 int *stripe_index,
@@ -1308,7 +1294,7 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
 {
 	int i;
 
-	if (raid_map) {
+	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		/* RAID5/6 */
 		for (i = 0; i < nstripes; i++) {
 			if (raid_map[i] == RAID6_Q_STRIPE ||
@@ -1329,72 +1315,65 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
 	}
 }
 
-static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
-				     struct btrfs_fs_info *fs_info,
-				     struct scrub_block *original_sblock,
-				     u64 length, u64 logical,
+static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 				     struct scrub_block *sblocks_for_recheck)
 {
+	struct scrub_ctx *sctx = original_sblock->sctx;
+	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+	u64 length = original_sblock->page_count * PAGE_SIZE;
+	u64 logical = original_sblock->pagev[0]->logical;
 	struct scrub_recover *recover;
 	struct btrfs_bio *bbio;
-	u64 *raid_map;
 	u64 sublen;
 	u64 mapped_length;
 	u64 stripe_offset;
 	int stripe_index;
-	int page_index;
+	int page_index = 0;
 	int mirror_index;
 	int nmirrors;
 	int ret;
 
 	/*
-	 * note: the two members ref_count and outstanding_pages
+	 * note: the two members refs and outstanding_pages
 	 * are not used (and not set) in the blocks that are used for
 	 * the recheck procedure
 	 */
 
-	page_index = 0;
 	while (length > 0) {
 		sublen = min_t(u64, length, PAGE_SIZE);
 		mapped_length = sublen;
 		bbio = NULL;
-		raid_map = NULL;
 
 		/*
 		 * with a length of PAGE_SIZE, each returned stripe
 		 * represents one mirror
 		 */
 		ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
-				       &mapped_length, &bbio, 0, &raid_map);
+				       &mapped_length, &bbio, 0, 1);
 		if (ret || !bbio || mapped_length < sublen) {
-			kfree(bbio);
-			kfree(raid_map);
+			btrfs_put_bbio(bbio);
			return -EIO;
 		}
 
 		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
 		if (!recover) {
-			kfree(bbio);
-			kfree(raid_map);
+			btrfs_put_bbio(bbio);
 			return -ENOMEM;
 		}
 
 		atomic_set(&recover->refs, 1);
 		recover->bbio = bbio;
-		recover->raid_map = raid_map;
 		recover->map_length = mapped_length;
 
 		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
 
-		nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
+
 		for (mirror_index = 0; mirror_index < nmirrors;
 		     mirror_index++) {
 			struct scrub_block *sblock;
 			struct scrub_page *page;
 
-			if (mirror_index >= BTRFS_MAX_MIRRORS)
-				continue;
-
 			sblock = sblocks_for_recheck + mirror_index;
 			sblock->sctx = sctx;
 			page = kzalloc(sizeof(*page), GFP_NOFS);
@@ -1410,9 +1389,12 @@ leave_nomem:
 			sblock->pagev[page_index] = page;
 			page->logical = logical;
 
-			scrub_stripe_index_and_offset(logical, raid_map,
+			scrub_stripe_index_and_offset(logical,
+						      bbio->map_type,
+						      bbio->raid_map,
 						      mapped_length,
-						      bbio->num_stripes,
+						      bbio->num_stripes -
+						      bbio->num_tgtdevs,
 						      mirror_index,
 						      &stripe_index,
 						      &stripe_offset);
@@ -1458,7 +1440,8 @@ static void scrub_bio_wait_endio(struct bio *bio, int error)
 
 static inline int scrub_is_page_on_raid56(struct scrub_page *page)
 {
-	return page->recover && page->recover->raid_map;
+	return page->recover &&
+	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
 }
 
 static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
@@ -1475,7 +1458,6 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	bio->bi_end_io = scrub_bio_wait_endio;
 
 	ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
-				    page->recover->raid_map,
 				    page->recover->map_length,
 				    page->mirror_num, 0);
 	if (ret)
@@ -1615,8 +1597,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 }
 
 static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
-					     struct scrub_block *sblock_good,
-					     int force_write)
+					     struct scrub_block *sblock_good)
 {
 	int page_num;
 	int ret = 0;
@@ -1626,8 +1607,7 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
 
 		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
 							   sblock_good,
-							   page_num,
-							   force_write);
+							   page_num, 1);
 		if (ret_sub)
 			ret = ret_sub;
 	}
@@ -2067,12 +2047,12 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 
 static void scrub_block_get(struct scrub_block *sblock)
 {
-	atomic_inc(&sblock->ref_count);
+	atomic_inc(&sblock->refs);
 }
 
 static void scrub_block_put(struct scrub_block *sblock)
 {
-	if (atomic_dec_and_test(&sblock->ref_count)) {
+	if (atomic_dec_and_test(&sblock->refs)) {
 		int i;
 
 		if (sblock->sparity)
@@ -2086,12 +2066,12 @@ static void scrub_block_put(struct scrub_block *sblock)
 
 static void scrub_page_get(struct scrub_page *spage)
 {
-	atomic_inc(&spage->ref_count);
+	atomic_inc(&spage->refs);
 }
 
 static void scrub_page_put(struct scrub_page *spage)
 {
-	if (atomic_dec_and_test(&spage->ref_count)) {
+	if (atomic_dec_and_test(&spage->refs)) {
 		if (spage->page)
 			__free_page(spage->page);
 		kfree(spage);
@@ -2217,7 +2197,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->ref_count, 1);
+	atomic_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 
@@ -2510,7 +2490,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
 
 	/* one ref inside this function, plus one for each page added to
 	 * a bio later on */
-	atomic_set(&sblock->ref_count, 1);
+	atomic_set(&sblock->refs, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 	sblock->sparity = sparity;
@@ -2705,7 +2685,6 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	struct btrfs_raid_bio *rbio;
 	struct scrub_page *spage;
 	struct btrfs_bio *bbio = NULL;
-	u64 *raid_map = NULL;
 	u64 length;
 	int ret;
 
@@ -2716,8 +2695,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	length = sparity->logic_end - sparity->logic_start + 1;
 	ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
 			       sparity->logic_start,
-			       &length, &bbio, 0, &raid_map);
-	if (ret || !bbio || !raid_map)
+			       &length, &bbio, 0, 1);
+	if (ret || !bbio || !bbio->raid_map)
 		goto bbio_out;
 
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
@@ -2729,8 +2708,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 	bio->bi_end_io = scrub_parity_bio_endio;
 
 	rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
-					      raid_map, length,
-					      sparity->scrub_dev,
+					      length, sparity->scrub_dev,
 					      sparity->dbitmap,
 					      sparity->nsectors);
 	if (!rbio)
@@ -2747,8 +2725,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
 rbio_out:
 	bio_put(bio);
 bbio_out:
-	kfree(bbio);
-	kfree(raid_map);
+	btrfs_put_bbio(bbio);
 	bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
 		  sparity->nsectors);
 	spin_lock(&sctx->stat_lock);
@@ -2765,12 +2742,12 @@ static inline int scrub_calc_parity_bitmap_len(int nsectors)
 
 static void scrub_parity_get(struct scrub_parity *sparity)
 {
-	atomic_inc(&sparity->ref_count);
+	atomic_inc(&sparity->refs);
 }
 
 static void scrub_parity_put(struct scrub_parity *sparity)
 {
-	if (!atomic_dec_and_test(&sparity->ref_count))
+	if (!atomic_dec_and_test(&sparity->refs))
 		return;
 
 	scrub_parity_check_and_repair(sparity);
@@ -2820,7 +2797,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
 	sparity->scrub_dev = sdev;
 	sparity->logic_start = logic_start;
 	sparity->logic_end = logic_end;
-	atomic_set(&sparity->ref_count, 1);
+	atomic_set(&sparity->refs, 1);
 	INIT_LIST_HEAD(&sparity->spages);
 	sparity->dbitmap = sparity->bitmap;
 	sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
@@ -3037,8 +3014,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 		increment = map->stripe_len;
 		mirror_num = num % map->num_stripes + 1;
-	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				BTRFS_BLOCK_GROUP_RAID6)) {
+	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical, num, map, &offset, NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
 		mirror_num = 1;
@@ -3074,8 +3050,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	 */
 	logical = base + offset;
 	physical_end = physical + nstripes * map->stripe_len;
-	if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-			 BTRFS_BLOCK_GROUP_RAID6)) {
+	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical_end, num,
 					map, &logic_end, NULL);
 		logic_end += base;
@@ -3121,8 +3096,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	ret = 0;
 	while (physical < physical_end) {
 		/* for raid56, we skip parity stripe */
-		if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-				 BTRFS_BLOCK_GROUP_RAID6)) {
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 			ret = get_raid56_logic_offset(physical, num,
 					map, &logical, &stripe_logical);
 			logical += base;
@@ -3280,8 +3254,7 @@ again:
 			scrub_free_csums(sctx);
 			if (extent_logical + extent_len <
 			    key.objectid + bytes) {
-				if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-					BTRFS_BLOCK_GROUP_RAID6)) {
+				if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 					/*
 					 * loop until we find next data stripe
 					 * or we have finished all stripes.
@@ -3775,7 +3748,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	scrub_workers_put(fs_info);
 	mutex_unlock(&fs_info->scrub_lock);
 
-	scrub_free_ctx(sctx);
+	scrub_put_ctx(sctx);
 
 	return ret;
 }
@@ -3881,14 +3854,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
 			      &mapped_length, &bbio, 0);
 	if (ret || !bbio || mapped_length < extent_len ||
 	    !bbio->stripes[0].dev->bdev) {
-		kfree(bbio);
+		btrfs_put_bbio(bbio);
 		return;
 	}
 
 	*extent_physical = bbio->stripes[0].physical;
 	*extent_mirror_num = bbio->mirror_num;
 	*extent_dev = bbio->stripes[0].dev;
-	kfree(bbio);
+	btrfs_put_bbio(bbio);
 }
 
 static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
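
Note on the refcounting pattern above: scrub_setup_ctx() starts refs at 1 for the main scrub task, scrub_pending_bio_inc() and scrub_pending_trans_workers_inc() take one extra reference per in-flight bio or pending worker, and the matching _dec() helpers drop it only after their wake_up() calls, so scrub_free_ctx() can no longer run while a worker is still touching the context. The following is a minimal user-space sketch of the same get/put lifecycle, using GCC's __atomic builtins in place of the kernel's atomic_t so it compiles outside the kernel; the ctx/ctx_get/ctx_put names are illustrative and not part of the patch.

#include <stdlib.h>

/* Illustrative stand-in for struct scrub_ctx. */
struct ctx {
	int refs;	/* manipulated only via the __atomic builtins below */
	/* ... payload ... */
};

static struct ctx *ctx_alloc(void)
{
	struct ctx *c = calloc(1, sizeof(*c));

	if (c)
		__atomic_store_n(&c->refs, 1, __ATOMIC_RELAXED); /* owner's ref */
	return c;
}

static void ctx_get(struct ctx *c)
{
	__atomic_add_fetch(&c->refs, 1, __ATOMIC_RELAXED);
}

static void ctx_put(struct ctx *c)
{
	/* like atomic_dec_and_test(): only the final put frees the object */
	if (__atomic_sub_fetch(&c->refs, 1, __ATOMIC_ACQ_REL) == 0)
		free(c);
}

int main(void)
{
	struct ctx *c = ctx_alloc();

	if (!c)
		return 1;
	ctx_get(c);	/* work goes in flight, as in scrub_pending_bio_inc() */
	ctx_put(c);	/* owner drops its ref; the context must survive */
	ctx_put(c);	/* worker's put after its wakeup; last ref, freed here */
	return 0;
}

The ordering mirrored from the patch is the important part: the worker's put happens after its wakeup and cleanup work, so the owner's put can never free the structure out from under a worker that is still using it.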