Diffstat (limited to 'fs/btrfs/scrub.c')

 fs/btrfs/scrub.c | 893 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 831 insertions(+), 62 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index efa083113827..f2bb13a23f86 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -63,10 +63,18 @@ struct scrub_ctx;
  */
 #define SCRUB_MAX_PAGES_PER_BLOCK   16  /* 64k per node/leaf/sector */
 
+struct scrub_recover {
+        atomic_t                refs;
+        struct btrfs_bio        *bbio;
+        u64                     *raid_map;
+        u64                     map_length;
+};
+
 struct scrub_page {
         struct scrub_block      *sblock;
         struct page             *page;
         struct btrfs_device     *dev;
+        struct list_head        list;
         u64                     flags;  /* extent flags */
         u64                     generation;
         u64                     logical;
@@ -79,6 +87,8 @@ struct scrub_page {
                 unsigned int    io_error:1;
         };
         u8                      csum[BTRFS_CSUM_SIZE];
+
+        struct scrub_recover    *recover;
 };
 
 struct scrub_bio {
@@ -105,14 +115,52 @@ struct scrub_block {
         atomic_t                outstanding_pages;
         atomic_t                ref_count; /* free mem on transition to zero */
         struct scrub_ctx        *sctx;
+        struct scrub_parity     *sparity;
         struct {
                 unsigned int    header_error:1;
                 unsigned int    checksum_error:1;
                 unsigned int    no_io_error_seen:1;
                 unsigned int    generation_error:1; /* also sets header_error */
+
+                /* The following is for the data used to check parity */
+                /* It is for the data with checksum */
+                unsigned int    data_corrected:1;
         };
 };
 
+/* Used for the chunks with parity stripe such as RAID5/6 */
+struct scrub_parity {
+        struct scrub_ctx        *sctx;
+
+        struct btrfs_device     *scrub_dev;
+
+        u64                     logic_start;
+
+        u64                     logic_end;
+
+        int                     nsectors;
+
+        int                     stripe_len;
+
+        atomic_t                ref_count;
+
+        struct list_head        spages;
+
+        /* Work of parity check and repair */
+        struct btrfs_work       work;
+
+        /* Mark the parity blocks which have data */
+        unsigned long           *dbitmap;
+
+        /*
+         * Mark the parity blocks which have data, but errors happened
+         * when reading or checking the data
+         */
+        unsigned long           *ebitmap;
+
+        unsigned long           bitmap[0];
+};
+
 struct scrub_wr_ctx {
         struct scrub_bio *wr_curr_bio;
         struct btrfs_device *tgtdev;
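A note on the layout: dbitmap and ebitmap are not separate allocations; both point into the trailing bitmap[] flexible array, which scrub_raid56_parity() below sizes as 2 * bitmap_len and splits in half. A minimal stand-alone sketch of that single-allocation trick (the struct and helper names here are ours, for illustration only):

    /*
     * Single-allocation sketch of the dbitmap/ebitmap layout used by
     * struct scrub_parity; names are hypothetical.
     */
    struct two_bitmaps {
            int             nsectors;
            unsigned long   *dbitmap;       /* sectors that carry data */
            unsigned long   *ebitmap;       /* sectors that hit errors */
            unsigned long   bitmap[0];      /* 2 * bitmap_len bytes live here */
    };

    static struct two_bitmaps *alloc_two_bitmaps(int nsectors)
    {
            int bitmap_len = DIV_ROUND_UP(nsectors, BITS_PER_LONG) *
                             (BITS_PER_LONG / 8);
            struct two_bitmaps *tb;

            tb = kzalloc(sizeof(*tb) + 2 * bitmap_len, GFP_NOFS);
            if (!tb)
                    return NULL;
            tb->nsectors = nsectors;
            tb->dbitmap = tb->bitmap;
            tb->ebitmap = (void *)tb->bitmap + bitmap_len;
            return tb;
    }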
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                 struct scrub_block *sblock, int is_metadata,
                                 int have_csum, u8 *csum, u64 generation,
-                                u16 csum_size);
+                                u16 csum_size, int retry_failed_mirror);
 static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
                                          struct scrub_block *sblock,
                                          int is_metadata, int have_csum,
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock);
 static void scrub_block_put(struct scrub_block *sblock);
 static void scrub_page_get(struct scrub_page *spage);
 static void scrub_page_put(struct scrub_page *spage);
+static void scrub_parity_get(struct scrub_parity *sparity);
+static void scrub_parity_put(struct scrub_parity *sparity);
 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
                                     struct scrub_page *spage);
 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -790,6 +840,20 @@ out:
         scrub_pending_trans_workers_dec(sctx);
 }
 
+static inline void scrub_get_recover(struct scrub_recover *recover)
+{
+        atomic_inc(&recover->refs);
+}
+
+static inline void scrub_put_recover(struct scrub_recover *recover)
+{
+        if (atomic_dec_and_test(&recover->refs)) {
+                kfree(recover->bbio);
+                kfree(recover->raid_map);
+                kfree(recover);
+        }
+}
+
 /*
  * scrub_handle_errored_block gets called when either verification of the
  * pages failed or the bio failed to read, e.g. with EIO. In the latter
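The recover structure is created once per page-sized mapping and shared by the corresponding scrub_page of every mirror, so the btrfs_bio and raid_map are freed exactly once, when the last holder drops its reference. A condensed sketch of the lifecycle as scrub_setup_recheck_block() uses it (for_each_mirror_page() is a hypothetical placeholder for the real nested loops):

    recover = kzalloc(sizeof(*recover), GFP_NOFS);
    atomic_set(&recover->refs, 1);          /* creator's reference */

    for_each_mirror_page(page) {
            scrub_get_recover(recover);     /* one reference per page */
            page->recover = recover;
    }

    scrub_put_recover(recover);             /* drop the creator's reference; the
                                             * pages keep it alive, and the last
                                             * scrub_put_recover() frees bbio
                                             * and raid_map */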
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 
         /* build and submit the bios for the failed mirror, check checksums */
         scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
-                            csum, generation, sctx->csum_size);
+                            csum, generation, sctx->csum_size, 1);
 
         if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
             sblock_bad->no_io_error_seen) {
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
                  */
                 spin_lock(&sctx->stat_lock);
                 sctx->stat.unverified_errors++;
+                sblock_to_check->data_corrected = 1;
                 spin_unlock(&sctx->stat_lock);
 
                 if (sctx->is_dev_replace)
@@ -1019,7 +1084,7 @@ nodatasum_case:
                 /* build and submit the bios, check checksums */
                 scrub_recheck_block(fs_info, sblock_other, is_metadata,
                                     have_csum, csum, generation,
-                                    sctx->csum_size);
+                                    sctx->csum_size, 0);
 
                 if (!sblock_other->header_error &&
                     !sblock_other->checksum_error &&
@@ -1169,7 +1234,7 @@ nodatasum_case:
                          */
                         scrub_recheck_block(fs_info, sblock_bad,
                                             is_metadata, have_csum, csum,
-                                            generation, sctx->csum_size);
+                                            generation, sctx->csum_size, 1);
                         if (!sblock_bad->header_error &&
                             !sblock_bad->checksum_error &&
                             sblock_bad->no_io_error_seen)
@@ -1180,6 +1245,7 @@ nodatasum_case:
 corrected_error:
                         spin_lock(&sctx->stat_lock);
                         sctx->stat.corrected_errors++;
+                        sblock_to_check->data_corrected = 1;
                         spin_unlock(&sctx->stat_lock);
                         printk_ratelimited_in_rcu(KERN_ERR
                                 "BTRFS: fixed up error at logical %llu on dev %s\n",
@@ -1201,11 +1267,18 @@ out:
                      mirror_index++) {
                         struct scrub_block *sblock = sblocks_for_recheck +
                                                      mirror_index;
+                        struct scrub_recover *recover;
                         int page_index;
 
                         for (page_index = 0; page_index < sblock->page_count;
                              page_index++) {
                                 sblock->pagev[page_index]->sblock = NULL;
+                                recover = sblock->pagev[page_index]->recover;
+                                if (recover) {
+                                        scrub_put_recover(recover);
+                                        sblock->pagev[page_index]->recover =
+                                                                        NULL;
+                                }
                                 scrub_page_put(sblock->pagev[page_index]);
                         }
                 }
@@ -1215,14 +1288,63 @@ out:
         return 0;
 }
 
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+{
+        if (raid_map) {
+                if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
+                        return 3;
+                else
+                        return 2;
+        } else {
+                return (int)bbio->num_stripes;
+        }
+}
+
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+                                                 u64 mapped_length,
+                                                 int nstripes, int mirror,
+                                                 int *stripe_index,
+                                                 u64 *stripe_offset)
+{
+        int i;
+
+        if (raid_map) {
+                /* RAID5/6 */
+                for (i = 0; i < nstripes; i++) {
+                        if (raid_map[i] == RAID6_Q_STRIPE ||
+                            raid_map[i] == RAID5_P_STRIPE)
+                                continue;
+
+                        if (logical >= raid_map[i] &&
+                            logical < raid_map[i] + mapped_length)
+                                break;
+                }
+
+                *stripe_index = i;
+                *stripe_offset = logical - raid_map[i];
+        } else {
+                /* The other RAID type */
+                *stripe_index = mirror;
+                *stripe_offset = 0;
+        }
+}
+
 static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
                                      struct btrfs_fs_info *fs_info,
                                      struct scrub_block *original_sblock,
                                      u64 length, u64 logical,
                                      struct scrub_block *sblocks_for_recheck)
 {
+        struct scrub_recover *recover;
+        struct btrfs_bio *bbio;
+        u64 *raid_map;
+        u64 sublen;
+        u64 mapped_length;
+        u64 stripe_offset;
+        int stripe_index;
         int page_index;
         int mirror_index;
+        int nmirrors;
         int ret;
 
         /*
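For RAID5/6, raid_map[] records the logical address covered by each stripe of the mapping, with RAID5_P_STRIPE and RAID6_Q_STRIPE as sentinels for the parity stripes, so translating a logical address back to a stripe is a linear scan. A worked example with assumed numbers (the values are ours, not from the patch):

    /*
     * Hypothetical RAID5 layout: 2 data stripes + 1 parity stripe,
     * 64K stripe length, mapped_length == 64K.
     *
     *   raid_map[0] = 1048576        (data, covers [1048576, 1114112))
     *   raid_map[1] = 1114112        (data, covers [1114112, 1179648))
     *   raid_map[2] = RAID5_P_STRIPE (parity, skipped by the scan)
     *
     * For logical = 1118208 (1114112 + 4096):
     *   stripe 0 fails the upper-bound check (1118208 >= 1114112),
     *   stripe 1 matches, so *stripe_index = 1 and *stripe_offset = 4096.
     * The caller then reads from
     *   bbio->stripes[1].physical + 4096 on bbio->stripes[1].dev.
     */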
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 
         page_index = 0;
         while (length > 0) {
-                u64 sublen = min_t(u64, length, PAGE_SIZE);
-                u64 mapped_length = sublen;
-                struct btrfs_bio *bbio = NULL;
+                sublen = min_t(u64, length, PAGE_SIZE);
+                mapped_length = sublen;
+                bbio = NULL;
+                raid_map = NULL;
 
                 /*
                  * with a length of PAGE_SIZE, each returned stripe
                  * represents one mirror
                  */
-                ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
-                                      &mapped_length, &bbio, 0);
+                ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
+                                       &mapped_length, &bbio, 0, &raid_map);
                 if (ret || !bbio || mapped_length < sublen) {
                         kfree(bbio);
+                        kfree(raid_map);
                         return -EIO;
                 }
 
+                recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
+                if (!recover) {
+                        kfree(bbio);
+                        kfree(raid_map);
+                        return -ENOMEM;
+                }
+
+                atomic_set(&recover->refs, 1);
+                recover->bbio = bbio;
+                recover->raid_map = raid_map;
+                recover->map_length = mapped_length;
+
                 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
-                for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
+
+                nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+                for (mirror_index = 0; mirror_index < nmirrors;
                      mirror_index++) {
                         struct scrub_block *sblock;
                         struct scrub_page *page;
@@ -1265,26 +1403,38 @@ leave_nomem:
                                 spin_lock(&sctx->stat_lock);
                                 sctx->stat.malloc_errors++;
                                 spin_unlock(&sctx->stat_lock);
-                                kfree(bbio);
+                                scrub_put_recover(recover);
                                 return -ENOMEM;
                         }
                         scrub_page_get(page);
                         sblock->pagev[page_index] = page;
                         page->logical = logical;
-                        page->physical = bbio->stripes[mirror_index].physical;
+
+                        scrub_stripe_index_and_offset(logical, raid_map,
+                                                      mapped_length,
+                                                      bbio->num_stripes,
+                                                      mirror_index,
+                                                      &stripe_index,
+                                                      &stripe_offset);
+                        page->physical = bbio->stripes[stripe_index].physical +
+                                         stripe_offset;
+                        page->dev = bbio->stripes[stripe_index].dev;
+
                         BUG_ON(page_index >= original_sblock->page_count);
                         page->physical_for_dev_replace =
                                 original_sblock->pagev[page_index]->
                                 physical_for_dev_replace;
                         /* for missing devices, dev->bdev is NULL */
-                        page->dev = bbio->stripes[mirror_index].dev;
                         page->mirror_num = mirror_index + 1;
                         sblock->page_count++;
                         page->page = alloc_page(GFP_NOFS);
                         if (!page->page)
                                 goto leave_nomem;
+
+                        scrub_get_recover(recover);
+                        page->recover = recover;
                 }
-                kfree(bbio);
+                scrub_put_recover(recover);
                 length -= sublen;
                 logical += sublen;
                 page_index++;
@@ -1293,6 +1443,51 @@ leave_nomem:
         return 0;
 }
 
+struct scrub_bio_ret {
+        struct completion event;
+        int error;
+};
+
+static void scrub_bio_wait_endio(struct bio *bio, int error)
+{
+        struct scrub_bio_ret *ret = bio->bi_private;
+
+        ret->error = error;
+        complete(&ret->event);
+}
+
+static inline int scrub_is_page_on_raid56(struct scrub_page *page)
+{
+        return page->recover && page->recover->raid_map;
+}
+
+static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
+                                        struct bio *bio,
+                                        struct scrub_page *page)
+{
+        struct scrub_bio_ret done;
+        int ret;
+
+        init_completion(&done.event);
+        done.error = 0;
+        bio->bi_iter.bi_sector = page->logical >> 9;
+        bio->bi_private = &done;
+        bio->bi_end_io = scrub_bio_wait_endio;
+
+        ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
+                                    page->recover->raid_map,
+                                    page->recover->map_length,
+                                    page->mirror_num, 0);
+        if (ret)
+                return ret;
+
+        wait_for_completion(&done.event);
+        if (done.error)
+                return -EIO;
+
+        return 0;
+}
+
 /*
  * this function will check the on disk data for checksum errors, header
  * errors and read I/O errors. If any I/O errors happen, the exact pages
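scrub_submit_raid56_bio_wait() turns the asynchronous raid56 recovery path into a synchronous read by parking an on-stack completion in bi_private; note that bi_sector is set from page->logical rather than page->physical, since the raid56 code operates on logical addresses. The same completion pattern in isolation, assuming a hypothetical async API (my_async_op() is an illustrative stand-in, not a real kernel call; needs <linux/completion.h>):

    /* Hypothetical asynchronous submit; NOT a real kernel API. */
    extern int my_async_op(void (*cb)(void *priv, int error), void *priv);

    struct sync_waiter {
            struct completion event;
            int error;
    };

    static void my_cb(void *priv, int error)
    {
            struct sync_waiter *w = priv;

            w->error = error;
            complete(&w->event);    /* wakes the sleeping submitter */
    }

    static int my_op_sync(void)
    {
            struct sync_waiter w = { .error = 0 };

            init_completion(&w.event);
            if (my_async_op(my_cb, &w))
                    return -EIO;
            wait_for_completion(&w.event);
            return w.error ? -EIO : 0;
    }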
@@ -1303,7 +1498,7 @@ leave_nomem:
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                 struct scrub_block *sblock, int is_metadata,
                                 int have_csum, u8 *csum, u64 generation,
-                                u16 csum_size)
+                                u16 csum_size, int retry_failed_mirror)
 {
         int page_num;
 
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                         continue;
                 }
                 bio->bi_bdev = page->dev->bdev;
-                bio->bi_iter.bi_sector = page->physical >> 9;
 
                 bio_add_page(bio, page->page, PAGE_SIZE, 0);
-                if (btrfsic_submit_bio_wait(READ, bio))
-                        sblock->no_io_error_seen = 0;
+                if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
+                        if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
+                                sblock->no_io_error_seen = 0;
+                } else {
+                        bio->bi_iter.bi_sector = page->physical >> 9;
+
+                        if (btrfsic_submit_bio_wait(READ, bio))
+                                sblock->no_io_error_seen = 0;
+                }
 
                 bio_put(bio);
         }
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
 {
         int page_num;
 
+        /*
+         * This block is used to check the parity on the source device,
+         * so the data need not be written to the destination device.
+         */
+        if (sblock->sparity)
+                return;
+
         for (page_num = 0; page_num < sblock->page_count; page_num++) {
                 int ret;
 
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock)
         if (atomic_dec_and_test(&sblock->ref_count)) {
                 int i;
 
+                if (sblock->sparity)
+                        scrub_parity_put(sblock->sparity);
+
                 for (i = 0; i < sblock->page_count; i++)
                         scrub_page_put(sblock->pagev[i]);
                 kfree(sblock);
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
         scrub_pending_bio_dec(sctx);
 }
 
+static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
+                                       unsigned long *bitmap,
+                                       u64 start, u64 len)
+{
+        int offset;
+        int nsectors;
+        int sectorsize = sparity->sctx->dev_root->sectorsize;
+
+        if (len >= sparity->stripe_len) {
+                bitmap_set(bitmap, 0, sparity->nsectors);
+                return;
+        }
+
+        start -= sparity->logic_start;
+        offset = (int)do_div(start, sparity->stripe_len);
+        offset /= sectorsize;
+        nsectors = (int)len / sectorsize;
+
+        if (offset + nsectors <= sparity->nsectors) {
+                bitmap_set(bitmap, offset, nsectors);
+                return;
+        }
+
+        bitmap_set(bitmap, offset, sparity->nsectors - offset);
+        bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
+}
+
+static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
+                                                   u64 start, u64 len)
+{
+        __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
+}
+
+static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
+                                                  u64 start, u64 len)
+{
+        __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
+}
+
 static void scrub_block_complete(struct scrub_block *sblock)
 {
+        int corrupted = 0;
+
         if (!sblock->no_io_error_seen) {
+                corrupted = 1;
                 scrub_handle_errored_block(sblock);
         } else {
                 /*
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock)
                  * dev replace case, otherwise write here in dev replace
                  * case.
                  */
-                if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
+                corrupted = scrub_checksum(sblock);
+                if (!corrupted && sblock->sctx->is_dev_replace)
                         scrub_write_block_to_dev_replace(sblock);
         }
+
+        if (sblock->sparity && corrupted && !sblock->data_corrected) {
+                u64 start = sblock->pagev[0]->logical;
+                u64 end = sblock->pagev[sblock->page_count - 1]->logical +
+                          PAGE_SIZE;
+
+                scrub_parity_mark_sectors_error(sblock->sparity,
+                                                start, end - start);
+        }
 }
 
 static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
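__scrub_mark_bitmap() maps a logical byte range onto sector bits of the current stripe; because start is reduced modulo stripe_len, a range that crosses the stripe boundary wraps around to bit 0, which is what the final two bitmap_set() calls handle. A worked example with assumed geometry (the numbers are ours):

    /*
     * Assumed: stripe_len = 64K, sectorsize = 4K, so nsectors = 16;
     * logic_start = 0.
     *
     * Marking start = 56K, len = 16K:
     *   offset   = (56K % 64K) / 4K = 14
     *   nsectors = 16K / 4K        = 4
     *   offset + nsectors = 18 > 16, so the range wraps:
     *     bitmap_set(bitmap, 14, 2);     bits 14-15
     *     bitmap_set(bitmap,  0, 2);     bits 0-1
     */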
@@ -2228,6 +2491,132 @@ behind_scrub_pages:
         return 0;
 }
 
+static int scrub_pages_for_parity(struct scrub_parity *sparity,
+                                  u64 logical, u64 len,
+                                  u64 physical, struct btrfs_device *dev,
+                                  u64 flags, u64 gen, int mirror_num, u8 *csum)
+{
+        struct scrub_ctx *sctx = sparity->sctx;
+        struct scrub_block *sblock;
+        int index;
+
+        sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+        if (!sblock) {
+                spin_lock(&sctx->stat_lock);
+                sctx->stat.malloc_errors++;
+                spin_unlock(&sctx->stat_lock);
+                return -ENOMEM;
+        }
+
+        /* one ref inside this function, plus one for each page added to
+         * a bio later on */
+        atomic_set(&sblock->ref_count, 1);
+        sblock->sctx = sctx;
+        sblock->no_io_error_seen = 1;
+        sblock->sparity = sparity;
+        scrub_parity_get(sparity);
+
+        for (index = 0; len > 0; index++) {
+                struct scrub_page *spage;
+                u64 l = min_t(u64, len, PAGE_SIZE);
+
+                spage = kzalloc(sizeof(*spage), GFP_NOFS);
+                if (!spage) {
+leave_nomem:
+                        spin_lock(&sctx->stat_lock);
+                        sctx->stat.malloc_errors++;
+                        spin_unlock(&sctx->stat_lock);
+                        scrub_block_put(sblock);
+                        return -ENOMEM;
+                }
+                BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+                /* For scrub block */
+                scrub_page_get(spage);
+                sblock->pagev[index] = spage;
+                /* For scrub parity */
+                scrub_page_get(spage);
+                list_add_tail(&spage->list, &sparity->spages);
+                spage->sblock = sblock;
+                spage->dev = dev;
+                spage->flags = flags;
+                spage->generation = gen;
+                spage->logical = logical;
+                spage->physical = physical;
+                spage->mirror_num = mirror_num;
+                if (csum) {
+                        spage->have_csum = 1;
+                        memcpy(spage->csum, csum, sctx->csum_size);
+                } else {
+                        spage->have_csum = 0;
+                }
+                sblock->page_count++;
+                spage->page = alloc_page(GFP_NOFS);
+                if (!spage->page)
+                        goto leave_nomem;
+                len -= l;
+                logical += l;
+                physical += l;
+        }
+
+        WARN_ON(sblock->page_count == 0);
+        for (index = 0; index < sblock->page_count; index++) {
+                struct scrub_page *spage = sblock->pagev[index];
+                int ret;
+
+                ret = scrub_add_page_to_rd_bio(sctx, spage);
+                if (ret) {
+                        scrub_block_put(sblock);
+                        return ret;
+                }
+        }
+
+        /* last one frees, either here or in bio completion for last page */
+        scrub_block_put(sblock);
+        return 0;
+}
+
+static int scrub_extent_for_parity(struct scrub_parity *sparity,
+                                   u64 logical, u64 len,
+                                   u64 physical, struct btrfs_device *dev,
+                                   u64 flags, u64 gen, int mirror_num)
+{
+        struct scrub_ctx *sctx = sparity->sctx;
+        int ret = 0;
+        u8 csum[BTRFS_CSUM_SIZE];
+        u32 blocksize;
+
+        if (flags & BTRFS_EXTENT_FLAG_DATA) {
+                blocksize = sctx->sectorsize;
+        } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+                blocksize = sctx->nodesize;
+        } else {
+                blocksize = sctx->sectorsize;
+                WARN_ON(1);
+        }
+
+        while (len) {
+                u64 l = min_t(u64, len, blocksize);
+                int have_csum = 0;
+
+                if (flags & BTRFS_EXTENT_FLAG_DATA) {
+                        /* push csums to sbio */
+                        have_csum = scrub_find_csum(sctx, logical, l, csum);
+                        if (have_csum == 0)
+                                goto skip;
+                }
+                ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
+                                             flags, gen, mirror_num,
+                                             have_csum ? csum : NULL);
+skip:
+                if (ret)
+                        return ret;
+                len -= l;
+                logical += l;
+                physical += l;
+        }
+        return 0;
+}
+
 /*
  * Given a physical address, this will calculate it's
  * logical offset. if this is a parity stripe, it will return
@@ -2236,7 +2625,8 @@ behind_scrub_pages:
  * return 0 if it is a data stripe, 1 means parity stripe.
  */
 static int get_raid56_logic_offset(u64 physical, int num,
-                                   struct map_lookup *map, u64 *offset)
+                                   struct map_lookup *map, u64 *offset,
+                                   u64 *stripe_start)
 {
         int i;
         int j = 0;
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num,
 
         last_offset = (physical - map->stripes[num].physical) *
                       nr_data_stripes(map);
+        if (stripe_start)
+                *stripe_start = last_offset;
+
         *offset = last_offset;
         for (i = 0; i < nr_data_stripes(map); i++) {
                 *offset = last_offset + i * map->stripe_len;
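The multiplication by nr_data_stripes() converts a device-local offset into a chunk-logical one: each stripe_len of physical space on a single device corresponds to nr_data_stripes * stripe_len of logical space across the full stripe. The new stripe_start out-parameter reports the start of that full stripe so callers can hand it straight to scrub_raid56_parity(). A numeric illustration with assumed values (ours):

    /*
     * Assumed RAID5 layout: 3 devices, nr_data_stripes = 2,
     * stripe_len = 64K. For a physical address 128K into this device's
     * portion of the chunk (physical - map->stripes[num].physical == 128K):
     *
     *   last_offset = 128K * 2 = 256K
     *
     * i.e. the full stripe holding this physical block starts at logical
     * offset 256K within the chunk, and *stripe_start reports exactly that.
     */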
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num,
         return 1;
 }
 
+static void scrub_free_parity(struct scrub_parity *sparity)
+{
+        struct scrub_ctx *sctx = sparity->sctx;
+        struct scrub_page *curr, *next;
+        int nbits;
+
+        nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
+        if (nbits) {
+                spin_lock(&sctx->stat_lock);
+                sctx->stat.read_errors += nbits;
+                sctx->stat.uncorrectable_errors += nbits;
+                spin_unlock(&sctx->stat_lock);
+        }
+
+        list_for_each_entry_safe(curr, next, &sparity->spages, list) {
+                list_del_init(&curr->list);
+                scrub_page_put(curr);
+        }
+
+        kfree(sparity);
+}
+
+static void scrub_parity_bio_endio(struct bio *bio, int error)
+{
+        struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
+        struct scrub_ctx *sctx = sparity->sctx;
+
+        if (error)
+                bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+                          sparity->nsectors);
+
+        scrub_free_parity(sparity);
+        scrub_pending_bio_dec(sctx);
+        bio_put(bio);
+}
+
+static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+{
+        struct scrub_ctx *sctx = sparity->sctx;
+        struct bio *bio;
+        struct btrfs_raid_bio *rbio;
+        struct scrub_page *spage;
+        struct btrfs_bio *bbio = NULL;
+        u64 *raid_map = NULL;
+        u64 length;
+        int ret;
+
+        if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
+                           sparity->nsectors))
+                goto out;
+
+        length = sparity->logic_end - sparity->logic_start + 1;
+        ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
+                               sparity->logic_start,
+                               &length, &bbio, 0, &raid_map);
+        if (ret || !bbio || !raid_map)
+                goto bbio_out;
+
+        bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
+        if (!bio)
+                goto bbio_out;
+
+        bio->bi_iter.bi_sector = sparity->logic_start >> 9;
+        bio->bi_private = sparity;
+        bio->bi_end_io = scrub_parity_bio_endio;
+
+        rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
+                                              raid_map, length,
+                                              sparity->scrub_dev,
+                                              sparity->dbitmap,
+                                              sparity->nsectors);
+        if (!rbio)
+                goto rbio_out;
+
+        list_for_each_entry(spage, &sparity->spages, list)
+                raid56_parity_add_scrub_pages(rbio, spage->page,
+                                              spage->logical);
+
+        scrub_pending_bio_inc(sctx);
+        raid56_parity_submit_scrub_rbio(rbio);
+        return;
+
+rbio_out:
+        bio_put(bio);
+bbio_out:
+        kfree(bbio);
+        kfree(raid_map);
+        bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+                  sparity->nsectors);
+        spin_lock(&sctx->stat_lock);
+        sctx->stat.malloc_errors++;
+        spin_unlock(&sctx->stat_lock);
+out:
+        scrub_free_parity(sparity);
+}
+
+static inline int scrub_calc_parity_bitmap_len(int nsectors)
+{
+        return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+}
+
+static void scrub_parity_get(struct scrub_parity *sparity)
+{
+        atomic_inc(&sparity->ref_count);
+}
+
+static void scrub_parity_put(struct scrub_parity *sparity)
+{
+        if (!atomic_dec_and_test(&sparity->ref_count))
+                return;
+
+        scrub_parity_check_and_repair(sparity);
+}
+
+static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
+                                                  struct map_lookup *map,
+                                                  struct btrfs_device *sdev,
+                                                  struct btrfs_path *path,
+                                                  u64 logic_start,
+                                                  u64 logic_end)
+{
+        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+        struct btrfs_root *root = fs_info->extent_root;
+        struct btrfs_root *csum_root = fs_info->csum_root;
+        struct btrfs_extent_item *extent;
+        u64 flags;
+        int ret;
+        int slot;
+        struct extent_buffer *l;
+        struct btrfs_key key;
+        u64 generation;
+        u64 extent_logical;
+        u64 extent_physical;
+        u64 extent_len;
+        struct btrfs_device *extent_dev;
+        struct scrub_parity *sparity;
+        int nsectors;
+        int bitmap_len;
+        int extent_mirror_num;
+        int stop_loop = 0;
+
+        nsectors = map->stripe_len / root->sectorsize;
+        bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
+        sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
+                          GFP_NOFS);
+        if (!sparity) {
+                spin_lock(&sctx->stat_lock);
+                sctx->stat.malloc_errors++;
+                spin_unlock(&sctx->stat_lock);
+                return -ENOMEM;
+        }
+
+        sparity->stripe_len = map->stripe_len;
+        sparity->nsectors = nsectors;
+        sparity->sctx = sctx;
+        sparity->scrub_dev = sdev;
+        sparity->logic_start = logic_start;
+        sparity->logic_end = logic_end;
+        atomic_set(&sparity->ref_count, 1);
+        INIT_LIST_HEAD(&sparity->spages);
+        sparity->dbitmap = sparity->bitmap;
+        sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
+
+        ret = 0;
+        while (logic_start < logic_end) {
+                if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+                        key.type = BTRFS_METADATA_ITEM_KEY;
+                else
+                        key.type = BTRFS_EXTENT_ITEM_KEY;
+                key.objectid = logic_start;
+                key.offset = (u64)-1;
+
+                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+                if (ret < 0)
+                        goto out;
+
+                if (ret > 0) {
+                        ret = btrfs_previous_extent_item(root, path, 0);
+                        if (ret < 0)
+                                goto out;
+                        if (ret > 0) {
+                                btrfs_release_path(path);
+                                ret = btrfs_search_slot(NULL, root, &key,
+                                                        path, 0, 0);
+                                if (ret < 0)
+                                        goto out;
+                        }
+                }
+
+                stop_loop = 0;
+                while (1) {
+                        u64 bytes;
+
+                        l = path->nodes[0];
+                        slot = path->slots[0];
+                        if (slot >= btrfs_header_nritems(l)) {
+                                ret = btrfs_next_leaf(root, path);
+                                if (ret == 0)
+                                        continue;
+                                if (ret < 0)
+                                        goto out;
+
+                                stop_loop = 1;
+                                break;
+                        }
+                        btrfs_item_key_to_cpu(l, &key, slot);
+
+                        if (key.type == BTRFS_METADATA_ITEM_KEY)
+                                bytes = root->nodesize;
+                        else
+                                bytes = key.offset;
+
+                        if (key.objectid + bytes <= logic_start)
+                                goto next;
+
+                        if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+                            key.type != BTRFS_METADATA_ITEM_KEY)
+                                goto next;
+
+                        if (key.objectid > logic_end) {
+                                stop_loop = 1;
+                                break;
+                        }
+
+                        while (key.objectid >= logic_start + map->stripe_len)
+                                logic_start += map->stripe_len;
+
+                        extent = btrfs_item_ptr(l, slot,
+                                                struct btrfs_extent_item);
+                        flags = btrfs_extent_flags(l, extent);
+                        generation = btrfs_extent_generation(l, extent);
+
+                        if (key.objectid < logic_start &&
+                            (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+                                btrfs_err(fs_info,
+                                          "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+                                          key.objectid, logic_start);
+                                goto next;
+                        }
+again:
+                        extent_logical = key.objectid;
+                        extent_len = bytes;
+
+                        if (extent_logical < logic_start) {
+                                extent_len -= logic_start - extent_logical;
+                                extent_logical = logic_start;
+                        }
+
+                        if (extent_logical + extent_len >
+                            logic_start + map->stripe_len)
+                                extent_len = logic_start + map->stripe_len -
+                                             extent_logical;
+
+                        scrub_parity_mark_sectors_data(sparity, extent_logical,
+                                                       extent_len);
+
+                        scrub_remap_extent(fs_info, extent_logical,
+                                           extent_len, &extent_physical,
+                                           &extent_dev,
+                                           &extent_mirror_num);
+
+                        ret = btrfs_lookup_csums_range(csum_root,
+                                                extent_logical,
+                                                extent_logical + extent_len - 1,
+                                                &sctx->csum_list, 1);
+                        if (ret)
+                                goto out;
+
+                        ret = scrub_extent_for_parity(sparity, extent_logical,
+                                                      extent_len,
+                                                      extent_physical,
+                                                      extent_dev, flags,
+                                                      generation,
+                                                      extent_mirror_num);
+                        if (ret)
+                                goto out;
+
+                        scrub_free_csums(sctx);
+                        if (extent_logical + extent_len <
+                            key.objectid + bytes) {
+                                logic_start += map->stripe_len;
+
+                                if (logic_start >= logic_end) {
+                                        stop_loop = 1;
+                                        break;
+                                }
+
+                                if (logic_start < key.objectid + bytes) {
+                                        cond_resched();
+                                        goto again;
+                                }
+                        }
+next:
+                        path->slots[0]++;
+                }
+
+                btrfs_release_path(path);
+
+                if (stop_loop)
+                        break;
+
+                logic_start += map->stripe_len;
+        }
+out:
+        if (ret < 0)
+                scrub_parity_mark_sectors_error(sparity, logic_start,
+                                                logic_end - logic_start + 1);
+        scrub_parity_put(sparity);
+        scrub_submit(sctx);
+        mutex_lock(&sctx->wr_ctx.wr_lock);
+        scrub_wr_submit(sctx);
+        mutex_unlock(&sctx->wr_ctx.wr_lock);
+
+        btrfs_release_path(path);
+        return ret < 0 ? ret : 0;
+}
+
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                                            struct map_lookup *map,
                                            struct btrfs_device *scrub_dev,
                                            int num, u64 base, u64 length,
                                            int is_dev_replace)
 {
-        struct btrfs_path *path;
+        struct btrfs_path *path, *ppath;
         struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
         struct btrfs_root *root = fs_info->extent_root;
         struct btrfs_root *csum_root = fs_info->csum_root;
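scrub_calc_parity_bitmap_len() rounds the per-stripe sector count up to whole unsigned longs and returns the size in bytes, and scrub_parity_check_and_repair() only verifies parity for sectors that carry data and saw no read or checksum errors (the bitmap_andnot() of dbitmap with ebitmap), skipping the stripe entirely when nothing is left. Checking the length math under assumed parameters (ours):

    /*
     * Assumed: 64-bit kernel, BITS_PER_LONG = 64.
     *
     *   nsectors   = stripe_len / sectorsize = 64K / 4K = 16
     *   bitmap_len = DIV_ROUND_UP(16, 64) * (64 / 8) = 1 * 8 = 8 bytes
     *
     * One unsigned long is enough for 16 sector bits; the scrub_parity
     * allocation reserves 2 * 8 = 16 trailing bytes for dbitmap + ebitmap.
     */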
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         u64 extent_logical;
         u64 extent_physical;
         u64 extent_len;
+        u64 stripe_logical;
+        u64 stripe_end;
         struct btrfs_device *extent_dev;
         int extent_mirror_num;
         int stop_loop = 0;
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                 mirror_num = num % map->num_stripes + 1;
         } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
                                 BTRFS_BLOCK_GROUP_RAID6)) {
-                get_raid56_logic_offset(physical, num, map, &offset);
+                get_raid56_logic_offset(physical, num, map, &offset, NULL);
                 increment = map->stripe_len * nr_data_stripes(map);
                 mirror_num = 1;
         } else {
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         if (!path)
                 return -ENOMEM;
 
+        ppath = btrfs_alloc_path();
+        if (!ppath) {
+                btrfs_free_path(path);
+                return -ENOMEM;
+        }
+
         /*
          * work on commit root. The related disk blocks are static as
          * long as COW is applied. This means, it is save to rewrite
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
                          BTRFS_BLOCK_GROUP_RAID6)) {
                 get_raid56_logic_offset(physical_end, num,
-                                        map, &logic_end);
+                                        map, &logic_end, NULL);
                 logic_end += base;
         } else {
                 logic_end = logical + increment * nstripes;
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
                                  BTRFS_BLOCK_GROUP_RAID6)) {
                         ret = get_raid56_logic_offset(physical, num,
-                                        map, &logical);
+                                        map, &logical, &stripe_logical);
                         logical += base;
-                        if (ret)
+                        if (ret) {
+                                stripe_logical += base;
+                                stripe_end = stripe_logical + increment - 1;
+                                ret = scrub_raid56_parity(sctx, map, scrub_dev,
+                                                ppath, stripe_logical,
+                                                stripe_end);
+                                if (ret)
+                                        goto out;
                                 goto skip;
+                        }
                 }
                 /*
                  * canceled?
@@ -2558,13 +3284,25 @@ again:
                          * loop until we find next data stripe
                          * or we have finished all stripes.
                          */
-                        do {
-                                physical += map->stripe_len;
-                                ret = get_raid56_logic_offset(
-                                                physical, num,
-                                                map, &logical);
-                                logical += base;
-                        } while (physical < physical_end && ret);
+loop:
+                        physical += map->stripe_len;
+                        ret = get_raid56_logic_offset(physical,
+                                        num, map, &logical,
+                                        &stripe_logical);
+                        logical += base;
+
+                        if (ret && physical < physical_end) {
+                                stripe_logical += base;
+                                stripe_end = stripe_logical +
+                                                increment - 1;
+                                ret = scrub_raid56_parity(sctx,
+                                                map, scrub_dev, ppath,
+                                                stripe_logical,
+                                                stripe_end);
+                                if (ret)
+                                        goto out;
+                                goto loop;
+                        }
                 } else {
                         physical += map->stripe_len;
                         logical += increment;
@@ -2605,6 +3343,7 @@ out:
 
         blk_finish_plug(&plug);
         btrfs_free_path(path);
+        btrfs_free_path(ppath);
         return ret < 0 ? ret : 0;
 }
 
@@ -3310,6 +4049,50 @@ out:
         scrub_pending_trans_workers_dec(sctx);
 }
 
+static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
+                                 u64 logical)
+{
+        struct extent_state *cached_state = NULL;
+        struct btrfs_ordered_extent *ordered;
+        struct extent_io_tree *io_tree;
+        struct extent_map *em;
+        u64 lockstart = start, lockend = start + len - 1;
+        int ret = 0;
+
+        io_tree = &BTRFS_I(inode)->io_tree;
+
+        lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+        ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+        if (ordered) {
+                btrfs_put_ordered_extent(ordered);
+                ret = 1;
+                goto out_unlock;
+        }
+
+        em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto out_unlock;
+        }
+
+        /*
+         * This extent does not actually cover the logical extent anymore,
+         * move on to the next inode.
+         */
+        if (em->block_start > logical ||
+            em->block_start + em->block_len < logical + len) {
+                free_extent_map(em);
+                ret = 1;
+                goto out_unlock;
+        }
+        free_extent_map(em);
+
+out_unlock:
+        unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+                             GFP_NOFS);
+        return ret;
+}
+
 static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                                       struct scrub_copy_nocow_ctx *nocow_ctx)
 {
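check_extent_to_block() returns 1 when the range should be skipped (an ordered extent is pending, or the extent no longer maps to the logical address being copied), a negative errno on failure, and 0 when the copy may proceed; both call sites below collapse the positive case into a silent skip. The calling convention, reduced to a sketch of the pattern used in copy_nocow_pages_for_inode():

    ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
    if (ret) {
            ret = ret > 0 ? 0 : ret;    /* 1 == "skip quietly", <0 == error */
            goto out;
    }
    /* ... safe to read the page and write_page_nocow() it ... */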
@@ -3318,13 +4101,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
         struct inode *inode;
         struct page *page;
         struct btrfs_root *local_root;
-        struct btrfs_ordered_extent *ordered;
-        struct extent_map *em;
-        struct extent_state *cached_state = NULL;
         struct extent_io_tree *io_tree;
         u64 physical_for_dev_replace;
+        u64 nocow_ctx_logical;
         u64 len = nocow_ctx->len;
-        u64 lockstart = offset, lockend = offset + len - 1;
         unsigned long index;
         int srcu_index;
         int ret = 0;
@@ -3356,30 +4136,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 
         physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
         io_tree = &BTRFS_I(inode)->io_tree;
+        nocow_ctx_logical = nocow_ctx->logical;
 
-        lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
-        ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-        if (ordered) {
-                btrfs_put_ordered_extent(ordered);
-                goto out_unlock;
-        }
-
-        em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
-        if (IS_ERR(em)) {
-                ret = PTR_ERR(em);
-                goto out_unlock;
-        }
-
-        /*
-         * This extent does not actually cover the logical extent anymore,
-         * move on to the next inode.
-         */
-        if (em->block_start > nocow_ctx->logical ||
-            em->block_start + em->block_len < nocow_ctx->logical + len) {
-                free_extent_map(em);
-                goto out_unlock;
+        ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
+        if (ret) {
+                ret = ret > 0 ? 0 : ret;
+                goto out;
         }
-        free_extent_map(em);
 
         while (len >= PAGE_CACHE_SIZE) {
                 index = offset >> PAGE_CACHE_SHIFT;
@@ -3396,7 +4159,7 @@ again:
                         goto next_page;
                 } else {
                         ClearPageError(page);
-                        err = extent_read_full_page_nolock(io_tree, page,
+                        err = extent_read_full_page(io_tree, page,
                                                    btrfs_get_extent,
                                                    nocow_ctx->mirror_num);
                         if (err) {
@@ -3421,6 +4184,14 @@ again:
                                 goto next_page;
                         }
                 }
+
+                ret = check_extent_to_block(inode, offset, len,
+                                            nocow_ctx_logical);
+                if (ret) {
+                        ret = ret > 0 ? 0 : ret;
+                        goto next_page;
+                }
+
                 err = write_page_nocow(nocow_ctx->sctx,
                                        physical_for_dev_replace, page);
                 if (err)
@@ -3434,12 +4205,10 @@ next_page:
 
                 offset += PAGE_CACHE_SIZE;
                 physical_for_dev_replace += PAGE_CACHE_SIZE;
+                nocow_ctx_logical += PAGE_CACHE_SIZE;
                 len -= PAGE_CACHE_SIZE;
         }
         ret = COPY_COMPLETE;
-out_unlock:
-        unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
-                             GFP_NOFS);
 out:
         mutex_unlock(&inode->i_mutex);
         iput(inode);