about summary refs log tree commit diff stats
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r-- fs/btrfs/scrub.c 893
1 files changed, 831 insertions, 62 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index efa083113827..f2bb13a23f86 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -63,10 +63,18 @@ struct scrub_ctx;
63 */ 63 */
64#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ 64#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
65 65
66struct scrub_recover {
67 atomic_t refs;
68 struct btrfs_bio *bbio;
69 u64 *raid_map;
70 u64 map_length;
71};
72
66struct scrub_page { 73struct scrub_page {
67 struct scrub_block *sblock; 74 struct scrub_block *sblock;
68 struct page *page; 75 struct page *page;
69 struct btrfs_device *dev; 76 struct btrfs_device *dev;
77 struct list_head list;
70 u64 flags; /* extent flags */ 78 u64 flags; /* extent flags */
71 u64 generation; 79 u64 generation;
72 u64 logical; 80 u64 logical;
@@ -79,6 +87,8 @@ struct scrub_page {
79 unsigned int io_error:1; 87 unsigned int io_error:1;
80 }; 88 };
81 u8 csum[BTRFS_CSUM_SIZE]; 89 u8 csum[BTRFS_CSUM_SIZE];
90
91 struct scrub_recover *recover;
82}; 92};
83 93
84struct scrub_bio { 94struct scrub_bio {
@@ -105,14 +115,52 @@ struct scrub_block {
105 atomic_t outstanding_pages; 115 atomic_t outstanding_pages;
106 atomic_t ref_count; /* free mem on transition to zero */ 116 atomic_t ref_count; /* free mem on transition to zero */
107 struct scrub_ctx *sctx; 117 struct scrub_ctx *sctx;
118 struct scrub_parity *sparity;
108 struct { 119 struct {
109 unsigned int header_error:1; 120 unsigned int header_error:1;
110 unsigned int checksum_error:1; 121 unsigned int checksum_error:1;
111 unsigned int no_io_error_seen:1; 122 unsigned int no_io_error_seen:1;
112 unsigned int generation_error:1; /* also sets header_error */ 123 unsigned int generation_error:1; /* also sets header_error */
124
125 /* The following is for the data used to check parity */
126 /* It is for the data with checksum */
127 unsigned int data_corrected:1;
113 }; 128 };
114}; 129};
115 130
131/* Used for the chunks with parity stripe such RAID5/6 */
132struct scrub_parity {
133 struct scrub_ctx *sctx;
134
135 struct btrfs_device *scrub_dev;
136
137 u64 logic_start;
138
139 u64 logic_end;
140
141 int nsectors;
142
143 int stripe_len;
144
145 atomic_t ref_count;
146
147 struct list_head spages;
148
149 /* Work of parity check and repair */
150 struct btrfs_work work;
151
152 /* Mark the parity blocks which have data */
153 unsigned long *dbitmap;
154
155 /*
156 * Mark the parity blocks which have data, but errors happen when
157 * read data or check data
158 */
159 unsigned long *ebitmap;
160
161 unsigned long bitmap[0];
162};
163
116struct scrub_wr_ctx { 164struct scrub_wr_ctx {
117 struct scrub_bio *wr_curr_bio; 165 struct scrub_bio *wr_curr_bio;
118 struct btrfs_device *tgtdev; 166 struct btrfs_device *tgtdev;
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
196static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 244static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
197 struct scrub_block *sblock, int is_metadata, 245 struct scrub_block *sblock, int is_metadata,
198 int have_csum, u8 *csum, u64 generation, 246 int have_csum, u8 *csum, u64 generation,
199 u16 csum_size); 247 u16 csum_size, int retry_failed_mirror);
200static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, 248static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
201 struct scrub_block *sblock, 249 struct scrub_block *sblock,
202 int is_metadata, int have_csum, 250 int is_metadata, int have_csum,
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock);
218static void scrub_block_put(struct scrub_block *sblock); 266static void scrub_block_put(struct scrub_block *sblock);
219static void scrub_page_get(struct scrub_page *spage); 267static void scrub_page_get(struct scrub_page *spage);
220static void scrub_page_put(struct scrub_page *spage); 268static void scrub_page_put(struct scrub_page *spage);
269static void scrub_parity_get(struct scrub_parity *sparity);
270static void scrub_parity_put(struct scrub_parity *sparity);
221static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, 271static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
222 struct scrub_page *spage); 272 struct scrub_page *spage);
223static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 273static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -790,6 +840,20 @@ out:
790 scrub_pending_trans_workers_dec(sctx); 840 scrub_pending_trans_workers_dec(sctx);
791} 841}
792 842
843static inline void scrub_get_recover(struct scrub_recover *recover)
844{
845 atomic_inc(&recover->refs);
846}
847
848static inline void scrub_put_recover(struct scrub_recover *recover)
849{
850 if (atomic_dec_and_test(&recover->refs)) {
851 kfree(recover->bbio);
852 kfree(recover->raid_map);
853 kfree(recover);
854 }
855}
856
793/* 857/*
794 * scrub_handle_errored_block gets called when either verification of the 858 * scrub_handle_errored_block gets called when either verification of the
795 * pages failed or the bio failed to read, e.g. with EIO. In the latter 859 * pages failed or the bio failed to read, e.g. with EIO. In the latter
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
906 970
907 /* build and submit the bios for the failed mirror, check checksums */ 971 /* build and submit the bios for the failed mirror, check checksums */
908 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, 972 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
909 csum, generation, sctx->csum_size); 973 csum, generation, sctx->csum_size, 1);
910 974
911 if (!sblock_bad->header_error && !sblock_bad->checksum_error && 975 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
912 sblock_bad->no_io_error_seen) { 976 sblock_bad->no_io_error_seen) {
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
920 */ 984 */
921 spin_lock(&sctx->stat_lock); 985 spin_lock(&sctx->stat_lock);
922 sctx->stat.unverified_errors++; 986 sctx->stat.unverified_errors++;
987 sblock_to_check->data_corrected = 1;
923 spin_unlock(&sctx->stat_lock); 988 spin_unlock(&sctx->stat_lock);
924 989
925 if (sctx->is_dev_replace) 990 if (sctx->is_dev_replace)
@@ -1019,7 +1084,7 @@ nodatasum_case:
1019 /* build and submit the bios, check checksums */ 1084 /* build and submit the bios, check checksums */
1020 scrub_recheck_block(fs_info, sblock_other, is_metadata, 1085 scrub_recheck_block(fs_info, sblock_other, is_metadata,
1021 have_csum, csum, generation, 1086 have_csum, csum, generation,
1022 sctx->csum_size); 1087 sctx->csum_size, 0);
1023 1088
1024 if (!sblock_other->header_error && 1089 if (!sblock_other->header_error &&
1025 !sblock_other->checksum_error && 1090 !sblock_other->checksum_error &&
@@ -1169,7 +1234,7 @@ nodatasum_case:
1169 */ 1234 */
1170 scrub_recheck_block(fs_info, sblock_bad, 1235 scrub_recheck_block(fs_info, sblock_bad,
1171 is_metadata, have_csum, csum, 1236 is_metadata, have_csum, csum,
1172 generation, sctx->csum_size); 1237 generation, sctx->csum_size, 1);
1173 if (!sblock_bad->header_error && 1238 if (!sblock_bad->header_error &&
1174 !sblock_bad->checksum_error && 1239 !sblock_bad->checksum_error &&
1175 sblock_bad->no_io_error_seen) 1240 sblock_bad->no_io_error_seen)
@@ -1180,6 +1245,7 @@ nodatasum_case:
1180corrected_error: 1245corrected_error:
1181 spin_lock(&sctx->stat_lock); 1246 spin_lock(&sctx->stat_lock);
1182 sctx->stat.corrected_errors++; 1247 sctx->stat.corrected_errors++;
1248 sblock_to_check->data_corrected = 1;
1183 spin_unlock(&sctx->stat_lock); 1249 spin_unlock(&sctx->stat_lock);
1184 printk_ratelimited_in_rcu(KERN_ERR 1250 printk_ratelimited_in_rcu(KERN_ERR
1185 "BTRFS: fixed up error at logical %llu on dev %s\n", 1251 "BTRFS: fixed up error at logical %llu on dev %s\n",
@@ -1201,11 +1267,18 @@ out:
1201 mirror_index++) { 1267 mirror_index++) {
1202 struct scrub_block *sblock = sblocks_for_recheck + 1268 struct scrub_block *sblock = sblocks_for_recheck +
1203 mirror_index; 1269 mirror_index;
1270 struct scrub_recover *recover;
1204 int page_index; 1271 int page_index;
1205 1272
1206 for (page_index = 0; page_index < sblock->page_count; 1273 for (page_index = 0; page_index < sblock->page_count;
1207 page_index++) { 1274 page_index++) {
1208 sblock->pagev[page_index]->sblock = NULL; 1275 sblock->pagev[page_index]->sblock = NULL;
1276 recover = sblock->pagev[page_index]->recover;
1277 if (recover) {
1278 scrub_put_recover(recover);
1279 sblock->pagev[page_index]->recover =
1280 NULL;
1281 }
1209 scrub_page_put(sblock->pagev[page_index]); 1282 scrub_page_put(sblock->pagev[page_index]);
1210 } 1283 }
1211 } 1284 }
@@ -1215,14 +1288,63 @@ out:
1215 return 0; 1288 return 0;
1216} 1289}
1217 1290
1291static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
1292{
1293 if (raid_map) {
1294 if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
1295 return 3;
1296 else
1297 return 2;
1298 } else {
1299 return (int)bbio->num_stripes;
1300 }
1301}
1302
1303static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
1304 u64 mapped_length,
1305 int nstripes, int mirror,
1306 int *stripe_index,
1307 u64 *stripe_offset)
1308{
1309 int i;
1310
1311 if (raid_map) {
1312 /* RAID5/6 */
1313 for (i = 0; i < nstripes; i++) {
1314 if (raid_map[i] == RAID6_Q_STRIPE ||
1315 raid_map[i] == RAID5_P_STRIPE)
1316 continue;
1317
1318 if (logical >= raid_map[i] &&
1319 logical < raid_map[i] + mapped_length)
1320 break;
1321 }
1322
1323 *stripe_index = i;
1324 *stripe_offset = logical - raid_map[i];
1325 } else {
1326 /* The other RAID type */
1327 *stripe_index = mirror;
1328 *stripe_offset = 0;
1329 }
1330}
1331
1218static int scrub_setup_recheck_block(struct scrub_ctx *sctx, 1332static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1219 struct btrfs_fs_info *fs_info, 1333 struct btrfs_fs_info *fs_info,
1220 struct scrub_block *original_sblock, 1334 struct scrub_block *original_sblock,
1221 u64 length, u64 logical, 1335 u64 length, u64 logical,
1222 struct scrub_block *sblocks_for_recheck) 1336 struct scrub_block *sblocks_for_recheck)
1223{ 1337{
1338 struct scrub_recover *recover;
1339 struct btrfs_bio *bbio;
1340 u64 *raid_map;
1341 u64 sublen;
1342 u64 mapped_length;
1343 u64 stripe_offset;
1344 int stripe_index;
1224 int page_index; 1345 int page_index;
1225 int mirror_index; 1346 int mirror_index;
1347 int nmirrors;
1226 int ret; 1348 int ret;
1227 1349
1228 /* 1350 /*
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1233 1355
1234 page_index = 0; 1356 page_index = 0;
1235 while (length > 0) { 1357 while (length > 0) {
1236 u64 sublen = min_t(u64, length, PAGE_SIZE); 1358 sublen = min_t(u64, length, PAGE_SIZE);
1237 u64 mapped_length = sublen; 1359 mapped_length = sublen;
1238 struct btrfs_bio *bbio = NULL; 1360 bbio = NULL;
1361 raid_map = NULL;
1239 1362
1240 /* 1363 /*
1241 * with a length of PAGE_SIZE, each returned stripe 1364 * with a length of PAGE_SIZE, each returned stripe
1242 * represents one mirror 1365 * represents one mirror
1243 */ 1366 */
1244 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, 1367 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
1245 &mapped_length, &bbio, 0); 1368 &mapped_length, &bbio, 0, &raid_map);
1246 if (ret || !bbio || mapped_length < sublen) { 1369 if (ret || !bbio || mapped_length < sublen) {
1247 kfree(bbio); 1370 kfree(bbio);
1371 kfree(raid_map);
1248 return -EIO; 1372 return -EIO;
1249 } 1373 }
1250 1374
1375 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1376 if (!recover) {
1377 kfree(bbio);
1378 kfree(raid_map);
1379 return -ENOMEM;
1380 }
1381
1382 atomic_set(&recover->refs, 1);
1383 recover->bbio = bbio;
1384 recover->raid_map = raid_map;
1385 recover->map_length = mapped_length;
1386
1251 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); 1387 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1252 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; 1388
1389 nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
1390 for (mirror_index = 0; mirror_index < nmirrors;
1253 mirror_index++) { 1391 mirror_index++) {
1254 struct scrub_block *sblock; 1392 struct scrub_block *sblock;
1255 struct scrub_page *page; 1393 struct scrub_page *page;
@@ -1265,26 +1403,38 @@ leave_nomem:
1265 spin_lock(&sctx->stat_lock); 1403 spin_lock(&sctx->stat_lock);
1266 sctx->stat.malloc_errors++; 1404 sctx->stat.malloc_errors++;
1267 spin_unlock(&sctx->stat_lock); 1405 spin_unlock(&sctx->stat_lock);
1268 kfree(bbio); 1406 scrub_put_recover(recover);
1269 return -ENOMEM; 1407 return -ENOMEM;
1270 } 1408 }
1271 scrub_page_get(page); 1409 scrub_page_get(page);
1272 sblock->pagev[page_index] = page; 1410 sblock->pagev[page_index] = page;
1273 page->logical = logical; 1411 page->logical = logical;
1274 page->physical = bbio->stripes[mirror_index].physical; 1412
1413 scrub_stripe_index_and_offset(logical, raid_map,
1414 mapped_length,
1415 bbio->num_stripes,
1416 mirror_index,
1417 &stripe_index,
1418 &stripe_offset);
1419 page->physical = bbio->stripes[stripe_index].physical +
1420 stripe_offset;
1421 page->dev = bbio->stripes[stripe_index].dev;
1422
1275 BUG_ON(page_index >= original_sblock->page_count); 1423 BUG_ON(page_index >= original_sblock->page_count);
1276 page->physical_for_dev_replace = 1424 page->physical_for_dev_replace =
1277 original_sblock->pagev[page_index]-> 1425 original_sblock->pagev[page_index]->
1278 physical_for_dev_replace; 1426 physical_for_dev_replace;
1279 /* for missing devices, dev->bdev is NULL */ 1427 /* for missing devices, dev->bdev is NULL */
1280 page->dev = bbio->stripes[mirror_index].dev;
1281 page->mirror_num = mirror_index + 1; 1428 page->mirror_num = mirror_index + 1;
1282 sblock->page_count++; 1429 sblock->page_count++;
1283 page->page = alloc_page(GFP_NOFS); 1430 page->page = alloc_page(GFP_NOFS);
1284 if (!page->page) 1431 if (!page->page)
1285 goto leave_nomem; 1432 goto leave_nomem;
1433
1434 scrub_get_recover(recover);
1435 page->recover = recover;
1286 } 1436 }
1287 kfree(bbio); 1437 scrub_put_recover(recover);
1288 length -= sublen; 1438 length -= sublen;
1289 logical += sublen; 1439 logical += sublen;
1290 page_index++; 1440 page_index++;
@@ -1293,6 +1443,51 @@ leave_nomem:
1293 return 0; 1443 return 0;
1294} 1444}
1295 1445
1446struct scrub_bio_ret {
1447 struct completion event;
1448 int error;
1449};
1450
1451static void scrub_bio_wait_endio(struct bio *bio, int error)
1452{
1453 struct scrub_bio_ret *ret = bio->bi_private;
1454
1455 ret->error = error;
1456 complete(&ret->event);
1457}
1458
1459static inline int scrub_is_page_on_raid56(struct scrub_page *page)
1460{
1461 return page->recover && page->recover->raid_map;
1462}
1463
1464static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1465 struct bio *bio,
1466 struct scrub_page *page)
1467{
1468 struct scrub_bio_ret done;
1469 int ret;
1470
1471 init_completion(&done.event);
1472 done.error = 0;
1473 bio->bi_iter.bi_sector = page->logical >> 9;
1474 bio->bi_private = &done;
1475 bio->bi_end_io = scrub_bio_wait_endio;
1476
1477 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
1478 page->recover->raid_map,
1479 page->recover->map_length,
1480 page->mirror_num, 0);
1481 if (ret)
1482 return ret;
1483
1484 wait_for_completion(&done.event);
1485 if (done.error)
1486 return -EIO;
1487
1488 return 0;
1489}
1490
1296/* 1491/*
1297 * this function will check the on disk data for checksum errors, header 1492 * this function will check the on disk data for checksum errors, header
1298 * errors and read I/O errors. If any I/O errors happen, the exact pages 1493 * errors and read I/O errors. If any I/O errors happen, the exact pages
@@ -1303,7 +1498,7 @@ leave_nomem:
1303static void scrub_recheck_block(struct btrfs_fs_info *fs_info, 1498static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1304 struct scrub_block *sblock, int is_metadata, 1499 struct scrub_block *sblock, int is_metadata,
1305 int have_csum, u8 *csum, u64 generation, 1500 int have_csum, u8 *csum, u64 generation,
1306 u16 csum_size) 1501 u16 csum_size, int retry_failed_mirror)
1307{ 1502{
1308 int page_num; 1503 int page_num;
1309 1504
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1329 continue; 1524 continue;
1330 } 1525 }
1331 bio->bi_bdev = page->dev->bdev; 1526 bio->bi_bdev = page->dev->bdev;
1332 bio->bi_iter.bi_sector = page->physical >> 9;
1333 1527
1334 bio_add_page(bio, page->page, PAGE_SIZE, 0); 1528 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1335 if (btrfsic_submit_bio_wait(READ, bio)) 1529 if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
1336 sblock->no_io_error_seen = 0; 1530 if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
1531 sblock->no_io_error_seen = 0;
1532 } else {
1533 bio->bi_iter.bi_sector = page->physical >> 9;
1534
1535 if (btrfsic_submit_bio_wait(READ, bio))
1536 sblock->no_io_error_seen = 0;
1537 }
1337 1538
1338 bio_put(bio); 1539 bio_put(bio);
1339 } 1540 }
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1486{ 1687{
1487 int page_num; 1688 int page_num;
1488 1689
1690 /*
1691 * This block is used for the check of the parity on the source device,
1692 * so the data needn't be written into the destination device.
1693 */
1694 if (sblock->sparity)
1695 return;
1696
1489 for (page_num = 0; page_num < sblock->page_count; page_num++) { 1697 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1490 int ret; 1698 int ret;
1491 1699
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock)
1867 if (atomic_dec_and_test(&sblock->ref_count)) { 2075 if (atomic_dec_and_test(&sblock->ref_count)) {
1868 int i; 2076 int i;
1869 2077
2078 if (sblock->sparity)
2079 scrub_parity_put(sblock->sparity);
2080
1870 for (i = 0; i < sblock->page_count; i++) 2081 for (i = 0; i < sblock->page_count; i++)
1871 scrub_page_put(sblock->pagev[i]); 2082 scrub_page_put(sblock->pagev[i]);
1872 kfree(sblock); 2083 kfree(sblock);
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
2124 scrub_pending_bio_dec(sctx); 2335 scrub_pending_bio_dec(sctx);
2125} 2336}
2126 2337
2338static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2339 unsigned long *bitmap,
2340 u64 start, u64 len)
2341{
2342 int offset;
2343 int nsectors;
2344 int sectorsize = sparity->sctx->dev_root->sectorsize;
2345
2346 if (len >= sparity->stripe_len) {
2347 bitmap_set(bitmap, 0, sparity->nsectors);
2348 return;
2349 }
2350
2351 start -= sparity->logic_start;
2352 offset = (int)do_div(start, sparity->stripe_len);
2353 offset /= sectorsize;
2354 nsectors = (int)len / sectorsize;
2355
2356 if (offset + nsectors <= sparity->nsectors) {
2357 bitmap_set(bitmap, offset, nsectors);
2358 return;
2359 }
2360
2361 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2362 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2363}
2364
2365static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2366 u64 start, u64 len)
2367{
2368 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2369}
2370
2371static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2372 u64 start, u64 len)
2373{
2374 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2375}
2376
2127static void scrub_block_complete(struct scrub_block *sblock) 2377static void scrub_block_complete(struct scrub_block *sblock)
2128{ 2378{
2379 int corrupted = 0;
2380
2129 if (!sblock->no_io_error_seen) { 2381 if (!sblock->no_io_error_seen) {
2382 corrupted = 1;
2130 scrub_handle_errored_block(sblock); 2383 scrub_handle_errored_block(sblock);
2131 } else { 2384 } else {
2132 /* 2385 /*
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock)
2134 * dev replace case, otherwise write here in dev replace 2387 * dev replace case, otherwise write here in dev replace
2135 * case. 2388 * case.
2136 */ 2389 */
2137 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) 2390 corrupted = scrub_checksum(sblock);
2391 if (!corrupted && sblock->sctx->is_dev_replace)
2138 scrub_write_block_to_dev_replace(sblock); 2392 scrub_write_block_to_dev_replace(sblock);
2139 } 2393 }
2394
2395 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2396 u64 start = sblock->pagev[0]->logical;
2397 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2398 PAGE_SIZE;
2399
2400 scrub_parity_mark_sectors_error(sblock->sparity,
2401 start, end - start);
2402 }
2140} 2403}
2141 2404
2142static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, 2405static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -2228,6 +2491,132 @@ behind_scrub_pages:
2228 return 0; 2491 return 0;
2229} 2492}
2230 2493
2494static int scrub_pages_for_parity(struct scrub_parity *sparity,
2495 u64 logical, u64 len,
2496 u64 physical, struct btrfs_device *dev,
2497 u64 flags, u64 gen, int mirror_num, u8 *csum)
2498{
2499 struct scrub_ctx *sctx = sparity->sctx;
2500 struct scrub_block *sblock;
2501 int index;
2502
2503 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
2504 if (!sblock) {
2505 spin_lock(&sctx->stat_lock);
2506 sctx->stat.malloc_errors++;
2507 spin_unlock(&sctx->stat_lock);
2508 return -ENOMEM;
2509 }
2510
2511 /* one ref inside this function, plus one for each page added to
2512 * a bio later on */
2513 atomic_set(&sblock->ref_count, 1);
2514 sblock->sctx = sctx;
2515 sblock->no_io_error_seen = 1;
2516 sblock->sparity = sparity;
2517 scrub_parity_get(sparity);
2518
2519 for (index = 0; len > 0; index++) {
2520 struct scrub_page *spage;
2521 u64 l = min_t(u64, len, PAGE_SIZE);
2522
2523 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2524 if (!spage) {
2525leave_nomem:
2526 spin_lock(&sctx->stat_lock);
2527 sctx->stat.malloc_errors++;
2528 spin_unlock(&sctx->stat_lock);
2529 scrub_block_put(sblock);
2530 return -ENOMEM;
2531 }
2532 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2533 /* For scrub block */
2534 scrub_page_get(spage);
2535 sblock->pagev[index] = spage;
2536 /* For scrub parity */
2537 scrub_page_get(spage);
2538 list_add_tail(&spage->list, &sparity->spages);
2539 spage->sblock = sblock;
2540 spage->dev = dev;
2541 spage->flags = flags;
2542 spage->generation = gen;
2543 spage->logical = logical;
2544 spage->physical = physical;
2545 spage->mirror_num = mirror_num;
2546 if (csum) {
2547 spage->have_csum = 1;
2548 memcpy(spage->csum, csum, sctx->csum_size);
2549 } else {
2550 spage->have_csum = 0;
2551 }
2552 sblock->page_count++;
2553 spage->page = alloc_page(GFP_NOFS);
2554 if (!spage->page)
2555 goto leave_nomem;
2556 len -= l;
2557 logical += l;
2558 physical += l;
2559 }
2560
2561 WARN_ON(sblock->page_count == 0);
2562 for (index = 0; index < sblock->page_count; index++) {
2563 struct scrub_page *spage = sblock->pagev[index];
2564 int ret;
2565
2566 ret = scrub_add_page_to_rd_bio(sctx, spage);
2567 if (ret) {
2568 scrub_block_put(sblock);
2569 return ret;
2570 }
2571 }
2572
2573 /* last one frees, either here or in bio completion for last page */
2574 scrub_block_put(sblock);
2575 return 0;
2576}
2577
2578static int scrub_extent_for_parity(struct scrub_parity *sparity,
2579 u64 logical, u64 len,
2580 u64 physical, struct btrfs_device *dev,
2581 u64 flags, u64 gen, int mirror_num)
2582{
2583 struct scrub_ctx *sctx = sparity->sctx;
2584 int ret;
2585 u8 csum[BTRFS_CSUM_SIZE];
2586 u32 blocksize;
2587
2588 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2589 blocksize = sctx->sectorsize;
2590 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2591 blocksize = sctx->nodesize;
2592 } else {
2593 blocksize = sctx->sectorsize;
2594 WARN_ON(1);
2595 }
2596
2597 while (len) {
2598 u64 l = min_t(u64, len, blocksize);
2599 int have_csum = 0;
2600
2601 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2602 /* push csums to sbio */
2603 have_csum = scrub_find_csum(sctx, logical, l, csum);
2604 if (have_csum == 0)
2605 goto skip;
2606 }
2607 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2608 flags, gen, mirror_num,
2609 have_csum ? csum : NULL);
2610skip:
2611 if (ret)
2612 return ret;
2613 len -= l;
2614 logical += l;
2615 physical += l;
2616 }
2617 return 0;
2618}
2619
2231/* 2620/*
2232 * Given a physical address, this will calculate it's 2621 * Given a physical address, this will calculate it's
2233 * logical offset. if this is a parity stripe, it will return 2622 * logical offset. if this is a parity stripe, it will return
@@ -2236,7 +2625,8 @@ behind_scrub_pages:
2236 * return 0 if it is a data stripe, 1 means parity stripe. 2625 * return 0 if it is a data stripe, 1 means parity stripe.
2237 */ 2626 */
2238static int get_raid56_logic_offset(u64 physical, int num, 2627static int get_raid56_logic_offset(u64 physical, int num,
2239 struct map_lookup *map, u64 *offset) 2628 struct map_lookup *map, u64 *offset,
2629 u64 *stripe_start)
2240{ 2630{
2241 int i; 2631 int i;
2242 int j = 0; 2632 int j = 0;
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num,
2247 2637
2248 last_offset = (physical - map->stripes[num].physical) * 2638 last_offset = (physical - map->stripes[num].physical) *
2249 nr_data_stripes(map); 2639 nr_data_stripes(map);
2640 if (stripe_start)
2641 *stripe_start = last_offset;
2642
2250 *offset = last_offset; 2643 *offset = last_offset;
2251 for (i = 0; i < nr_data_stripes(map); i++) { 2644 for (i = 0; i < nr_data_stripes(map); i++) {
2252 *offset = last_offset + i * map->stripe_len; 2645 *offset = last_offset + i * map->stripe_len;
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num,
2269 return 1; 2662 return 1;
2270} 2663}
2271 2664
2665static void scrub_free_parity(struct scrub_parity *sparity)
2666{
2667 struct scrub_ctx *sctx = sparity->sctx;
2668 struct scrub_page *curr, *next;
2669 int nbits;
2670
2671 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2672 if (nbits) {
2673 spin_lock(&sctx->stat_lock);
2674 sctx->stat.read_errors += nbits;
2675 sctx->stat.uncorrectable_errors += nbits;
2676 spin_unlock(&sctx->stat_lock);
2677 }
2678
2679 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2680 list_del_init(&curr->list);
2681 scrub_page_put(curr);
2682 }
2683
2684 kfree(sparity);
2685}
2686
2687static void scrub_parity_bio_endio(struct bio *bio, int error)
2688{
2689 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2690 struct scrub_ctx *sctx = sparity->sctx;
2691
2692 if (error)
2693 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2694 sparity->nsectors);
2695
2696 scrub_free_parity(sparity);
2697 scrub_pending_bio_dec(sctx);
2698 bio_put(bio);
2699}
2700
2701static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2702{
2703 struct scrub_ctx *sctx = sparity->sctx;
2704 struct bio *bio;
2705 struct btrfs_raid_bio *rbio;
2706 struct scrub_page *spage;
2707 struct btrfs_bio *bbio = NULL;
2708 u64 *raid_map = NULL;
2709 u64 length;
2710 int ret;
2711
2712 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2713 sparity->nsectors))
2714 goto out;
2715
2716 length = sparity->logic_end - sparity->logic_start + 1;
2717 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2718 sparity->logic_start,
2719 &length, &bbio, 0, &raid_map);
2720 if (ret || !bbio || !raid_map)
2721 goto bbio_out;
2722
2723 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2724 if (!bio)
2725 goto bbio_out;
2726
2727 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2728 bio->bi_private = sparity;
2729 bio->bi_end_io = scrub_parity_bio_endio;
2730
2731 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2732 raid_map, length,
2733 sparity->scrub_dev,
2734 sparity->dbitmap,
2735 sparity->nsectors);
2736 if (!rbio)
2737 goto rbio_out;
2738
2739 list_for_each_entry(spage, &sparity->spages, list)
2740 raid56_parity_add_scrub_pages(rbio, spage->page,
2741 spage->logical);
2742
2743 scrub_pending_bio_inc(sctx);
2744 raid56_parity_submit_scrub_rbio(rbio);
2745 return;
2746
2747rbio_out:
2748 bio_put(bio);
2749bbio_out:
2750 kfree(bbio);
2751 kfree(raid_map);
2752 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2753 sparity->nsectors);
2754 spin_lock(&sctx->stat_lock);
2755 sctx->stat.malloc_errors++;
2756 spin_unlock(&sctx->stat_lock);
2757out:
2758 scrub_free_parity(sparity);
2759}
2760
2761static inline int scrub_calc_parity_bitmap_len(int nsectors)
2762{
2763 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
2764}
2765
2766static void scrub_parity_get(struct scrub_parity *sparity)
2767{
2768 atomic_inc(&sparity->ref_count);
2769}
2770
2771static void scrub_parity_put(struct scrub_parity *sparity)
2772{
2773 if (!atomic_dec_and_test(&sparity->ref_count))
2774 return;
2775
2776 scrub_parity_check_and_repair(sparity);
2777}
2778
/*
 * Scrub the parity of the stripe covering [logic_start, logic_end).
 *
 * A scrub_parity context is allocated with room for two bitmaps (data and
 * error), one bit per sector of a stripe.  The extent tree is then walked
 * for every extent/metadata item that overlaps the range; each overlapping
 * piece is clamped to the current stripe, marked in the data bitmap, has its
 * checksums looked up and is queued for reading via
 * scrub_extent_for_parity().
 *
 * On a negative error the range from the current logic_start onwards is
 * marked in the error bitmap.  In all cases the initial reference on the
 * context is dropped (the last put runs scrub_parity_check_and_repair()) and
 * the pending read and write bios are submitted.
 *
 * Returns 0 on success or a negative errno.
 */
2779static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2780 struct map_lookup *map,
2781 struct btrfs_device *sdev,
2782 struct btrfs_path *path,
2783 u64 logic_start,
2784 u64 logic_end)
2785{
2786 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2787 struct btrfs_root *root = fs_info->extent_root;
2788 struct btrfs_root *csum_root = fs_info->csum_root;
2789 struct btrfs_extent_item *extent;
2790 u64 flags;
2791 int ret;
2792 int slot;
2793 struct extent_buffer *l;
2794 struct btrfs_key key;
2795 u64 generation;
2796 u64 extent_logical;
2797 u64 extent_physical;
2798 u64 extent_len;
2799 struct btrfs_device *extent_dev;
2800 struct scrub_parity *sparity;
2801 int nsectors;
2802 int bitmap_len;
2803 int extent_mirror_num;
2804 int stop_loop = 0;
2805
/* One bit per sector of a stripe; two bitmaps are stored behind the struct */
2806 nsectors = map->stripe_len / root->sectorsize;
2807 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2808 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2809 GFP_NOFS);
2810 if (!sparity) {
2811 spin_lock(&sctx->stat_lock);
2812 sctx->stat.malloc_errors++;
2813 spin_unlock(&sctx->stat_lock);
2814 return -ENOMEM;
2815 }
2816
2817 sparity->stripe_len = map->stripe_len;
2818 sparity->nsectors = nsectors;
2819 sparity->sctx = sctx;
2820 sparity->scrub_dev = sdev;
2821 sparity->logic_start = logic_start;
2822 sparity->logic_end = logic_end;
2823 atomic_set(&sparity->ref_count, 1);
2824 INIT_LIST_HEAD(&sparity->spages);
/* dbitmap is the first half of the trailing storage, ebitmap the second */
2825 sparity->dbitmap = sparity->bitmap;
2826 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2827
2828 ret = 0;
2829 while (logic_start < logic_end) {
2830 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2831 key.type = BTRFS_METADATA_ITEM_KEY;
2832 else
2833 key.type = BTRFS_EXTENT_ITEM_KEY;
2834 key.objectid = logic_start;
2835 key.offset = (u64)-1;
2836
2837 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2838 if (ret < 0)
2839 goto out;
2840
/* No exact match: step back to a preceding extent item, or search again */
2841 if (ret > 0) {
2842 ret = btrfs_previous_extent_item(root, path, 0);
2843 if (ret < 0)
2844 goto out;
2845 if (ret > 0) {
2846 btrfs_release_path(path);
2847 ret = btrfs_search_slot(NULL, root, &key,
2848 path, 0, 0);
2849 if (ret < 0)
2850 goto out;
2851 }
2852 }
2853
2854 stop_loop = 0;
2855 while (1) {
2856 u64 bytes;
2857
2858 l = path->nodes[0];
2859 slot = path->slots[0];
2860 if (slot >= btrfs_header_nritems(l)) {
2861 ret = btrfs_next_leaf(root, path);
2862 if (ret == 0)
2863 continue;
2864 if (ret < 0)
2865 goto out;
2866
2867 stop_loop = 1;
2868 break;
2869 }
2870 btrfs_item_key_to_cpu(l, &key, slot);
2871
/* Metadata items use the node size; otherwise key.offset is the length */
2872 if (key.type == BTRFS_METADATA_ITEM_KEY)
2873 bytes = root->nodesize;
2874 else
2875 bytes = key.offset;
2876
2877 if (key.objectid + bytes <= logic_start)
2878 goto next;
2879
2880 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2881 key.type != BTRFS_METADATA_ITEM_KEY)
2882 goto next;
2883
2884 if (key.objectid > logic_end) {
2885 stop_loop = 1;
2886 break;
2887 }
2888
/* Advance logic_start in stripe_len steps until the extent start is inside */
2889 while (key.objectid >= logic_start + map->stripe_len)
2890 logic_start += map->stripe_len;
2891
2892 extent = btrfs_item_ptr(l, slot,
2893 struct btrfs_extent_item);
2894 flags = btrfs_extent_flags(l, extent);
2895 generation = btrfs_extent_generation(l, extent);
2896
2897 if (key.objectid < logic_start &&
2898 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2899 btrfs_err(fs_info,
2900 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2901 key.objectid, logic_start);
2902 goto next;
2903 }
2904again:
2905 extent_logical = key.objectid;
2906 extent_len = bytes;
2907
/* Clamp the extent to [logic_start, logic_start + stripe_len) */
2908 if (extent_logical < logic_start) {
2909 extent_len -= logic_start - extent_logical;
2910 extent_logical = logic_start;
2911 }
2912
2913 if (extent_logical + extent_len >
2914 logic_start + map->stripe_len)
2915 extent_len = logic_start + map->stripe_len -
2916 extent_logical;
2917
2918 scrub_parity_mark_sectors_data(sparity, extent_logical,
2919 extent_len);
2920
2921 scrub_remap_extent(fs_info, extent_logical,
2922 extent_len, &extent_physical,
2923 &extent_dev,
2924 &extent_mirror_num);
2925
2926 ret = btrfs_lookup_csums_range(csum_root,
2927 extent_logical,
2928 extent_logical + extent_len - 1,
2929 &sctx->csum_list, 1);
2930 if (ret)
2931 goto out;
2932
2933 ret = scrub_extent_for_parity(sparity, extent_logical,
2934 extent_len,
2935 extent_physical,
2936 extent_dev, flags,
2937 generation,
2938 extent_mirror_num);
2939 if (ret)
2940 goto out;
2941
2942 scrub_free_csums(sctx);
/* Part of the extent is left over: move on one stripe_len and reprocess it */
2943 if (extent_logical + extent_len <
2944 key.objectid + bytes) {
2945 logic_start += map->stripe_len;
2946
2947 if (logic_start >= logic_end) {
2948 stop_loop = 1;
2949 break;
2950 }
2951
2952 if (logic_start < key.objectid + bytes) {
2953 cond_resched();
2954 goto again;
2955 }
2956 }
2957next:
2958 path->slots[0]++;
2959 }
2960
2961 btrfs_release_path(path);
2962
2963 if (stop_loop)
2964 break;
2965
2966 logic_start += map->stripe_len;
2967 }
2968out:
/* On failure, flag the range starting at the current logic_start as errored */
2969 if (ret < 0)
2970 scrub_parity_mark_sectors_error(sparity, logic_start,
2971 logic_end - logic_start + 1);
/* Drop the initial reference taken at setup */
2972 scrub_parity_put(sparity);
2973 scrub_submit(sctx);
2974 mutex_lock(&sctx->wr_ctx.wr_lock);
2975 scrub_wr_submit(sctx);
2976 mutex_unlock(&sctx->wr_ctx.wr_lock);
2977
2978 btrfs_release_path(path);
2979 return ret < 0 ? ret : 0;
2980}
2981
2272static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, 2982static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2273 struct map_lookup *map, 2983 struct map_lookup *map,
2274 struct btrfs_device *scrub_dev, 2984 struct btrfs_device *scrub_dev,
2275 int num, u64 base, u64 length, 2985 int num, u64 base, u64 length,
2276 int is_dev_replace) 2986 int is_dev_replace)
2277{ 2987{
2278 struct btrfs_path *path; 2988 struct btrfs_path *path, *ppath;
2279 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; 2989 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2280 struct btrfs_root *root = fs_info->extent_root; 2990 struct btrfs_root *root = fs_info->extent_root;
2281 struct btrfs_root *csum_root = fs_info->csum_root; 2991 struct btrfs_root *csum_root = fs_info->csum_root;
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2302 u64 extent_logical; 3012 u64 extent_logical;
2303 u64 extent_physical; 3013 u64 extent_physical;
2304 u64 extent_len; 3014 u64 extent_len;
3015 u64 stripe_logical;
3016 u64 stripe_end;
2305 struct btrfs_device *extent_dev; 3017 struct btrfs_device *extent_dev;
2306 int extent_mirror_num; 3018 int extent_mirror_num;
2307 int stop_loop = 0; 3019 int stop_loop = 0;
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2327 mirror_num = num % map->num_stripes + 1; 3039 mirror_num = num % map->num_stripes + 1;
2328 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3040 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2329 BTRFS_BLOCK_GROUP_RAID6)) { 3041 BTRFS_BLOCK_GROUP_RAID6)) {
2330 get_raid56_logic_offset(physical, num, map, &offset); 3042 get_raid56_logic_offset(physical, num, map, &offset, NULL);
2331 increment = map->stripe_len * nr_data_stripes(map); 3043 increment = map->stripe_len * nr_data_stripes(map);
2332 mirror_num = 1; 3044 mirror_num = 1;
2333 } else { 3045 } else {
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2339 if (!path) 3051 if (!path)
2340 return -ENOMEM; 3052 return -ENOMEM;
2341 3053
3054 ppath = btrfs_alloc_path();
3055 if (!ppath) {
3056 btrfs_free_path(path);
3057 return -ENOMEM;
3058 }
3059
2342 /* 3060 /*
2343 * work on commit root. The related disk blocks are static as 3061 * work on commit root. The related disk blocks are static as
2344 * long as COW is applied. This means, it is safe to rewrite 3062 * long as COW is applied. This means, it is safe to rewrite
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2357 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3075 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2358 BTRFS_BLOCK_GROUP_RAID6)) { 3076 BTRFS_BLOCK_GROUP_RAID6)) {
2359 get_raid56_logic_offset(physical_end, num, 3077 get_raid56_logic_offset(physical_end, num,
2360 map, &logic_end); 3078 map, &logic_end, NULL);
2361 logic_end += base; 3079 logic_end += base;
2362 } else { 3080 } else {
2363 logic_end = logical + increment * nstripes; 3081 logic_end = logical + increment * nstripes;
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2404 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 3122 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2405 BTRFS_BLOCK_GROUP_RAID6)) { 3123 BTRFS_BLOCK_GROUP_RAID6)) {
2406 ret = get_raid56_logic_offset(physical, num, 3124 ret = get_raid56_logic_offset(physical, num,
2407 map, &logical); 3125 map, &logical, &stripe_logical);
2408 logical += base; 3126 logical += base;
2409 if (ret) 3127 if (ret) {
3128 stripe_logical += base;
3129 stripe_end = stripe_logical + increment - 1;
3130 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3131 ppath, stripe_logical,
3132 stripe_end);
3133 if (ret)
3134 goto out;
2410 goto skip; 3135 goto skip;
3136 }
2411 } 3137 }
2412 /* 3138 /*
2413 * canceled? 3139 * canceled?
@@ -2558,13 +3284,25 @@ again:
2558 * loop until we find next data stripe 3284 * loop until we find next data stripe
2559 * or we have finished all stripes. 3285 * or we have finished all stripes.
2560 */ 3286 */
2561 do { 3287loop:
2562 physical += map->stripe_len; 3288 physical += map->stripe_len;
2563 ret = get_raid56_logic_offset( 3289 ret = get_raid56_logic_offset(physical,
2564 physical, num, 3290 num, map, &logical,
2565 map, &logical); 3291 &stripe_logical);
2566 logical += base; 3292 logical += base;
2567 } while (physical < physical_end && ret); 3293
3294 if (ret && physical < physical_end) {
3295 stripe_logical += base;
3296 stripe_end = stripe_logical +
3297 increment - 1;
3298 ret = scrub_raid56_parity(sctx,
3299 map, scrub_dev, ppath,
3300 stripe_logical,
3301 stripe_end);
3302 if (ret)
3303 goto out;
3304 goto loop;
3305 }
2568 } else { 3306 } else {
2569 physical += map->stripe_len; 3307 physical += map->stripe_len;
2570 logical += increment; 3308 logical += increment;
@@ -2605,6 +3343,7 @@ out:
2605 3343
2606 blk_finish_plug(&plug); 3344 blk_finish_plug(&plug);
2607 btrfs_free_path(path); 3345 btrfs_free_path(path);
3346 btrfs_free_path(ppath);
2608 return ret < 0 ? ret : 0; 3347 return ret < 0 ? ret : 0;
2609} 3348}
2610 3349
@@ -3310,6 +4049,50 @@ out:
3310 scrub_pending_trans_workers_dec(sctx); 4049 scrub_pending_trans_workers_dec(sctx);
3311} 4050}
3312 4051
/*
 * check_extent_to_block - check that a file range still maps onto a given
 * logical (on-disk) range and has no ordered extent in flight.
 *
 * @inode:   inode whose io_tree and extent mapping are examined
 * @start:   file offset of the first byte of the range
 * @len:     length of the range in bytes
 * @logical: logical address the range is expected to be backed by
 *
 * The range [start, start + len - 1] is locked in the inode's io_tree for
 * the duration of the check and is always unlocked again before returning.
 *
 * Returns 0 when the extent map found for the range covers
 * [logical, logical + len), 1 when an ordered extent was found for the range
 * or the extent map no longer covers the logical range, and the PTR_ERR()
 * value of btrfs_get_extent() when the lookup fails.
 * copy_nocow_pages_for_inode() maps a positive return to 0.
 */
4052static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
4053 u64 logical)
4054{
4055 struct extent_state *cached_state = NULL;
4056 struct btrfs_ordered_extent *ordered;
4057 struct extent_io_tree *io_tree;
4058 struct extent_map *em;
4059 u64 lockstart = start, lockend = start + len - 1;
4060 int ret = 0;
4061
4062 io_tree = &BTRFS_I(inode)->io_tree;
4063
/* Hold the extent range lock across both lookups below. */
4064 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
4065 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
/* An ordered extent exists for the range: drop our reference and report 1. */
4066 if (ordered) {
4067 btrfs_put_ordered_extent(ordered);
4068 ret = 1;
4069 goto out_unlock;
4070 }
4071
4072 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
/* Lookup failure: hand the encoded error back to the caller. */
4073 if (IS_ERR(em)) {
4074 ret = PTR_ERR(em);
4075 goto out_unlock;
4076 }
4077
4078 /*
4079 * This extent does not actually cover the logical extent anymore,
4080 * move on to the next inode.
4081 */
4082 if (em->block_start > logical ||
4083 em->block_start + em->block_len < logical + len) {
4084 free_extent_map(em);
4085 ret = 1;
4086 goto out_unlock;
4087 }
/* Mapping still covers the logical range; ret stays 0. */
4088 free_extent_map(em);
4089
/* Single exit point: every path releases the range lock taken above. */
4090out_unlock:
4091 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
4092 GFP_NOFS);
4093 return ret;
4094}
4095
3313static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 4096static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3314 struct scrub_copy_nocow_ctx *nocow_ctx) 4097 struct scrub_copy_nocow_ctx *nocow_ctx)
3315{ 4098{
@@ -3318,13 +4101,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3318 struct inode *inode; 4101 struct inode *inode;
3319 struct page *page; 4102 struct page *page;
3320 struct btrfs_root *local_root; 4103 struct btrfs_root *local_root;
3321 struct btrfs_ordered_extent *ordered;
3322 struct extent_map *em;
3323 struct extent_state *cached_state = NULL;
3324 struct extent_io_tree *io_tree; 4104 struct extent_io_tree *io_tree;
3325 u64 physical_for_dev_replace; 4105 u64 physical_for_dev_replace;
4106 u64 nocow_ctx_logical;
3326 u64 len = nocow_ctx->len; 4107 u64 len = nocow_ctx->len;
3327 u64 lockstart = offset, lockend = offset + len - 1;
3328 unsigned long index; 4108 unsigned long index;
3329 int srcu_index; 4109 int srcu_index;
3330 int ret = 0; 4110 int ret = 0;
@@ -3356,30 +4136,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3356 4136
3357 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 4137 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3358 io_tree = &BTRFS_I(inode)->io_tree; 4138 io_tree = &BTRFS_I(inode)->io_tree;
4139 nocow_ctx_logical = nocow_ctx->logical;
3359 4140
3360 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state); 4141 ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
3361 ordered = btrfs_lookup_ordered_range(inode, lockstart, len); 4142 if (ret) {
3362 if (ordered) { 4143 ret = ret > 0 ? 0 : ret;
3363 btrfs_put_ordered_extent(ordered); 4144 goto out;
3364 goto out_unlock;
3365 }
3366
3367 em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
3368 if (IS_ERR(em)) {
3369 ret = PTR_ERR(em);
3370 goto out_unlock;
3371 }
3372
3373 /*
3374 * This extent does not actually cover the logical extent anymore,
3375 * move on to the next inode.
3376 */
3377 if (em->block_start > nocow_ctx->logical ||
3378 em->block_start + em->block_len < nocow_ctx->logical + len) {
3379 free_extent_map(em);
3380 goto out_unlock;
3381 } 4145 }
3382 free_extent_map(em);
3383 4146
3384 while (len >= PAGE_CACHE_SIZE) { 4147 while (len >= PAGE_CACHE_SIZE) {
3385 index = offset >> PAGE_CACHE_SHIFT; 4148 index = offset >> PAGE_CACHE_SHIFT;
@@ -3396,7 +4159,7 @@ again:
3396 goto next_page; 4159 goto next_page;
3397 } else { 4160 } else {
3398 ClearPageError(page); 4161 ClearPageError(page);
3399 err = extent_read_full_page_nolock(io_tree, page, 4162 err = extent_read_full_page(io_tree, page,
3400 btrfs_get_extent, 4163 btrfs_get_extent,
3401 nocow_ctx->mirror_num); 4164 nocow_ctx->mirror_num);
3402 if (err) { 4165 if (err) {
@@ -3421,6 +4184,14 @@ again:
3421 goto next_page; 4184 goto next_page;
3422 } 4185 }
3423 } 4186 }
4187
4188 ret = check_extent_to_block(inode, offset, len,
4189 nocow_ctx_logical);
4190 if (ret) {
4191 ret = ret > 0 ? 0 : ret;
4192 goto next_page;
4193 }
4194
3424 err = write_page_nocow(nocow_ctx->sctx, 4195 err = write_page_nocow(nocow_ctx->sctx,
3425 physical_for_dev_replace, page); 4196 physical_for_dev_replace, page);
3426 if (err) 4197 if (err)
@@ -3434,12 +4205,10 @@ next_page:
3434 4205
3435 offset += PAGE_CACHE_SIZE; 4206 offset += PAGE_CACHE_SIZE;
3436 physical_for_dev_replace += PAGE_CACHE_SIZE; 4207 physical_for_dev_replace += PAGE_CACHE_SIZE;
4208 nocow_ctx_logical += PAGE_CACHE_SIZE;
3437 len -= PAGE_CACHE_SIZE; 4209 len -= PAGE_CACHE_SIZE;
3438 } 4210 }
3439 ret = COPY_COMPLETE; 4211 ret = COPY_COMPLETE;
3440out_unlock:
3441 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3442 GFP_NOFS);
3443out: 4212out:
3444 mutex_unlock(&inode->i_mutex); 4213 mutex_unlock(&inode->i_mutex);
3445 iput(inode); 4214 iput(inode);