author		Stefan Behrens <sbehrens@giantdisaster.de>	2012-11-02 09:58:04 -0400
committer	Josef Bacik <jbacik@fusionio.com>	2012-12-12 17:15:30 -0500
commit		7a9e9987681198c56ac7f165725ca322d7a196e1 (patch)
tree		352517134c77a7f0fc64579198326bd73ecb4ae3 /fs/btrfs/scrub.c
parent		a36cf8b8933e4a7a7f2f2cbc3c70b097e97f7fd1 (diff)
Btrfs: make the scrub page array dynamically allocated
With the modified design (in order to support the device replace procedure) it is necessary to allocate the page array dynamically. The reason is that pages are reused: at first a page is used for the bio that reads the data from the filesystem, then the same page is reused for the bio that writes the data to the target disk. Since the read process and the write process are completely decoupled, this requires a new concept of refcounts and get/put functions for pages, and it requires using newly created pages for each read bio, which are freed after the write operation is finished.

Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
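For readers skimming the diff, the core of the change is the get/put pattern below, shown as a minimal standalone sketch extracted from the patch (the struct is abbreviated to the members relevant to refcounting; the kernel headers are what such code would plausibly need):

#include <linux/atomic.h>
#include <linux/mm.h>
#include <linux/slab.h>

struct scrub_page {
	atomic_t	ref_count;
	struct page	*page;
	/* ... remaining members as in the diff below ... */
};

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->ref_count);
}

static void scrub_page_put(struct scrub_page *spage)
{
	/* the last put frees both the data page and the struct itself */
	if (atomic_dec_and_test(&spage->ref_count)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

In the patch, scrub_page_get() is taken when a page is inserted into a block's pagev[] array, and scrub_block_put() drops that reference via scrub_page_put() for every page, so a page and its backing memory survive exactly as long as some holder still references them.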
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--	fs/btrfs/scrub.c	195
1 file changed, 121 insertions(+), 74 deletions(-)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 822c08a420c2..15ac82ae5770 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -46,6 +46,12 @@ struct scrub_ctx;
 
 #define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
 #define SCRUB_BIOS_PER_CTX	16	/* 1 MB per device in flight */
+
+/*
+ * the following value times PAGE_SIZE needs to be large enough to match the
+ * largest node/leaf/sector size that shall be supported.
+ * Values larger than BTRFS_STRIPE_LEN are not supported.
+ */
 #define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
 
 struct scrub_page {
@@ -56,6 +62,7 @@ struct scrub_page {
 	u64			generation;
 	u64			logical;
 	u64			physical;
+	atomic_t		ref_count;
 	struct {
 		unsigned int	mirror_num:8;
 		unsigned int	have_csum:1;
@@ -79,7 +86,7 @@ struct scrub_bio {
 };
 
 struct scrub_block {
-	struct scrub_page	pagev[SCRUB_MAX_PAGES_PER_BLOCK];
+	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
 	int			page_count;
 	atomic_t		outstanding_pages;
 	atomic_t		ref_count; /* free mem on transition to zero */
@@ -165,6 +172,8 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock);
 static int scrub_checksum_super(struct scrub_block *sblock);
 static void scrub_block_get(struct scrub_block *sblock);
 static void scrub_block_put(struct scrub_block *sblock);
+static void scrub_page_get(struct scrub_page *spage);
+static void scrub_page_put(struct scrub_page *spage);
 static int scrub_add_page_to_bio(struct scrub_ctx *sctx,
 				 struct scrub_page *spage);
 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -364,15 +373,15 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 	int ret;
 
 	WARN_ON(sblock->page_count < 1);
-	dev = sblock->pagev[0].dev;
+	dev = sblock->pagev[0]->dev;
 	fs_info = sblock->sctx->dev_root->fs_info;
 
 	path = btrfs_alloc_path();
 
 	swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
 	swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
-	swarn.sector = (sblock->pagev[0].physical) >> 9;
-	swarn.logical = sblock->pagev[0].logical;
+	swarn.sector = (sblock->pagev[0]->physical) >> 9;
+	swarn.logical = sblock->pagev[0]->logical;
 	swarn.errstr = errstr;
 	swarn.dev = NULL;
 	swarn.msg_bufsize = bufsize;
@@ -642,15 +651,15 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	BUG_ON(sblock_to_check->page_count < 1);
 	fs_info = sctx->dev_root->fs_info;
 	length = sblock_to_check->page_count * PAGE_SIZE;
-	logical = sblock_to_check->pagev[0].logical;
-	generation = sblock_to_check->pagev[0].generation;
-	BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
-	failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
-	is_metadata = !(sblock_to_check->pagev[0].flags &
+	logical = sblock_to_check->pagev[0]->logical;
+	generation = sblock_to_check->pagev[0]->generation;
+	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
+	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
+	is_metadata = !(sblock_to_check->pagev[0]->flags &
 			BTRFS_EXTENT_FLAG_DATA);
-	have_csum = sblock_to_check->pagev[0].have_csum;
-	csum = sblock_to_check->pagev[0].csum;
-	dev = sblock_to_check->pagev[0].dev;
+	have_csum = sblock_to_check->pagev[0]->have_csum;
+	csum = sblock_to_check->pagev[0]->csum;
+	dev = sblock_to_check->pagev[0]->dev;
 
 	/*
 	 * read all mirrors one after the other. This includes to
@@ -892,7 +901,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 
 	success = 1;
 	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
-		struct scrub_page *page_bad = sblock_bad->pagev + page_num;
+		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
 
 		if (!page_bad->io_error)
 			continue;
@@ -903,8 +912,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 		     mirror_index++) {
 			struct scrub_block *sblock_other = sblocks_for_recheck +
 							   mirror_index;
-			struct scrub_page *page_other = sblock_other->pagev +
-							page_num;
+			struct scrub_page *page_other = sblock_other->pagev[
+							page_num];
 
 			if (!page_other->io_error) {
 				ret = scrub_repair_page_from_good_copy(
@@ -971,11 +980,11 @@ out:
 							mirror_index;
 			int page_index;
 
-			for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
-			     page_index++)
-				if (sblock->pagev[page_index].page)
-					__free_page(
-						sblock->pagev[page_index].page);
+			for (page_index = 0; page_index < sblock->page_count;
+			     page_index++) {
+				sblock->pagev[page_index]->sblock = NULL;
+				scrub_page_put(sblock->pagev[page_index]);
+			}
 		}
 		kfree(sblocks_for_recheck);
 	}
@@ -993,7 +1002,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 	int ret;
 
 	/*
-	 * note: the three members sctx, ref_count and outstanding_pages
+	 * note: the two members ref_count and outstanding_pages
 	 * are not used (and not set) in the blocks that are used for
 	 * the recheck procedure
 	 */
@@ -1025,21 +1034,27 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 				continue;
 
 			sblock = sblocks_for_recheck + mirror_index;
-			page = sblock->pagev + page_index;
-			page->logical = logical;
-			page->physical = bbio->stripes[mirror_index].physical;
-			/* for missing devices, dev->bdev is NULL */
-			page->dev = bbio->stripes[mirror_index].dev;
-			page->mirror_num = mirror_index + 1;
-			page->page = alloc_page(GFP_NOFS);
-			if (!page->page) {
+			sblock->sctx = sctx;
+			page = kzalloc(sizeof(*page), GFP_NOFS);
+			if (!page) {
+leave_nomem:
 				spin_lock(&sctx->stat_lock);
 				sctx->stat.malloc_errors++;
 				spin_unlock(&sctx->stat_lock);
 				kfree(bbio);
 				return -ENOMEM;
 			}
+			scrub_page_get(page);
+			sblock->pagev[page_index] = page;
+			page->logical = logical;
+			page->physical = bbio->stripes[mirror_index].physical;
+			/* for missing devices, dev->bdev is NULL */
+			page->dev = bbio->stripes[mirror_index].dev;
+			page->mirror_num = mirror_index + 1;
 			sblock->page_count++;
+			page->page = alloc_page(GFP_NOFS);
+			if (!page->page)
+				goto leave_nomem;
 		}
 		kfree(bbio);
 		length -= sublen;
@@ -1071,7 +1086,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
 		struct bio *bio;
 		int ret;
-		struct scrub_page *page = sblock->pagev + page_num;
+		struct scrub_page *page = sblock->pagev[page_num];
 		DECLARE_COMPLETION_ONSTACK(complete);
 
 		if (page->dev->bdev == NULL) {
@@ -1080,7 +1095,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
 			continue;
 		}
 
-		BUG_ON(!page->page);
+		WARN_ON(!page->page);
 		bio = bio_alloc(GFP_NOFS, 1);
 		if (!bio)
 			return -EIO;
@@ -1125,14 +1140,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 	struct btrfs_root *root = fs_info->extent_root;
 	void *mapped_buffer;
 
-	BUG_ON(!sblock->pagev[0].page);
+	WARN_ON(!sblock->pagev[0]->page);
 	if (is_metadata) {
 		struct btrfs_header *h;
 
-		mapped_buffer = kmap_atomic(sblock->pagev[0].page);
+		mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
 		h = (struct btrfs_header *)mapped_buffer;
 
-		if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
+		if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
 		    memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
 		    memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
 			   BTRFS_UUID_SIZE)) {
@@ -1146,7 +1161,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 		if (!have_csum)
 			return;
 
-		mapped_buffer = kmap_atomic(sblock->pagev[0].page);
+		mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
 	}
 
 	for (page_num = 0;;) {
@@ -1162,9 +1177,9 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
 		page_num++;
 		if (page_num >= sblock->page_count)
 			break;
-		BUG_ON(!sblock->pagev[page_num].page);
+		WARN_ON(!sblock->pagev[page_num]->page);
 
-		mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
+		mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
 	}
 
 	btrfs_csum_final(crc, calculated_csum);
@@ -1202,11 +1217,11 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 					    struct scrub_block *sblock_good,
 					    int page_num, int force_write)
 {
-	struct scrub_page *page_bad = sblock_bad->pagev + page_num;
-	struct scrub_page *page_good = sblock_good->pagev + page_num;
+	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
+	struct scrub_page *page_good = sblock_good->pagev[page_num];
 
-	BUG_ON(sblock_bad->pagev[page_num].page == NULL);
-	BUG_ON(sblock_good->pagev[page_num].page == NULL);
+	BUG_ON(page_bad->page == NULL);
+	BUG_ON(page_good->page == NULL);
 	if (force_write || sblock_bad->header_error ||
 	    sblock_bad->checksum_error || page_bad->io_error) {
 		struct bio *bio;
@@ -1247,8 +1262,8 @@ static void scrub_checksum(struct scrub_block *sblock)
 	u64 flags;
 	int ret;
 
-	BUG_ON(sblock->page_count < 1);
-	flags = sblock->pagev[0].flags;
+	WARN_ON(sblock->page_count < 1);
+	flags = sblock->pagev[0]->flags;
 	ret = 0;
 	if (flags & BTRFS_EXTENT_FLAG_DATA)
 		ret = scrub_checksum_data(sblock);
@@ -1276,11 +1291,11 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 	int index;
 
 	BUG_ON(sblock->page_count < 1);
-	if (!sblock->pagev[0].have_csum)
+	if (!sblock->pagev[0]->have_csum)
 		return 0;
 
-	on_disk_csum = sblock->pagev[0].csum;
-	page = sblock->pagev[0].page;
+	on_disk_csum = sblock->pagev[0]->csum;
+	page = sblock->pagev[0]->page;
 	buffer = kmap_atomic(page);
 
 	len = sctx->sectorsize;
@@ -1295,8 +1310,8 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 			break;
 		index++;
 		BUG_ON(index >= sblock->page_count);
-		BUG_ON(!sblock->pagev[index].page);
-		page = sblock->pagev[index].page;
+		BUG_ON(!sblock->pagev[index]->page);
+		page = sblock->pagev[index]->page;
 		buffer = kmap_atomic(page);
 	}
 
@@ -1326,7 +1341,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	int index;
 
 	BUG_ON(sblock->page_count < 1);
-	page = sblock->pagev[0].page;
+	page = sblock->pagev[0]->page;
 	mapped_buffer = kmap_atomic(page);
 	h = (struct btrfs_header *)mapped_buffer;
 	memcpy(on_disk_csum, h->csum, sctx->csum_size);
@@ -1337,10 +1352,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	 * b) the page is already kmapped
 	 */
 
-	if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
+	if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
 		++fail;
 
-	if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
+	if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
 		++fail;
 
 	if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1365,8 +1380,8 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 			break;
 		index++;
 		BUG_ON(index >= sblock->page_count);
-		BUG_ON(!sblock->pagev[index].page);
-		page = sblock->pagev[index].page;
+		BUG_ON(!sblock->pagev[index]->page);
+		page = sblock->pagev[index]->page;
 		mapped_buffer = kmap_atomic(page);
 		mapped_size = PAGE_SIZE;
 		p = mapped_buffer;
@@ -1398,15 +1413,15 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 	int index;
 
 	BUG_ON(sblock->page_count < 1);
-	page = sblock->pagev[0].page;
+	page = sblock->pagev[0]->page;
 	mapped_buffer = kmap_atomic(page);
 	s = (struct btrfs_super_block *)mapped_buffer;
 	memcpy(on_disk_csum, s->csum, sctx->csum_size);
 
-	if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
+	if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
 		++fail_cor;
 
-	if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
+	if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
 		++fail_gen;
 
 	if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1426,8 +1441,8 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 			break;
 		index++;
 		BUG_ON(index >= sblock->page_count);
-		BUG_ON(!sblock->pagev[index].page);
-		page = sblock->pagev[index].page;
+		BUG_ON(!sblock->pagev[index]->page);
+		page = sblock->pagev[index]->page;
 		mapped_buffer = kmap_atomic(page);
 		mapped_size = PAGE_SIZE;
 		p = mapped_buffer;
@@ -1447,10 +1462,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
 		++sctx->stat.super_errors;
 		spin_unlock(&sctx->stat_lock);
 		if (fail_cor)
-			btrfs_dev_stat_inc_and_print(sblock->pagev[0].dev,
+			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
 				BTRFS_DEV_STAT_CORRUPTION_ERRS);
 		else
-			btrfs_dev_stat_inc_and_print(sblock->pagev[0].dev,
+			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
 				BTRFS_DEV_STAT_GENERATION_ERRS);
 	}
 
@@ -1468,12 +1483,25 @@ static void scrub_block_put(struct scrub_block *sblock)
 		int i;
 
 		for (i = 0; i < sblock->page_count; i++)
-			if (sblock->pagev[i].page)
-				__free_page(sblock->pagev[i].page);
+			scrub_page_put(sblock->pagev[i]);
 		kfree(sblock);
 	}
 }
 
+static void scrub_page_get(struct scrub_page *spage)
+{
+	atomic_inc(&spage->ref_count);
+}
+
+static void scrub_page_put(struct scrub_page *spage)
+{
+	if (atomic_dec_and_test(&spage->ref_count)) {
+		if (spage->page)
+			__free_page(spage->page);
+		kfree(spage);
+	}
+}
+
 static void scrub_submit(struct scrub_ctx *sctx)
 {
 	struct scrub_bio *sbio;
@@ -1577,28 +1605,28 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 		return -ENOMEM;
 	}
 
-	/* one ref inside this function, plus one for each page later on */
+	/* one ref inside this function, plus one for each page added to
+	 * a bio later on */
 	atomic_set(&sblock->ref_count, 1);
 	sblock->sctx = sctx;
 	sblock->no_io_error_seen = 1;
 
 	for (index = 0; len > 0; index++) {
-		struct scrub_page *spage = sblock->pagev + index;
+		struct scrub_page *spage;
 		u64 l = min_t(u64, len, PAGE_SIZE);
 
-		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
-		spage->page = alloc_page(GFP_NOFS);
-		if (!spage->page) {
+		spage = kzalloc(sizeof(*spage), GFP_NOFS);
+		if (!spage) {
+leave_nomem:
 			spin_lock(&sctx->stat_lock);
 			sctx->stat.malloc_errors++;
 			spin_unlock(&sctx->stat_lock);
-			while (index > 0) {
-				index--;
-				__free_page(sblock->pagev[index].page);
-			}
-			kfree(sblock);
+			scrub_block_put(sblock);
 			return -ENOMEM;
 		}
+		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+		scrub_page_get(spage);
+		sblock->pagev[index] = spage;
 		spage->sblock = sblock;
 		spage->dev = dev;
 		spage->flags = flags;
@@ -1613,14 +1641,17 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 			spage->have_csum = 0;
 		}
 		sblock->page_count++;
+		spage->page = alloc_page(GFP_NOFS);
+		if (!spage->page)
+			goto leave_nomem;
 		len -= l;
 		logical += l;
 		physical += l;
 	}
 
-	BUG_ON(sblock->page_count == 0);
+	WARN_ON(sblock->page_count == 0);
 	for (index = 0; index < sblock->page_count; index++) {
-		struct scrub_page *spage = sblock->pagev + index;
+		struct scrub_page *spage = sblock->pagev[index];
 		int ret;
 
 		ret = scrub_add_page_to_bio(sctx, spage);
@@ -2289,6 +2320,22 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
 		return -EINVAL;
 	}
 
+	if (fs_info->chunk_root->nodesize >
+	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
+	    fs_info->chunk_root->sectorsize >
+	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
+		/*
+		 * would exhaust the array bounds of pagev member in
+		 * struct scrub_block
+		 */
+		pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
+		       fs_info->chunk_root->nodesize,
+		       SCRUB_MAX_PAGES_PER_BLOCK,
+		       fs_info->chunk_root->sectorsize,
+		       SCRUB_MAX_PAGES_PER_BLOCK);
+		return -EINVAL;
+	}
+
 	ret = scrub_workers_get(root);
 	if (ret)
 		return ret;
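As a back-of-the-envelope check on the bound enforced by the new -EINVAL path above, here is a hedged standalone sketch assuming the common 4 KiB PAGE_SIZE; the helper name and the ILLUSTRATIVE_PAGE_SIZE constant are hypothetical and not part of the patch:

#include <stdbool.h>

#define ILLUSTRATIVE_PAGE_SIZE		4096	/* assumed 4 KiB pages */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* as in the patch */

/* pagev[] has SCRUB_MAX_PAGES_PER_BLOCK slots, so a node/leaf/sector may
 * span at most 16 * 4096 = 64 KiB before the array bounds are exhausted */
static bool scrub_block_size_ok(int nodesize, int sectorsize)
{
	return nodesize <= ILLUSTRATIVE_PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK &&
	       sectorsize <= ILLUSTRATIVE_PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK;
}

Under that assumption the check accepts, for example, a 16 KiB nodesize but rejects a hypothetical 128 KiB one, which would need 32 pagev[] slots.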