Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--  fs/btrfs/disk-io.c | 200
1 file changed, 82 insertions(+), 118 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b9a53646ceb2..98ff4fbcb386 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -31,6 +31,7 @@
 #include "btrfs_inode.h"
 #include "volumes.h"
 #include "print-tree.h"
+#include "async-thread.h"
 
 #if 0
 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
@@ -46,8 +47,7 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 #endif
 
 static struct extent_io_ops btree_extent_io_ops;
-static struct workqueue_struct *end_io_workqueue;
-static struct workqueue_struct *async_submit_workqueue;
+static void end_workqueue_fn(struct btrfs_work *work);
 
 struct end_io_wq {
	struct bio *bio;
@@ -57,6 +57,7 @@ struct end_io_wq {
	int error;
	int metadata;
	struct list_head list;
+	struct btrfs_work work;
 };
 
 struct async_submit_bio {
@@ -66,6 +67,7 @@ struct async_submit_bio {
	extent_submit_bio_hook_t *submit_bio_hook;
	int rw;
	int mirror_num;
+	struct btrfs_work work;
 };
 
 struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
@@ -389,7 +391,6 @@ static int end_workqueue_bio(struct bio *bio,
 {
	struct end_io_wq *end_io_wq = bio->bi_private;
	struct btrfs_fs_info *fs_info;
-	unsigned long flags;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
@@ -397,11 +398,10 @@ static int end_workqueue_bio(struct bio *bio,
 #endif
 
	fs_info = end_io_wq->info;
-	spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
	end_io_wq->error = err;
-	list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list);
-	spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags);
-	queue_work(end_io_workqueue, &fs_info->end_io_work);
+	end_io_wq->work.func = end_workqueue_fn;
+	end_io_wq->work.flags = 0;
+	btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
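The conversion pattern in the hunk above recurs throughout this patch: the old
code pushed completions onto a shared end_io_work_list under an irq-safe
spinlock, while the new code embeds a struct btrfs_work in each end_io_wq and
queues that directly. The worker side recovers its wrapper with container_of(),
so no global list or lock is needed. A minimal sketch of the idiom, assuming
only the fields this patch adds:

	/* 'work' is the &end_io_wq->work queued at interrupt time;
	 * container_of() maps it back to its wrapper, giving the
	 * kthread access to the bio, error, and fs_info fields.
	 */
	static void example_worker_fn(struct btrfs_work *work)
	{
		struct end_io_wq *wq;

		wq = container_of(work, struct end_io_wq, work);
		/* wq->bio, wq->error, wq->info are now usable */
	}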
@@ -428,6 +428,19 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
	return 0;
 }
 
+static void run_one_async_submit(struct btrfs_work *work)
+{
+	struct btrfs_fs_info *fs_info;
+	struct async_submit_bio *async;
+
+	async = container_of(work, struct async_submit_bio, work);
+	fs_info = BTRFS_I(async->inode)->root->fs_info;
+	atomic_dec(&fs_info->nr_async_submits);
+	async->submit_bio_hook(async->inode, async->rw, async->bio,
+			       async->mirror_num);
+	kfree(async);
+}
+
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
			int rw, struct bio *bio, int mirror_num,
			extent_submit_bio_hook_t *submit_bio_hook)
@@ -443,13 +456,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
	async->bio = bio;
	async->mirror_num = mirror_num;
	async->submit_bio_hook = submit_bio_hook;
-
-	spin_lock(&fs_info->async_submit_work_lock);
-	list_add_tail(&async->list, &fs_info->async_submit_work_list);
+	async->work.func = run_one_async_submit;
+	async->work.flags = 0;
	atomic_inc(&fs_info->nr_async_submits);
-	spin_unlock(&fs_info->async_submit_work_lock);
-
-	queue_work(async_submit_workqueue, &fs_info->async_submit_work);
+	btrfs_queue_worker(&fs_info->workers, &async->work);
	return 0;
 }
 
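Only the tail of btrfs_wq_submit_bio is visible in this hunk; the allocation of
the async_submit_bio at the top of the function sits outside the context lines.
For orientation, the producer half presumably looks like the sketch below (the
allocation flags and error handling are assumptions, not lines from this patch).
Note the accounting: nr_async_submits is incremented here at queue time and
decremented in run_one_async_submit just before the hook runs, so it counts
bios handed to the pool but not yet submitted.

	struct async_submit_bio *async;

	/* one allocation per bio; freed by run_one_async_submit() */
	async = kmalloc(sizeof(*async), GFP_NOFS);
	if (!async)
		return -ENOMEM;
	async->inode = inode;
	async->rw = rw;
	/* ... bio, mirror_num, submit_bio_hook as in the hunk above ... */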
@@ -462,19 +472,32 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
	offset = bio->bi_sector << 9;
 
+	/*
+	 * when we're called for a write, we're already in the async
+	 * submission context.  Just jump into btrfs_map_bio
+	 */
	if (rw & (1 << BIO_RW)) {
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
+		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				     mirror_num, 0);
	}
 
+	/*
+	 * called for a read, do the setup so that checksum validation
+	 * can happen in the async kernel threads
+	 */
	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
	BUG_ON(ret);
 
-	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
+	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
 }
 
 static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
				 int mirror_num)
 {
+	/*
+	 * kthread helpers are used to submit writes so that checksumming
+	 * can happen in parallel across all CPUs
+	 */
	if (!(rw & (1 << BIO_RW))) {
		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
	}
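The write branch of btree_submit_bio_hook falls below the context shown here.
Given the signature of btrfs_wq_submit_bio added above, the call it makes is
presumably of the following form (a sketch, not a quoted line from the patch):

	/* writes: defer to the helper kthreads; __btree_submit_bio_hook
	 * is re-entered later from run_one_async_submit() with the same
	 * arguments, and by then takes the rw & (1 << BIO_RW) branch.
	 */
	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
				   inode, rw, bio, mirror_num,
				   __btree_submit_bio_hook);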
@@ -1036,95 +1059,40 @@ static int bio_ready_for_csum(struct bio *bio)
	return ret;
 }
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-static void btrfs_end_io_csum(void *p)
-#else
-static void btrfs_end_io_csum(struct work_struct *work)
-#endif
+/*
+ * called by the kthread helper functions to finally call the bio end_io
+ * functions.  This is where read checksum verification actually happens
+ */
+static void end_workqueue_fn(struct btrfs_work *work)
 {
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-	struct btrfs_fs_info *fs_info = p;
-#else
-	struct btrfs_fs_info *fs_info = container_of(work,
-						     struct btrfs_fs_info,
-						     end_io_work);
-#endif
-	unsigned long flags;
-	struct end_io_wq *end_io_wq;
	struct bio *bio;
-	struct list_head *next;
+	struct end_io_wq *end_io_wq;
+	struct btrfs_fs_info *fs_info;
	int error;
-	int was_empty;
 
-	while(1) {
-		spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
-		if (list_empty(&fs_info->end_io_work_list)) {
-			spin_unlock_irqrestore(&fs_info->end_io_work_lock,
-					       flags);
-			return;
-		}
-		next = fs_info->end_io_work_list.next;
-		list_del(next);
-		spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags);
-
-		end_io_wq = list_entry(next, struct end_io_wq, list);
-
-		bio = end_io_wq->bio;
-		if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
-			spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
-			was_empty = list_empty(&fs_info->end_io_work_list);
-			list_add_tail(&end_io_wq->list,
-				      &fs_info->end_io_work_list);
-			spin_unlock_irqrestore(&fs_info->end_io_work_lock,
-					       flags);
-			if (was_empty)
-				return;
-			continue;
-		}
-		error = end_io_wq->error;
-		bio->bi_private = end_io_wq->private;
-		bio->bi_end_io = end_io_wq->end_io;
-		kfree(end_io_wq);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-		bio_endio(bio, bio->bi_size, error);
-#else
-		bio_endio(bio, error);
-#endif
-	}
-}
+	end_io_wq = container_of(work, struct end_io_wq, work);
+	bio = end_io_wq->bio;
+	fs_info = end_io_wq->info;
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-static void btrfs_async_submit_work(void *p)
-#else
-static void btrfs_async_submit_work(struct work_struct *work)
-#endif
-{
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-	struct btrfs_fs_info *fs_info = p;
+	/* metadata bios are special because the whole tree block must
+	 * be checksummed at once.  This makes sure the entire block is in
+	 * ram and up to date before trying to verify things.  For
+	 * blocksize <= pagesize, it is basically a noop
+	 */
+	if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
+		btrfs_queue_worker(&fs_info->endio_workers,
+				   &end_io_wq->work);
+		return;
+	}
+	error = end_io_wq->error;
+	bio->bi_private = end_io_wq->private;
+	bio->bi_end_io = end_io_wq->end_io;
+	kfree(end_io_wq);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+	bio_endio(bio, bio->bi_size, error);
 #else
-	struct btrfs_fs_info *fs_info = container_of(work,
-						     struct btrfs_fs_info,
-						     async_submit_work);
+	bio_endio(bio, error);
 #endif
-	struct async_submit_bio *async;
-	struct list_head *next;
-
-	while(1) {
-		spin_lock(&fs_info->async_submit_work_lock);
-		if (list_empty(&fs_info->async_submit_work_list)) {
-			spin_unlock(&fs_info->async_submit_work_lock);
-			return;
-		}
-		next = fs_info->async_submit_work_list.next;
-		list_del(next);
-		atomic_dec(&fs_info->nr_async_submits);
-		spin_unlock(&fs_info->async_submit_work_lock);
-
-		async = list_entry(next, struct async_submit_bio, list);
-		async->submit_bio_hook(async->inode, async->rw, async->bio,
-				       async->mirror_num);
-		kfree(async);
-	}
 }
 
 struct btrfs_root *open_ctree(struct super_block *sb,
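One subtlety in the new end_workqueue_fn: when a metadata bio covers only part
of a multi-page tree block, the handler requeues its own work item instead of
blocking a worker until the sibling pages complete. The old loop needed the
was_empty bookkeeping to avoid spinning forever on a list holding only
not-ready entries; with per-item work structs the requeue is a single call, on
the apparent assumption that the remaining page completions will arrive and
make bio_ready_for_csum() succeed on a later pass. The shape of the idiom
(ready_yet is a stand-in for bio_ready_for_csum):

	if (end_io_wq->metadata && !ready_yet(bio)) {
		/* go to the back of the queue; we will be run again
		 * after other completions have been processed
		 */
		btrfs_queue_worker(&fs_info->endio_workers,
				   &end_io_wq->work);
		return;
	}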
@@ -1155,19 +1123,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
		err = -ENOMEM;
		goto fail;
	}
-	end_io_workqueue = create_workqueue("btrfs-end-io");
-	BUG_ON(!end_io_workqueue);
-	async_submit_workqueue = create_workqueue("btrfs-async-submit");
-
	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
	INIT_LIST_HEAD(&fs_info->trans_list);
	INIT_LIST_HEAD(&fs_info->dead_roots);
	INIT_LIST_HEAD(&fs_info->hashers);
-	INIT_LIST_HEAD(&fs_info->end_io_work_list);
-	INIT_LIST_HEAD(&fs_info->async_submit_work_list);
	spin_lock_init(&fs_info->hash_lock);
-	spin_lock_init(&fs_info->end_io_work_lock);
-	spin_lock_init(&fs_info->async_submit_work_lock);
	spin_lock_init(&fs_info->delalloc_lock);
	spin_lock_init(&fs_info->new_trans_lock);
 
@@ -1222,13 +1182,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	fs_info->do_barriers = 1;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info);
-	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work,
-		  fs_info);
	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
 #else
-	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum);
-	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work);
	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
 #endif
	BTRFS_I(fs_info->btree_inode)->root = tree_root;
@@ -1240,6 +1195,19 @@ struct btrfs_root *open_ctree(struct super_block *sb,
	mutex_init(&fs_info->trans_mutex);
	mutex_init(&fs_info->fs_mutex);
 
+	/* we need to start all the end_io workers up front because the
+	 * queue work function gets called at interrupt time.  The endio
+	 * workers don't normally start IO, so some number of them <= the
+	 * number of cpus is fine.  They handle checksumming after a read.
+	 *
+	 * The other worker threads do start IO, so the max is larger than
+	 * the number of CPUs.  FIXME, tune this for huge machines
+	 */
+	btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2);
+	btrfs_init_workers(&fs_info->endio_workers, num_online_cpus());
+	btrfs_start_workers(&fs_info->workers, 1);
+	btrfs_start_workers(&fs_info->endio_workers, num_online_cpus());
+
 #if 0
	ret = add_hasher(fs_info, "crc32c");
	if (ret) {
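The counts above encode the comment's policy. Every endio worker is spawned
here because btrfs_queue_worker can be called from interrupt context, where
creating a kthread (which can sleep) is not an option; the submit pool gets a
cap of twice the CPU count but only one thread up front. Since async-thread.h
is not part of this diff, the assumed semantics of the three calls are sketched
below rather than quoted:

	/* assumed async-thread API semantics, as used by this patch:
	 * btrfs_init_workers(pool, max)  - set the thread cap, spawn none
	 * btrfs_start_workers(pool, n)   - spawn n kthreads now
	 * btrfs_queue_worker(pool, work) - hand a btrfs_work to the pool;
	 *   must not need to spawn a thread when called at irq time,
	 *   hence starting all endio workers before any IO is issued
	 */
	btrfs_init_workers(&fs_info->endio_workers, num_online_cpus());
	btrfs_start_workers(&fs_info->endio_workers, num_online_cpus());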
@@ -1375,6 +1343,8 @@ fail_sb_buffer:
	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 fail_iput:
	iput(fs_info->btree_inode);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
 fail:
	btrfs_close_devices(fs_info->fs_devices);
	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -1623,16 +1593,10 @@ int close_ctree(struct btrfs_root *root)
	extent_io_tree_empty_lru(&fs_info->extent_ins);
	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 
-	flush_workqueue(async_submit_workqueue);
-	flush_workqueue(end_io_workqueue);
-
	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 
-	flush_workqueue(async_submit_workqueue);
-	destroy_workqueue(async_submit_workqueue);
-
-	flush_workqueue(end_io_workqueue);
-	destroy_workqueue(end_io_workqueue);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
 
	iput(fs_info->btree_inode);
 #if 0
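A side effect worth noting in the close_ctree hunk: the old end_io_workqueue
and async_submit_workqueue were file-scope statics created anew in open_ctree,
which sat awkwardly with multiple mounts, and teardown had to flush each queue
twice (once before and once after truncating the btree inode pages, presumably
because truncation can kick off further completions). The new pools live inside
fs_info, so each mount owns its threads and a single stop per pool suffices,
assuming btrfs_stop_workers drains pending btrfs_work items before its kthreads
exit. Per-mount lifecycle under this patch:

	/* open_ctree:  btrfs_init_workers(&fs_info->workers, max);
	 *              btrfs_start_workers(&fs_info->workers, n);
	 * close_ctree: btrfs_stop_workers(&fs_info->workers);
	 * (and the same pairing for fs_info->endio_workers)
	 */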