Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--	fs/btrfs/disk-io.c	200
1 file changed, 82 insertions, 118 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b9a53646ceb2..98ff4fbcb386 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -31,6 +31,7 @@
 #include "btrfs_inode.h"
 #include "volumes.h"
 #include "print-tree.h"
+#include "async-thread.h"
 
 #if 0
 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
@@ -46,8 +47,7 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
 #endif
 
 static struct extent_io_ops btree_extent_io_ops;
-static struct workqueue_struct *end_io_workqueue;
-static struct workqueue_struct *async_submit_workqueue;
+static void end_workqueue_fn(struct btrfs_work *work);
 
 struct end_io_wq {
 	struct bio *bio;
@@ -57,6 +57,7 @@ struct end_io_wq {
 	int error;
 	int metadata;
 	struct list_head list;
+	struct btrfs_work work;
 };
 
 struct async_submit_bio {
@@ -66,6 +67,7 @@ struct async_submit_bio {
 	extent_submit_bio_hook_t *submit_bio_hook;
 	int rw;
 	int mirror_num;
+	struct btrfs_work work;
 };
 
 struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
@@ -389,7 +391,6 @@ static int end_workqueue_bio(struct bio *bio,
 {
 	struct end_io_wq *end_io_wq = bio->bi_private;
 	struct btrfs_fs_info *fs_info;
-	unsigned long flags;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
 	if (bio->bi_size)
@@ -397,11 +398,10 @@ static int end_workqueue_bio(struct bio *bio,
 #endif
 
 	fs_info = end_io_wq->info;
-	spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
 	end_io_wq->error = err;
-	list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list);
-	spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags);
-	queue_work(end_io_workqueue, &fs_info->end_io_work);
+	end_io_wq->work.func = end_workqueue_fn;
+	end_io_wq->work.flags = 0;
+	btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
 	return 0;
@@ -428,6 +428,19 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 	return 0;
 }
 
+static void run_one_async_submit(struct btrfs_work *work)
+{
+	struct btrfs_fs_info *fs_info;
+	struct async_submit_bio *async;
+
+	async = container_of(work, struct async_submit_bio, work);
+	fs_info = BTRFS_I(async->inode)->root->fs_info;
+	atomic_dec(&fs_info->nr_async_submits);
+	async->submit_bio_hook(async->inode, async->rw, async->bio,
+			       async->mirror_num);
+	kfree(async);
+}
+
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 			int rw, struct bio *bio, int mirror_num,
 			extent_submit_bio_hook_t *submit_bio_hook)
@@ -443,13 +456,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->bio = bio;
 	async->mirror_num = mirror_num;
 	async->submit_bio_hook = submit_bio_hook;
-
-	spin_lock(&fs_info->async_submit_work_lock);
-	list_add_tail(&async->list, &fs_info->async_submit_work_list);
+	async->work.func = run_one_async_submit;
+	async->work.flags = 0;
 	atomic_inc(&fs_info->nr_async_submits);
-	spin_unlock(&fs_info->async_submit_work_lock);
-
-	queue_work(async_submit_workqueue, &fs_info->async_submit_work);
+	btrfs_queue_worker(&fs_info->workers, &async->work);
 	return 0;
 }
 
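The two conversions above follow one pattern: embed a struct btrfs_work in the object being deferred, point work.func at a handler that recovers the object with container_of(), and hand the work to btrfs_queue_worker(). Below is a minimal sketch of that pattern, assuming struct btrfs_workers is the pool type behind fs_info->workers and that the helpers come from async-thread.h as used in this patch; the names deferred_item, deferred_item_fn and queue_deferred_item are illustrative only, not part of btrfs.

/* Illustrative sketch only -- mirrors the btrfs_work usage introduced above. */
#include <linux/kernel.h>
#include <linux/slab.h>
#include "async-thread.h"

struct deferred_item {
	int payload;			/* whatever state the handler needs */
	struct btrfs_work work;		/* embedded, like end_io_wq and async_submit_bio */
};

/* runs later in a btrfs helper thread, in process context */
static void deferred_item_fn(struct btrfs_work *work)
{
	struct deferred_item *item;

	/* recover the containing object, as run_one_async_submit() does */
	item = container_of(work, struct deferred_item, work);

	/* ... do the expensive processing on item->payload here ... */

	kfree(item);
}

static int queue_deferred_item(struct btrfs_workers *workers, int payload)
{
	struct deferred_item *item = kmalloc(sizeof(*item), GFP_NOFS);

	if (!item)
		return -ENOMEM;
	item->payload = payload;
	item->work.func = deferred_item_fn;	/* same fields the patch fills in */
	item->work.flags = 0;
	btrfs_queue_worker(workers, &item->work);
	return 0;
}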
@@ -462,19 +472,32 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 	offset = bio->bi_sector << 9;
 
+	/*
+	 * when we're called for a write, we're already in the async
+	 * submission context.  Just jump into btrfs_map_bio
+	 */
 	if (rw & (1 << BIO_RW)) {
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
+		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				     mirror_num, 0);
 	}
 
+	/*
+	 * called for a read, do the setup so that checksum validation
+	 * can happen in the async kernel threads
+	 */
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
 	BUG_ON(ret);
 
-	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
+	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
 }
 
 static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 				 int mirror_num)
 {
+	/*
+	 * kthread helpers are used to submit writes so that checksumming
+	 * can happen in parallel across all CPUs
+	 */
 	if (!(rw & (1 << BIO_RW))) {
 		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);
 	}
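The write branch of btree_submit_bio_hook() falls outside the context lines shown above. As a rough illustration of how a submit hook can hand writes to the helper threads through btrfs_wq_submit_bio() (whose signature appears earlier in this patch), here is a hedged sketch; my_submit_bio_hook and my_async_hook are made-up names, and the comments above suggest the real btree path simply reuses __btree_submit_bio_hook() as the deferred callback.

/*
 * Illustrative sketch only: a submit hook shaped like the ones in this
 * patch.  my_submit_bio_hook() and my_async_hook() are made-up names.
 */
static int my_async_hook(struct inode *inode, int rw, struct bio *bio,
			 int mirror_num)
{
	/* runs later in a helper thread; checksumming can happen here */
	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 0);
}

static int my_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
			      int mirror_num)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;

	/* reads take the direct path; checksums are verified at end_io time */
	if (!(rw & (1 << BIO_RW)))
		return __btree_submit_bio_hook(inode, rw, bio, mirror_num);

	/* writes are packaged up and queued for the async helper threads */
	return btrfs_wq_submit_bio(fs_info, inode, rw, bio, mirror_num,
				   my_async_hook);
}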
@@ -1036,95 +1059,40 @@ static int bio_ready_for_csum(struct bio *bio)
 	return ret;
 }
 
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-static void btrfs_end_io_csum(void *p)
-#else
-static void btrfs_end_io_csum(struct work_struct *work)
-#endif
-{
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-	struct btrfs_fs_info *fs_info = p;
-#else
-	struct btrfs_fs_info *fs_info = container_of(work,
-						     struct btrfs_fs_info,
-						     end_io_work);
-#endif
-	unsigned long flags;
-	struct end_io_wq *end_io_wq;
-	struct bio *bio;
-	struct list_head *next;
-	int error;
-	int was_empty;
-
-	while(1) {
-		spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
-		if (list_empty(&fs_info->end_io_work_list)) {
-			spin_unlock_irqrestore(&fs_info->end_io_work_lock,
-					       flags);
-			return;
-		}
-		next = fs_info->end_io_work_list.next;
-		list_del(next);
-		spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags);
-
-		end_io_wq = list_entry(next, struct end_io_wq, list);
-
-		bio = end_io_wq->bio;
-		if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
-			spin_lock_irqsave(&fs_info->end_io_work_lock, flags);
-			was_empty = list_empty(&fs_info->end_io_work_list);
-			list_add_tail(&end_io_wq->list,
-				      &fs_info->end_io_work_list);
-			spin_unlock_irqrestore(&fs_info->end_io_work_lock,
-					       flags);
-			if (was_empty)
-				return;
-			continue;
-		}
-		error = end_io_wq->error;
-		bio->bi_private = end_io_wq->private;
-		bio->bi_end_io = end_io_wq->end_io;
-		kfree(end_io_wq);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-		bio_endio(bio, bio->bi_size, error);
-#else
-		bio_endio(bio, error);
-#endif
-	}
-}
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-static void btrfs_async_submit_work(void *p)
-#else
-static void btrfs_async_submit_work(struct work_struct *work)
-#endif
-{
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-	struct btrfs_fs_info *fs_info = p;
-#else
-	struct btrfs_fs_info *fs_info = container_of(work,
-						     struct btrfs_fs_info,
-						     async_submit_work);
-#endif
-	struct async_submit_bio *async;
-	struct list_head *next;
-
-	while(1) {
-		spin_lock(&fs_info->async_submit_work_lock);
-		if (list_empty(&fs_info->async_submit_work_list)) {
-			spin_unlock(&fs_info->async_submit_work_lock);
-			return;
-		}
-		next = fs_info->async_submit_work_list.next;
-		list_del(next);
-		atomic_dec(&fs_info->nr_async_submits);
-		spin_unlock(&fs_info->async_submit_work_lock);
-
-		async = list_entry(next, struct async_submit_bio, list);
-		async->submit_bio_hook(async->inode, async->rw, async->bio,
-				       async->mirror_num);
-		kfree(async);
-	}
-}
+/*
+ * called by the kthread helper functions to finally call the bio end_io
+ * functions.  This is where read checksum verification actually happens
+ */
+static void end_workqueue_fn(struct btrfs_work *work)
+{
+	struct bio *bio;
+	struct end_io_wq *end_io_wq;
+	struct btrfs_fs_info *fs_info;
+	int error;
+
+	end_io_wq = container_of(work, struct end_io_wq, work);
+	bio = end_io_wq->bio;
+	fs_info = end_io_wq->info;
+
+	/* metadata bios are special because the whole tree block must
+	 * be checksummed at once.  This makes sure the entire block is in
+	 * ram and up to date before trying to verify things.  For
+	 * blocksize <= pagesize, it is basically a noop
+	 */
+	if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
+		btrfs_queue_worker(&fs_info->endio_workers,
+				   &end_io_wq->work);
+		return;
+	}
+	error = end_io_wq->error;
+	bio->bi_private = end_io_wq->private;
+	bio->bi_end_io = end_io_wq->end_io;
+	kfree(end_io_wq);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+	bio_endio(bio, bio->bi_size, error);
+#else
+	bio_endio(bio, error);
+#endif
 }
 
 struct btrfs_root *open_ctree(struct super_block *sb,
@@ -1155,19 +1123,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		err = -ENOMEM;
 		goto fail;
 	}
-	end_io_workqueue = create_workqueue("btrfs-end-io");
-	BUG_ON(!end_io_workqueue);
-	async_submit_workqueue = create_workqueue("btrfs-async-submit");
-
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
 	INIT_LIST_HEAD(&fs_info->trans_list);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->hashers);
-	INIT_LIST_HEAD(&fs_info->end_io_work_list);
-	INIT_LIST_HEAD(&fs_info->async_submit_work_list);
 	spin_lock_init(&fs_info->hash_lock);
-	spin_lock_init(&fs_info->end_io_work_lock);
-	spin_lock_init(&fs_info->async_submit_work_lock);
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->new_trans_lock);
 
@@ -1222,13 +1182,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->do_barriers = 1;
 
 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info);
-	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work,
-		  fs_info);
 	INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
 #else
-	INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum);
-	INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work);
 	INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner);
 #endif
 	BTRFS_I(fs_info->btree_inode)->root = tree_root;
@@ -1240,6 +1195,19 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->trans_mutex);
 	mutex_init(&fs_info->fs_mutex);
 
+	/* we need to start all the end_io workers up front because the
+	 * queue work function gets called at interrupt time.  The endio
+	 * workers don't normally start IO, so some number of them <= the
+	 * number of cpus is fine.  They handle checksumming after a read.
+	 *
+	 * The other worker threads do start IO, so the max is larger than
+	 * the number of CPUs.  FIXME, tune this for huge machines
+	 */
+	btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2);
+	btrfs_init_workers(&fs_info->endio_workers, num_online_cpus());
+	btrfs_start_workers(&fs_info->workers, 1);
+	btrfs_start_workers(&fs_info->endio_workers, num_online_cpus());
+
 #if 0
 	ret = add_hasher(fs_info, "crc32c");
 	if (ret) {
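The hunk above also shows the full lifecycle of a worker pool: btrfs_init_workers() caps the thread count, btrfs_start_workers() spawns threads up front, and btrfs_stop_workers() (used in the error path and in close_ctree() further down) tears them down. A compressed, illustrative sketch of that sequence follows, again assuming the btrfs_workers type from async-thread.h; demo_workers and the demo_* functions are not part of the patch.

/* Illustrative sketch of the pool lifecycle used by open_ctree()/close_ctree(). */
#include <linux/cpumask.h>
#include "async-thread.h"

static struct btrfs_workers demo_workers;	/* stand-in for fs_info->workers */

static void demo_bring_up(void)
{
	/* cap the pool at twice the CPU count, as open_ctree() does for ->workers */
	btrfs_init_workers(&demo_workers, num_online_cpus() * 2);

	/*
	 * start only one thread here; the endio pool above starts all of its
	 * threads up front because its work is queued from interrupt time
	 */
	btrfs_start_workers(&demo_workers, 1);
}

static void demo_tear_down(void)
{
	/* mirrors the btrfs_stop_workers() calls in the error path and close_ctree() */
	btrfs_stop_workers(&demo_workers);
}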
@@ -1375,6 +1343,8 @@ fail_sb_buffer:
 	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 fail_iput:
 	iput(fs_info->btree_inode);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
 fail:
 	btrfs_close_devices(fs_info->fs_devices);
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -1623,16 +1593,10 @@ int close_ctree(struct btrfs_root *root)
 	extent_io_tree_empty_lru(&fs_info->extent_ins);
 	extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
 
-	flush_workqueue(async_submit_workqueue);
-	flush_workqueue(end_io_workqueue);
-
 	truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
 
-	flush_workqueue(async_submit_workqueue);
-	destroy_workqueue(async_submit_workqueue);
-
-	flush_workqueue(end_io_workqueue);
-	destroy_workqueue(end_io_workqueue);
+	btrfs_stop_workers(&fs_info->workers);
+	btrfs_stop_workers(&fs_info->endio_workers);
 
 	iput(fs_info->btree_inode);
 #if 0