aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/ctree.h7
-rw-r--r--fs/btrfs/disk-io.c23
-rw-r--r--fs/btrfs/reada.c248
3 files changed, 141 insertions, 137 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6af1211d4ee9..e2a5cc0d4a14 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1825,6 +1825,9 @@ struct btrfs_fs_info {
1825 spinlock_t reada_lock; 1825 spinlock_t reada_lock;
1826 struct radix_tree_root reada_tree; 1826 struct radix_tree_root reada_tree;
1827 1827
1828 /* readahead works cnt */
1829 atomic_t reada_works_cnt;
1830
1828 /* Extent buffer radix tree */ 1831 /* Extent buffer radix tree */
1829 spinlock_t buffer_lock; 1832 spinlock_t buffer_lock;
1830 struct radix_tree_root buffer_radix; 1833 struct radix_tree_root buffer_radix;
@@ -4563,8 +4566,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
4563 struct btrfs_key *start, struct btrfs_key *end); 4566 struct btrfs_key *start, struct btrfs_key *end);
4564int btrfs_reada_wait(void *handle); 4567int btrfs_reada_wait(void *handle);
4565void btrfs_reada_detach(void *handle); 4568void btrfs_reada_detach(void *handle);
4566int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 4569int btree_readahead_hook(struct btrfs_fs_info *fs_info,
4567 u64 start, int err); 4570 struct extent_buffer *eb, u64 start, int err);
4568 4571
4569static inline int is_fstree(u64 rootid) 4572static inline int is_fstree(u64 rootid)
4570{ 4573{
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 294c77729df3..de68b8b61fd2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -612,6 +612,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
612 int found_level; 612 int found_level;
613 struct extent_buffer *eb; 613 struct extent_buffer *eb;
614 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 614 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
615 struct btrfs_fs_info *fs_info = root->fs_info;
615 int ret = 0; 616 int ret = 0;
616 int reads_done; 617 int reads_done;
617 618
@@ -637,21 +638,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
637 638
638 found_start = btrfs_header_bytenr(eb); 639 found_start = btrfs_header_bytenr(eb);
639 if (found_start != eb->start) { 640 if (found_start != eb->start) {
640 btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", 641 btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
641 found_start, eb->start); 642 found_start, eb->start);
642 ret = -EIO; 643 ret = -EIO;
643 goto err; 644 goto err;
644 } 645 }
645 if (check_tree_block_fsid(root->fs_info, eb)) { 646 if (check_tree_block_fsid(fs_info, eb)) {
646 btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", 647 btrfs_err_rl(fs_info, "bad fsid on block %llu",
647 eb->start); 648 eb->start);
648 ret = -EIO; 649 ret = -EIO;
649 goto err; 650 goto err;
650 } 651 }
651 found_level = btrfs_header_level(eb); 652 found_level = btrfs_header_level(eb);
652 if (found_level >= BTRFS_MAX_LEVEL) { 653 if (found_level >= BTRFS_MAX_LEVEL) {
653 btrfs_err(root->fs_info, "bad tree block level %d", 654 btrfs_err(fs_info, "bad tree block level %d",
654 (int)btrfs_header_level(eb)); 655 (int)btrfs_header_level(eb));
655 ret = -EIO; 656 ret = -EIO;
656 goto err; 657 goto err;
657 } 658 }
@@ -659,7 +660,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
659 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 660 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
660 eb, found_level); 661 eb, found_level);
661 662
662 ret = csum_tree_block(root->fs_info, eb, 1); 663 ret = csum_tree_block(fs_info, eb, 1);
663 if (ret) { 664 if (ret) {
664 ret = -EIO; 665 ret = -EIO;
665 goto err; 666 goto err;
@@ -680,7 +681,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
680err: 681err:
681 if (reads_done && 682 if (reads_done &&
682 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) 683 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
683 btree_readahead_hook(root, eb, eb->start, ret); 684 btree_readahead_hook(fs_info, eb, eb->start, ret);
684 685
685 if (ret) { 686 if (ret) {
686 /* 687 /*
@@ -699,14 +700,13 @@ out:
699static int btree_io_failed_hook(struct page *page, int failed_mirror) 700static int btree_io_failed_hook(struct page *page, int failed_mirror)
700{ 701{
701 struct extent_buffer *eb; 702 struct extent_buffer *eb;
702 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
703 703
704 eb = (struct extent_buffer *)page->private; 704 eb = (struct extent_buffer *)page->private;
705 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); 705 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
706 eb->read_mirror = failed_mirror; 706 eb->read_mirror = failed_mirror;
707 atomic_dec(&eb->io_pages); 707 atomic_dec(&eb->io_pages);
708 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) 708 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
709 btree_readahead_hook(root, eb, eb->start, -EIO); 709 btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
710 return -EIO; /* we fixed nothing */ 710 return -EIO; /* we fixed nothing */
711} 711}
712 712
@@ -2604,6 +2604,7 @@ int open_ctree(struct super_block *sb,
2604 atomic_set(&fs_info->nr_async_bios, 0); 2604 atomic_set(&fs_info->nr_async_bios, 0);
2605 atomic_set(&fs_info->defrag_running, 0); 2605 atomic_set(&fs_info->defrag_running, 0);
2606 atomic_set(&fs_info->qgroup_op_seq, 0); 2606 atomic_set(&fs_info->qgroup_op_seq, 0);
2607 atomic_set(&fs_info->reada_works_cnt, 0);
2607 atomic64_set(&fs_info->tree_mod_seq, 0); 2608 atomic64_set(&fs_info->tree_mod_seq, 0);
2608 fs_info->sb = sb; 2609 fs_info->sb = sb;
2609 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; 2610 fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index c5f1773c4794..bf69c008249c 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -72,7 +72,7 @@ struct reada_extent {
72 spinlock_t lock; 72 spinlock_t lock;
73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
74 int nzones; 74 int nzones;
75 struct btrfs_device *scheduled_for; 75 int scheduled;
76}; 76};
77 77
78struct reada_zone { 78struct reada_zone {
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
101static void __reada_start_machine(struct btrfs_fs_info *fs_info); 101static void __reada_start_machine(struct btrfs_fs_info *fs_info);
102 102
103static int reada_add_block(struct reada_control *rc, u64 logical, 103static int reada_add_block(struct reada_control *rc, u64 logical,
104 struct btrfs_key *top, int level, u64 generation); 104 struct btrfs_key *top, u64 generation);
105 105
106/* recurses */ 106/* recurses */
107/* in case of err, eb might be NULL */ 107/* in case of err, eb might be NULL */
108static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 108static void __readahead_hook(struct btrfs_fs_info *fs_info,
109 u64 start, int err) 109 struct reada_extent *re, struct extent_buffer *eb,
110 u64 start, int err)
110{ 111{
111 int level = 0; 112 int level = 0;
112 int nritems; 113 int nritems;
113 int i; 114 int i;
114 u64 bytenr; 115 u64 bytenr;
115 u64 generation; 116 u64 generation;
116 struct reada_extent *re;
117 struct btrfs_fs_info *fs_info = root->fs_info;
118 struct list_head list; 117 struct list_head list;
119 unsigned long index = start >> PAGE_CACHE_SHIFT;
120 struct btrfs_device *for_dev;
121 118
122 if (eb) 119 if (eb)
123 level = btrfs_header_level(eb); 120 level = btrfs_header_level(eb);
124 121
125 /* find extent */
126 spin_lock(&fs_info->reada_lock);
127 re = radix_tree_lookup(&fs_info->reada_tree, index);
128 if (re)
129 re->refcnt++;
130 spin_unlock(&fs_info->reada_lock);
131
132 if (!re)
133 return -1;
134
135 spin_lock(&re->lock); 122 spin_lock(&re->lock);
136 /* 123 /*
137 * just take the full list from the extent. afterwards we 124 * just take the full list from the extent. afterwards we
138 * don't need the lock anymore 125 * don't need the lock anymore
139 */ 126 */
140 list_replace_init(&re->extctl, &list); 127 list_replace_init(&re->extctl, &list);
141 for_dev = re->scheduled_for; 128 re->scheduled = 0;
142 re->scheduled_for = NULL;
143 spin_unlock(&re->lock); 129 spin_unlock(&re->lock);
144 130
145 if (err == 0) { 131 /*
146 nritems = level ? btrfs_header_nritems(eb) : 0; 132 * this is the error case, the extent buffer has not been
147 generation = btrfs_header_generation(eb); 133 * read correctly. We won't access anything from it and
148 /* 134 * just cleanup our data structures. Effectively this will
149 * FIXME: currently we just set nritems to 0 if this is a leaf, 135 * cut the branch below this node from read ahead.
150 * effectively ignoring the content. In a next step we could 136 */
151 * trigger more readahead depending from the content, e.g. 137 if (err)
152 * fetch the checksums for the extents in the leaf. 138 goto cleanup;
153 */
154 } else {
155 /*
156 * this is the error case, the extent buffer has not been
157 * read correctly. We won't access anything from it and
158 * just cleanup our data structures. Effectively this will
159 * cut the branch below this node from read ahead.
160 */
161 nritems = 0;
162 generation = 0;
163 }
164 139
140 /*
141 * FIXME: currently we just set nritems to 0 if this is a leaf,
142 * effectively ignoring the content. In a next step we could
143 * trigger more readahead depending from the content, e.g.
144 * fetch the checksums for the extents in the leaf.
145 */
146 if (!level)
147 goto cleanup;
148
149 nritems = btrfs_header_nritems(eb);
150 generation = btrfs_header_generation(eb);
165 for (i = 0; i < nritems; i++) { 151 for (i = 0; i < nritems; i++) {
166 struct reada_extctl *rec; 152 struct reada_extctl *rec;
167 u64 n_gen; 153 u64 n_gen;
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
188 */ 174 */
189#ifdef DEBUG 175#ifdef DEBUG
190 if (rec->generation != generation) { 176 if (rec->generation != generation) {
191 btrfs_debug(root->fs_info, 177 btrfs_debug(fs_info,
192 "generation mismatch for (%llu,%d,%llu) %llu != %llu", 178 "generation mismatch for (%llu,%d,%llu) %llu != %llu",
193 key.objectid, key.type, key.offset, 179 key.objectid, key.type, key.offset,
194 rec->generation, generation); 180 rec->generation, generation);
195 } 181 }
196#endif 182#endif
197 if (rec->generation == generation && 183 if (rec->generation == generation &&
198 btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && 184 btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
199 btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) 185 btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
200 reada_add_block(rc, bytenr, &next_key, 186 reada_add_block(rc, bytenr, &next_key, n_gen);
201 level - 1, n_gen);
202 } 187 }
203 } 188 }
189
190cleanup:
204 /* 191 /*
205 * free extctl records 192 * free extctl records
206 */ 193 */
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
222 209
223 reada_extent_put(fs_info, re); /* one ref for each entry */ 210 reada_extent_put(fs_info, re); /* one ref for each entry */
224 } 211 }
225 reada_extent_put(fs_info, re); /* our ref */
226 if (for_dev)
227 atomic_dec(&for_dev->reada_in_flight);
228 212
229 return 0; 213 return;
230} 214}
231 215
232/* 216/*
233 * start is passed separately in case eb in NULL, which may be the case with 217 * start is passed separately in case eb in NULL, which may be the case with
234 * failed I/O 218 * failed I/O
235 */ 219 */
236int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 220int btree_readahead_hook(struct btrfs_fs_info *fs_info,
237 u64 start, int err) 221 struct extent_buffer *eb, u64 start, int err)
238{ 222{
239 int ret; 223 int ret = 0;
224 struct reada_extent *re;
240 225
241 ret = __readahead_hook(root, eb, start, err); 226 /* find extent */
227 spin_lock(&fs_info->reada_lock);
228 re = radix_tree_lookup(&fs_info->reada_tree,
229 start >> PAGE_CACHE_SHIFT);
230 if (re)
231 re->refcnt++;
232 spin_unlock(&fs_info->reada_lock);
233 if (!re) {
234 ret = -1;
235 goto start_machine;
236 }
242 237
243 reada_start_machine(root->fs_info); 238 __readahead_hook(fs_info, re, eb, start, err);
239 reada_extent_put(fs_info, re); /* our ref */
244 240
241start_machine:
242 reada_start_machine(fs_info);
245 return ret; 243 return ret;
246} 244}
247 245
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
260 spin_lock(&fs_info->reada_lock); 258 spin_lock(&fs_info->reada_lock);
261 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, 259 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
262 logical >> PAGE_CACHE_SHIFT, 1); 260 logical >> PAGE_CACHE_SHIFT, 1);
263 if (ret == 1) 261 if (ret == 1 && logical >= zone->start && logical <= zone->end) {
264 kref_get(&zone->refcnt); 262 kref_get(&zone->refcnt);
265 spin_unlock(&fs_info->reada_lock);
266
267 if (ret == 1) {
268 if (logical >= zone->start && logical < zone->end)
269 return zone;
270 spin_lock(&fs_info->reada_lock);
271 kref_put(&zone->refcnt, reada_zone_release);
272 spin_unlock(&fs_info->reada_lock); 263 spin_unlock(&fs_info->reada_lock);
264 return zone;
273 } 265 }
274 266
267 spin_unlock(&fs_info->reada_lock);
268
275 cache = btrfs_lookup_block_group(fs_info, logical); 269 cache = btrfs_lookup_block_group(fs_info, logical);
276 if (!cache) 270 if (!cache)
277 return NULL; 271 return NULL;
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
307 kfree(zone); 301 kfree(zone);
308 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, 302 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
309 logical >> PAGE_CACHE_SHIFT, 1); 303 logical >> PAGE_CACHE_SHIFT, 1);
310 if (ret == 1) 304 if (ret == 1 && logical >= zone->start && logical <= zone->end)
311 kref_get(&zone->refcnt); 305 kref_get(&zone->refcnt);
306 else
307 zone = NULL;
312 } 308 }
313 spin_unlock(&fs_info->reada_lock); 309 spin_unlock(&fs_info->reada_lock);
314 310
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
317 313
318static struct reada_extent *reada_find_extent(struct btrfs_root *root, 314static struct reada_extent *reada_find_extent(struct btrfs_root *root,
319 u64 logical, 315 u64 logical,
320 struct btrfs_key *top, int level) 316 struct btrfs_key *top)
321{ 317{
322 int ret; 318 int ret;
323 struct reada_extent *re = NULL; 319 struct reada_extent *re = NULL;
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
330 u64 length; 326 u64 length;
331 int real_stripes; 327 int real_stripes;
332 int nzones = 0; 328 int nzones = 0;
333 int i;
334 unsigned long index = logical >> PAGE_CACHE_SHIFT; 329 unsigned long index = logical >> PAGE_CACHE_SHIFT;
335 int dev_replace_is_ongoing; 330 int dev_replace_is_ongoing;
331 int have_zone = 0;
336 332
337 spin_lock(&fs_info->reada_lock); 333 spin_lock(&fs_info->reada_lock);
338 re = radix_tree_lookup(&fs_info->reada_tree, index); 334 re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
375 struct reada_zone *zone; 371 struct reada_zone *zone;
376 372
377 dev = bbio->stripes[nzones].dev; 373 dev = bbio->stripes[nzones].dev;
374
375 /* cannot read ahead on missing device. */
376 if (!dev->bdev)
377 continue;
378
378 zone = reada_find_zone(fs_info, dev, logical, bbio); 379 zone = reada_find_zone(fs_info, dev, logical, bbio);
379 if (!zone) 380 if (!zone)
380 break; 381 continue;
381 382
382 re->zones[nzones] = zone; 383 re->zones[re->nzones++] = zone;
383 spin_lock(&zone->lock); 384 spin_lock(&zone->lock);
384 if (!zone->elems) 385 if (!zone->elems)
385 kref_get(&zone->refcnt); 386 kref_get(&zone->refcnt);
@@ -389,8 +390,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
389 kref_put(&zone->refcnt, reada_zone_release); 390 kref_put(&zone->refcnt, reada_zone_release);
390 spin_unlock(&fs_info->reada_lock); 391 spin_unlock(&fs_info->reada_lock);
391 } 392 }
392 re->nzones = nzones; 393 if (re->nzones == 0) {
393 if (nzones == 0) {
394 /* not a single zone found, error and out */ 394 /* not a single zone found, error and out */
395 goto error; 395 goto error;
396 } 396 }
@@ -415,8 +415,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
415 prev_dev = NULL; 415 prev_dev = NULL;
416 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( 416 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
417 &fs_info->dev_replace); 417 &fs_info->dev_replace);
418 for (i = 0; i < nzones; ++i) { 418 for (nzones = 0; nzones < re->nzones; ++nzones) {
419 dev = bbio->stripes[i].dev; 419 dev = re->zones[nzones]->device;
420
420 if (dev == prev_dev) { 421 if (dev == prev_dev) {
421 /* 422 /*
422 * in case of DUP, just add the first zone. As both 423 * in case of DUP, just add the first zone. As both
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
427 */ 428 */
428 continue; 429 continue;
429 } 430 }
430 if (!dev->bdev) { 431 if (!dev->bdev)
431 /* 432 continue;
432 * cannot read ahead on missing device, but for RAID5/6, 433
433 * REQ_GET_READ_MIRRORS return 1. So don't skip missing
434 * device for such case.
435 */
436 if (nzones > 1)
437 continue;
438 }
439 if (dev_replace_is_ongoing && 434 if (dev_replace_is_ongoing &&
440 dev == fs_info->dev_replace.tgtdev) { 435 dev == fs_info->dev_replace.tgtdev) {
441 /* 436 /*
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
447 prev_dev = dev; 442 prev_dev = dev;
448 ret = radix_tree_insert(&dev->reada_extents, index, re); 443 ret = radix_tree_insert(&dev->reada_extents, index, re);
449 if (ret) { 444 if (ret) {
450 while (--i >= 0) { 445 while (--nzones >= 0) {
451 dev = bbio->stripes[i].dev; 446 dev = re->zones[nzones]->device;
452 BUG_ON(dev == NULL); 447 BUG_ON(dev == NULL);
453 /* ignore whether the entry was inserted */ 448 /* ignore whether the entry was inserted */
454 radix_tree_delete(&dev->reada_extents, index); 449 radix_tree_delete(&dev->reada_extents, index);
@@ -459,18 +454,21 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
459 btrfs_dev_replace_unlock(&fs_info->dev_replace); 454 btrfs_dev_replace_unlock(&fs_info->dev_replace);
460 goto error; 455 goto error;
461 } 456 }
457 have_zone = 1;
462 } 458 }
463 spin_unlock(&fs_info->reada_lock); 459 spin_unlock(&fs_info->reada_lock);
464 btrfs_dev_replace_unlock(&fs_info->dev_replace); 460 btrfs_dev_replace_unlock(&fs_info->dev_replace);
465 461
462 if (!have_zone)
463 goto error;
464
466 btrfs_put_bbio(bbio); 465 btrfs_put_bbio(bbio);
467 return re; 466 return re;
468 467
469error: 468error:
470 while (nzones) { 469 for (nzones = 0; nzones < re->nzones; ++nzones) {
471 struct reada_zone *zone; 470 struct reada_zone *zone;
472 471
473 --nzones;
474 zone = re->zones[nzones]; 472 zone = re->zones[nzones];
475 kref_get(&zone->refcnt); 473 kref_get(&zone->refcnt);
476 spin_lock(&zone->lock); 474 spin_lock(&zone->lock);
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
531 kref_put(&zone->refcnt, reada_zone_release); 529 kref_put(&zone->refcnt, reada_zone_release);
532 spin_unlock(&fs_info->reada_lock); 530 spin_unlock(&fs_info->reada_lock);
533 } 531 }
534 if (re->scheduled_for)
535 atomic_dec(&re->scheduled_for->reada_in_flight);
536 532
537 kfree(re); 533 kfree(re);
538} 534}
@@ -556,13 +552,13 @@ static void reada_control_release(struct kref *kref)
556} 552}
557 553
558static int reada_add_block(struct reada_control *rc, u64 logical, 554static int reada_add_block(struct reada_control *rc, u64 logical,
559 struct btrfs_key *top, int level, u64 generation) 555 struct btrfs_key *top, u64 generation)
560{ 556{
561 struct btrfs_root *root = rc->root; 557 struct btrfs_root *root = rc->root;
562 struct reada_extent *re; 558 struct reada_extent *re;
563 struct reada_extctl *rec; 559 struct reada_extctl *rec;
564 560
565 re = reada_find_extent(root, logical, top, level); /* takes one ref */ 561 re = reada_find_extent(root, logical, top); /* takes one ref */
566 if (!re) 562 if (!re)
567 return -1; 563 return -1;
568 564
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
662 u64 logical; 658 u64 logical;
663 int ret; 659 int ret;
664 int i; 660 int i;
665 int need_kick = 0;
666 661
667 spin_lock(&fs_info->reada_lock); 662 spin_lock(&fs_info->reada_lock);
668 if (dev->reada_curr_zone == NULL) { 663 if (dev->reada_curr_zone == NULL) {
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
679 */ 674 */
680 ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, 675 ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
681 dev->reada_next >> PAGE_CACHE_SHIFT, 1); 676 dev->reada_next >> PAGE_CACHE_SHIFT, 1);
682 if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { 677 if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
683 ret = reada_pick_zone(dev); 678 ret = reada_pick_zone(dev);
684 if (!ret) { 679 if (!ret) {
685 spin_unlock(&fs_info->reada_lock); 680 spin_unlock(&fs_info->reada_lock);
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
698 693
699 spin_unlock(&fs_info->reada_lock); 694 spin_unlock(&fs_info->reada_lock);
700 695
696 spin_lock(&re->lock);
697 if (re->scheduled || list_empty(&re->extctl)) {
698 spin_unlock(&re->lock);
699 reada_extent_put(fs_info, re);
700 return 0;
701 }
702 re->scheduled = 1;
703 spin_unlock(&re->lock);
704
701 /* 705 /*
702 * find mirror num 706 * find mirror num
703 */ 707 */
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
709 } 713 }
710 logical = re->logical; 714 logical = re->logical;
711 715
712 spin_lock(&re->lock);
713 if (re->scheduled_for == NULL) {
714 re->scheduled_for = dev;
715 need_kick = 1;
716 }
717 spin_unlock(&re->lock);
718
719 reada_extent_put(fs_info, re);
720
721 if (!need_kick)
722 return 0;
723
724 atomic_inc(&dev->reada_in_flight); 716 atomic_inc(&dev->reada_in_flight);
725 ret = reada_tree_block_flagged(fs_info->extent_root, logical, 717 ret = reada_tree_block_flagged(fs_info->extent_root, logical,
726 mirror_num, &eb); 718 mirror_num, &eb);
727 if (ret) 719 if (ret)
728 __readahead_hook(fs_info->extent_root, NULL, logical, ret); 720 __readahead_hook(fs_info, re, NULL, logical, ret);
729 else if (eb) 721 else if (eb)
730 __readahead_hook(fs_info->extent_root, eb, eb->start, ret); 722 __readahead_hook(fs_info, re, eb, eb->start, ret);
731 723
732 if (eb) 724 if (eb)
733 free_extent_buffer(eb); 725 free_extent_buffer(eb);
734 726
727 atomic_dec(&dev->reada_in_flight);
728 reada_extent_put(fs_info, re);
729
735 return 1; 730 return 1;
736 731
737} 732}
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
752 set_task_ioprio(current, BTRFS_IOPRIO_READA); 747 set_task_ioprio(current, BTRFS_IOPRIO_READA);
753 __reada_start_machine(fs_info); 748 __reada_start_machine(fs_info);
754 set_task_ioprio(current, old_ioprio); 749 set_task_ioprio(current, old_ioprio);
750
751 atomic_dec(&fs_info->reada_works_cnt);
755} 752}
756 753
757static void __reada_start_machine(struct btrfs_fs_info *fs_info) 754static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -783,8 +780,12 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
783 * enqueue to workers to finish it. This will distribute the load to 780 * enqueue to workers to finish it. This will distribute the load to
784 * the cores. 781 * the cores.
785 */ 782 */
786 for (i = 0; i < 2; ++i) 783 for (i = 0; i < 2; ++i) {
787 reada_start_machine(fs_info); 784 reada_start_machine(fs_info);
785 if (atomic_read(&fs_info->reada_works_cnt) >
786 BTRFS_MAX_MIRRORS * 2)
787 break;
788 }
788} 789}
789 790
790static void reada_start_machine(struct btrfs_fs_info *fs_info) 791static void reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
801 rmw->fs_info = fs_info; 802 rmw->fs_info = fs_info;
802 803
803 btrfs_queue_work(fs_info->readahead_workers, &rmw->work); 804 btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
805 atomic_inc(&fs_info->reada_works_cnt);
804} 806}
805 807
806#ifdef DEBUG 808#ifdef DEBUG
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
848 if (ret == 0) 850 if (ret == 0)
849 break; 851 break;
850 printk(KERN_DEBUG 852 printk(KERN_DEBUG
851 " re: logical %llu size %u empty %d for %lld", 853 " re: logical %llu size %u empty %d scheduled %d",
852 re->logical, fs_info->tree_root->nodesize, 854 re->logical, fs_info->tree_root->nodesize,
853 list_empty(&re->extctl), re->scheduled_for ? 855 list_empty(&re->extctl), re->scheduled);
854 re->scheduled_for->devid : -1);
855 856
856 for (i = 0; i < re->nzones; ++i) { 857 for (i = 0; i < re->nzones; ++i) {
857 printk(KERN_CONT " zone %llu-%llu devs", 858 printk(KERN_CONT " zone %llu-%llu devs",
@@ -878,27 +879,21 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
878 index, 1); 879 index, 1);
879 if (ret == 0) 880 if (ret == 0)
880 break; 881 break;
881 if (!re->scheduled_for) { 882 if (!re->scheduled) {
882 index = (re->logical >> PAGE_CACHE_SHIFT) + 1; 883 index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
883 continue; 884 continue;
884 } 885 }
885 printk(KERN_DEBUG 886 printk(KERN_DEBUG
886 "re: logical %llu size %u list empty %d for %lld", 887 "re: logical %llu size %u list empty %d scheduled %d",
887 re->logical, fs_info->tree_root->nodesize, 888 re->logical, fs_info->tree_root->nodesize,
888 list_empty(&re->extctl), 889 list_empty(&re->extctl), re->scheduled);
889 re->scheduled_for ? re->scheduled_for->devid : -1);
890 for (i = 0; i < re->nzones; ++i) { 890 for (i = 0; i < re->nzones; ++i) {
891 printk(KERN_CONT " zone %llu-%llu devs", 891 printk(KERN_CONT " zone %llu-%llu devs",
892 re->zones[i]->start, 892 re->zones[i]->start,
893 re->zones[i]->end); 893 re->zones[i]->end);
894 for (i = 0; i < re->nzones; ++i) { 894 for (j = 0; j < re->zones[i]->ndevs; ++j) {
895 printk(KERN_CONT " zone %llu-%llu devs", 895 printk(KERN_CONT " %lld",
896 re->zones[i]->start, 896 re->zones[i]->devs[j]->devid);
897 re->zones[i]->end);
898 for (j = 0; j < re->zones[i]->ndevs; ++j) {
899 printk(KERN_CONT " %lld",
900 re->zones[i]->devs[j]->devid);
901 }
902 } 897 }
903 } 898 }
904 printk(KERN_CONT "\n"); 899 printk(KERN_CONT "\n");
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
917 struct reada_control *rc; 912 struct reada_control *rc;
918 u64 start; 913 u64 start;
919 u64 generation; 914 u64 generation;
920 int level;
921 int ret; 915 int ret;
922 struct extent_buffer *node; 916 struct extent_buffer *node;
923 static struct btrfs_key max_key = { 917 static struct btrfs_key max_key = {
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
940 934
941 node = btrfs_root_node(root); 935 node = btrfs_root_node(root);
942 start = node->start; 936 start = node->start;
943 level = btrfs_header_level(node);
944 generation = btrfs_header_generation(node); 937 generation = btrfs_header_generation(node);
945 free_extent_buffer(node); 938 free_extent_buffer(node);
946 939
947 ret = reada_add_block(rc, start, &max_key, level, generation); 940 ret = reada_add_block(rc, start, &max_key, generation);
948 if (ret) { 941 if (ret) {
949 kfree(rc); 942 kfree(rc);
950 return ERR_PTR(ret); 943 return ERR_PTR(ret);
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
959int btrfs_reada_wait(void *handle) 952int btrfs_reada_wait(void *handle)
960{ 953{
961 struct reada_control *rc = handle; 954 struct reada_control *rc = handle;
955 struct btrfs_fs_info *fs_info = rc->root->fs_info;
962 956
963 while (atomic_read(&rc->elems)) { 957 while (atomic_read(&rc->elems)) {
958 if (!atomic_read(&fs_info->reada_works_cnt))
959 reada_start_machine(fs_info);
964 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 960 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
965 5 * HZ); 961 5 * HZ);
966 dump_devs(rc->root->fs_info, 962 dump_devs(rc->root->fs_info,
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
977int btrfs_reada_wait(void *handle) 973int btrfs_reada_wait(void *handle)
978{ 974{
979 struct reada_control *rc = handle; 975 struct reada_control *rc = handle;
976 struct btrfs_fs_info *fs_info = rc->root->fs_info;
980 977
981 while (atomic_read(&rc->elems)) { 978 while (atomic_read(&rc->elems)) {
982 wait_event(rc->wait, atomic_read(&rc->elems) == 0); 979 if (!atomic_read(&fs_info->reada_works_cnt))
980 reada_start_machine(fs_info);
981 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
982 (HZ + 9) / 10);
983 } 983 }
984 984
985 kref_put(&rc->refcnt, reada_control_release); 985 kref_put(&rc->refcnt, reada_control_release);