aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/reada.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/reada.c')
-rw-r--r--fs/btrfs/reada.c268
1 files changed, 134 insertions, 134 deletions
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 619f92963e27..b892914968c1 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -72,7 +72,7 @@ struct reada_extent {
72 spinlock_t lock; 72 spinlock_t lock;
73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
74 int nzones; 74 int nzones;
75 struct btrfs_device *scheduled_for; 75 int scheduled;
76}; 76};
77 77
78struct reada_zone { 78struct reada_zone {
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
101static void __reada_start_machine(struct btrfs_fs_info *fs_info); 101static void __reada_start_machine(struct btrfs_fs_info *fs_info);
102 102
103static int reada_add_block(struct reada_control *rc, u64 logical, 103static int reada_add_block(struct reada_control *rc, u64 logical,
104 struct btrfs_key *top, int level, u64 generation); 104 struct btrfs_key *top, u64 generation);
105 105
106/* recurses */ 106/* recurses */
107/* in case of err, eb might be NULL */ 107/* in case of err, eb might be NULL */
108static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 108static void __readahead_hook(struct btrfs_fs_info *fs_info,
109 u64 start, int err) 109 struct reada_extent *re, struct extent_buffer *eb,
110 u64 start, int err)
110{ 111{
111 int level = 0; 112 int level = 0;
112 int nritems; 113 int nritems;
113 int i; 114 int i;
114 u64 bytenr; 115 u64 bytenr;
115 u64 generation; 116 u64 generation;
116 struct reada_extent *re;
117 struct btrfs_fs_info *fs_info = root->fs_info;
118 struct list_head list; 117 struct list_head list;
119 unsigned long index = start >> PAGE_CACHE_SHIFT;
120 struct btrfs_device *for_dev;
121 118
122 if (eb) 119 if (eb)
123 level = btrfs_header_level(eb); 120 level = btrfs_header_level(eb);
124 121
125 /* find extent */
126 spin_lock(&fs_info->reada_lock);
127 re = radix_tree_lookup(&fs_info->reada_tree, index);
128 if (re)
129 re->refcnt++;
130 spin_unlock(&fs_info->reada_lock);
131
132 if (!re)
133 return -1;
134
135 spin_lock(&re->lock); 122 spin_lock(&re->lock);
136 /* 123 /*
137 * just take the full list from the extent. afterwards we 124 * just take the full list from the extent. afterwards we
138 * don't need the lock anymore 125 * don't need the lock anymore
139 */ 126 */
140 list_replace_init(&re->extctl, &list); 127 list_replace_init(&re->extctl, &list);
141 for_dev = re->scheduled_for; 128 re->scheduled = 0;
142 re->scheduled_for = NULL;
143 spin_unlock(&re->lock); 129 spin_unlock(&re->lock);
144 130
145 if (err == 0) { 131 /*
146 nritems = level ? btrfs_header_nritems(eb) : 0; 132 * this is the error case, the extent buffer has not been
147 generation = btrfs_header_generation(eb); 133 * read correctly. We won't access anything from it and
148 /* 134 * just cleanup our data structures. Effectively this will
149 * FIXME: currently we just set nritems to 0 if this is a leaf, 135 * cut the branch below this node from read ahead.
150 * effectively ignoring the content. In a next step we could 136 */
151 * trigger more readahead depending from the content, e.g. 137 if (err)
152 * fetch the checksums for the extents in the leaf. 138 goto cleanup;
153 */
154 } else {
155 /*
156 * this is the error case, the extent buffer has not been
157 * read correctly. We won't access anything from it and
158 * just cleanup our data structures. Effectively this will
159 * cut the branch below this node from read ahead.
160 */
161 nritems = 0;
162 generation = 0;
163 }
164 139
140 /*
141 * FIXME: currently we just set nritems to 0 if this is a leaf,
142 * effectively ignoring the content. In a next step we could
143 * trigger more readahead depending from the content, e.g.
144 * fetch the checksums for the extents in the leaf.
145 */
146 if (!level)
147 goto cleanup;
148
149 nritems = btrfs_header_nritems(eb);
150 generation = btrfs_header_generation(eb);
165 for (i = 0; i < nritems; i++) { 151 for (i = 0; i < nritems; i++) {
166 struct reada_extctl *rec; 152 struct reada_extctl *rec;
167 u64 n_gen; 153 u64 n_gen;
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
188 */ 174 */
189#ifdef DEBUG 175#ifdef DEBUG
190 if (rec->generation != generation) { 176 if (rec->generation != generation) {
191 btrfs_debug(root->fs_info, 177 btrfs_debug(fs_info,
192 "generation mismatch for (%llu,%d,%llu) %llu != %llu", 178 "generation mismatch for (%llu,%d,%llu) %llu != %llu",
193 key.objectid, key.type, key.offset, 179 key.objectid, key.type, key.offset,
194 rec->generation, generation); 180 rec->generation, generation);
195 } 181 }
196#endif 182#endif
197 if (rec->generation == generation && 183 if (rec->generation == generation &&
198 btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && 184 btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
199 btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) 185 btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
200 reada_add_block(rc, bytenr, &next_key, 186 reada_add_block(rc, bytenr, &next_key, n_gen);
201 level - 1, n_gen);
202 } 187 }
203 } 188 }
189
190cleanup:
204 /* 191 /*
205 * free extctl records 192 * free extctl records
206 */ 193 */
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
222 209
223 reada_extent_put(fs_info, re); /* one ref for each entry */ 210 reada_extent_put(fs_info, re); /* one ref for each entry */
224 } 211 }
225 reada_extent_put(fs_info, re); /* our ref */
226 if (for_dev)
227 atomic_dec(&for_dev->reada_in_flight);
228 212
229 return 0; 213 return;
230} 214}
231 215
232/* 216/*
233 * start is passed separately in case eb in NULL, which may be the case with 217 * start is passed separately in case eb in NULL, which may be the case with
234 * failed I/O 218 * failed I/O
235 */ 219 */
236int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, 220int btree_readahead_hook(struct btrfs_fs_info *fs_info,
237 u64 start, int err) 221 struct extent_buffer *eb, u64 start, int err)
238{ 222{
239 int ret; 223 int ret = 0;
224 struct reada_extent *re;
240 225
241 ret = __readahead_hook(root, eb, start, err); 226 /* find extent */
227 spin_lock(&fs_info->reada_lock);
228 re = radix_tree_lookup(&fs_info->reada_tree,
229 start >> PAGE_CACHE_SHIFT);
230 if (re)
231 re->refcnt++;
232 spin_unlock(&fs_info->reada_lock);
233 if (!re) {
234 ret = -1;
235 goto start_machine;
236 }
242 237
243 reada_start_machine(root->fs_info); 238 __readahead_hook(fs_info, re, eb, start, err);
239 reada_extent_put(fs_info, re); /* our ref */
244 240
241start_machine:
242 reada_start_machine(fs_info);
245 return ret; 243 return ret;
246} 244}
247 245
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
260 spin_lock(&fs_info->reada_lock); 258 spin_lock(&fs_info->reada_lock);
261 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, 259 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
262 logical >> PAGE_CACHE_SHIFT, 1); 260 logical >> PAGE_CACHE_SHIFT, 1);
263 if (ret == 1) 261 if (ret == 1 && logical >= zone->start && logical <= zone->end) {
264 kref_get(&zone->refcnt); 262 kref_get(&zone->refcnt);
265 spin_unlock(&fs_info->reada_lock);
266
267 if (ret == 1) {
268 if (logical >= zone->start && logical < zone->end)
269 return zone;
270 spin_lock(&fs_info->reada_lock);
271 kref_put(&zone->refcnt, reada_zone_release);
272 spin_unlock(&fs_info->reada_lock); 263 spin_unlock(&fs_info->reada_lock);
264 return zone;
273 } 265 }
274 266
267 spin_unlock(&fs_info->reada_lock);
268
275 cache = btrfs_lookup_block_group(fs_info, logical); 269 cache = btrfs_lookup_block_group(fs_info, logical);
276 if (!cache) 270 if (!cache)
277 return NULL; 271 return NULL;
@@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
280 end = start + cache->key.offset - 1; 274 end = start + cache->key.offset - 1;
281 btrfs_put_block_group(cache); 275 btrfs_put_block_group(cache);
282 276
283 zone = kzalloc(sizeof(*zone), GFP_NOFS); 277 zone = kzalloc(sizeof(*zone), GFP_KERNEL);
284 if (!zone) 278 if (!zone)
285 return NULL; 279 return NULL;
286 280
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
307 kfree(zone); 301 kfree(zone);
308 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, 302 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
309 logical >> PAGE_CACHE_SHIFT, 1); 303 logical >> PAGE_CACHE_SHIFT, 1);
310 if (ret == 1) 304 if (ret == 1 && logical >= zone->start && logical <= zone->end)
311 kref_get(&zone->refcnt); 305 kref_get(&zone->refcnt);
306 else
307 zone = NULL;
312 } 308 }
313 spin_unlock(&fs_info->reada_lock); 309 spin_unlock(&fs_info->reada_lock);
314 310
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
317 313
318static struct reada_extent *reada_find_extent(struct btrfs_root *root, 314static struct reada_extent *reada_find_extent(struct btrfs_root *root,
319 u64 logical, 315 u64 logical,
320 struct btrfs_key *top, int level) 316 struct btrfs_key *top)
321{ 317{
322 int ret; 318 int ret;
323 struct reada_extent *re = NULL; 319 struct reada_extent *re = NULL;
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
330 u64 length; 326 u64 length;
331 int real_stripes; 327 int real_stripes;
332 int nzones = 0; 328 int nzones = 0;
333 int i;
334 unsigned long index = logical >> PAGE_CACHE_SHIFT; 329 unsigned long index = logical >> PAGE_CACHE_SHIFT;
335 int dev_replace_is_ongoing; 330 int dev_replace_is_ongoing;
331 int have_zone = 0;
336 332
337 spin_lock(&fs_info->reada_lock); 333 spin_lock(&fs_info->reada_lock);
338 re = radix_tree_lookup(&fs_info->reada_tree, index); 334 re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
343 if (re) 339 if (re)
344 return re; 340 return re;
345 341
346 re = kzalloc(sizeof(*re), GFP_NOFS); 342 re = kzalloc(sizeof(*re), GFP_KERNEL);
347 if (!re) 343 if (!re)
348 return NULL; 344 return NULL;
349 345
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
375 struct reada_zone *zone; 371 struct reada_zone *zone;
376 372
377 dev = bbio->stripes[nzones].dev; 373 dev = bbio->stripes[nzones].dev;
374
375 /* cannot read ahead on missing device. */
376 if (!dev->bdev)
377 continue;
378
378 zone = reada_find_zone(fs_info, dev, logical, bbio); 379 zone = reada_find_zone(fs_info, dev, logical, bbio);
379 if (!zone) 380 if (!zone)
380 break; 381 continue;
381 382
382 re->zones[nzones] = zone; 383 re->zones[re->nzones++] = zone;
383 spin_lock(&zone->lock); 384 spin_lock(&zone->lock);
384 if (!zone->elems) 385 if (!zone->elems)
385 kref_get(&zone->refcnt); 386 kref_get(&zone->refcnt);
@@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
389 kref_put(&zone->refcnt, reada_zone_release); 390 kref_put(&zone->refcnt, reada_zone_release);
390 spin_unlock(&fs_info->reada_lock); 391 spin_unlock(&fs_info->reada_lock);
391 } 392 }
392 re->nzones = nzones; 393 if (re->nzones == 0) {
393 if (nzones == 0) {
394 /* not a single zone found, error and out */ 394 /* not a single zone found, error and out */
395 goto error; 395 goto error;
396 } 396 }
397 397
398 /* insert extent in reada_tree + all per-device trees, all or nothing */ 398 /* insert extent in reada_tree + all per-device trees, all or nothing */
399 btrfs_dev_replace_lock(&fs_info->dev_replace); 399 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
400 spin_lock(&fs_info->reada_lock); 400 spin_lock(&fs_info->reada_lock);
401 ret = radix_tree_insert(&fs_info->reada_tree, index, re); 401 ret = radix_tree_insert(&fs_info->reada_tree, index, re);
402 if (ret == -EEXIST) { 402 if (ret == -EEXIST) {
@@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
404 BUG_ON(!re_exist); 404 BUG_ON(!re_exist);
405 re_exist->refcnt++; 405 re_exist->refcnt++;
406 spin_unlock(&fs_info->reada_lock); 406 spin_unlock(&fs_info->reada_lock);
407 btrfs_dev_replace_unlock(&fs_info->dev_replace); 407 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
408 goto error; 408 goto error;
409 } 409 }
410 if (ret) { 410 if (ret) {
411 spin_unlock(&fs_info->reada_lock); 411 spin_unlock(&fs_info->reada_lock);
412 btrfs_dev_replace_unlock(&fs_info->dev_replace); 412 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
413 goto error; 413 goto error;
414 } 414 }
415 prev_dev = NULL; 415 prev_dev = NULL;
416 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( 416 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
417 &fs_info->dev_replace); 417 &fs_info->dev_replace);
418 for (i = 0; i < nzones; ++i) { 418 for (nzones = 0; nzones < re->nzones; ++nzones) {
419 dev = bbio->stripes[i].dev; 419 dev = re->zones[nzones]->device;
420
420 if (dev == prev_dev) { 421 if (dev == prev_dev) {
421 /* 422 /*
422 * in case of DUP, just add the first zone. As both 423 * in case of DUP, just add the first zone. As both
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
427 */ 428 */
428 continue; 429 continue;
429 } 430 }
430 if (!dev->bdev) { 431 if (!dev->bdev)
431 /* 432 continue;
432 * cannot read ahead on missing device, but for RAID5/6, 433
433 * REQ_GET_READ_MIRRORS return 1. So don't skip missing
434 * device for such case.
435 */
436 if (nzones > 1)
437 continue;
438 }
439 if (dev_replace_is_ongoing && 434 if (dev_replace_is_ongoing &&
440 dev == fs_info->dev_replace.tgtdev) { 435 dev == fs_info->dev_replace.tgtdev) {
441 /* 436 /*
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
447 prev_dev = dev; 442 prev_dev = dev;
448 ret = radix_tree_insert(&dev->reada_extents, index, re); 443 ret = radix_tree_insert(&dev->reada_extents, index, re);
449 if (ret) { 444 if (ret) {
450 while (--i >= 0) { 445 while (--nzones >= 0) {
451 dev = bbio->stripes[i].dev; 446 dev = re->zones[nzones]->device;
452 BUG_ON(dev == NULL); 447 BUG_ON(dev == NULL);
453 /* ignore whether the entry was inserted */ 448 /* ignore whether the entry was inserted */
454 radix_tree_delete(&dev->reada_extents, index); 449 radix_tree_delete(&dev->reada_extents, index);
@@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
456 BUG_ON(fs_info == NULL); 451 BUG_ON(fs_info == NULL);
457 radix_tree_delete(&fs_info->reada_tree, index); 452 radix_tree_delete(&fs_info->reada_tree, index);
458 spin_unlock(&fs_info->reada_lock); 453 spin_unlock(&fs_info->reada_lock);
459 btrfs_dev_replace_unlock(&fs_info->dev_replace); 454 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
460 goto error; 455 goto error;
461 } 456 }
457 have_zone = 1;
462 } 458 }
463 spin_unlock(&fs_info->reada_lock); 459 spin_unlock(&fs_info->reada_lock);
464 btrfs_dev_replace_unlock(&fs_info->dev_replace); 460 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
461
462 if (!have_zone)
463 goto error;
465 464
466 btrfs_put_bbio(bbio); 465 btrfs_put_bbio(bbio);
467 return re; 466 return re;
468 467
469error: 468error:
470 while (nzones) { 469 for (nzones = 0; nzones < re->nzones; ++nzones) {
471 struct reada_zone *zone; 470 struct reada_zone *zone;
472 471
473 --nzones;
474 zone = re->zones[nzones]; 472 zone = re->zones[nzones];
475 kref_get(&zone->refcnt); 473 kref_get(&zone->refcnt);
476 spin_lock(&zone->lock); 474 spin_lock(&zone->lock);
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
531 kref_put(&zone->refcnt, reada_zone_release); 529 kref_put(&zone->refcnt, reada_zone_release);
532 spin_unlock(&fs_info->reada_lock); 530 spin_unlock(&fs_info->reada_lock);
533 } 531 }
534 if (re->scheduled_for)
535 atomic_dec(&re->scheduled_for->reada_in_flight);
536 532
537 kfree(re); 533 kfree(re);
538} 534}
@@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref)
556} 552}
557 553
558static int reada_add_block(struct reada_control *rc, u64 logical, 554static int reada_add_block(struct reada_control *rc, u64 logical,
559 struct btrfs_key *top, int level, u64 generation) 555 struct btrfs_key *top, u64 generation)
560{ 556{
561 struct btrfs_root *root = rc->root; 557 struct btrfs_root *root = rc->root;
562 struct reada_extent *re; 558 struct reada_extent *re;
563 struct reada_extctl *rec; 559 struct reada_extctl *rec;
564 560
565 re = reada_find_extent(root, logical, top, level); /* takes one ref */ 561 re = reada_find_extent(root, logical, top); /* takes one ref */
566 if (!re) 562 if (!re)
567 return -1; 563 return -1;
568 564
569 rec = kzalloc(sizeof(*rec), GFP_NOFS); 565 rec = kzalloc(sizeof(*rec), GFP_KERNEL);
570 if (!rec) { 566 if (!rec) {
571 reada_extent_put(root->fs_info, re); 567 reada_extent_put(root->fs_info, re);
572 return -ENOMEM; 568 return -ENOMEM;
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
662 u64 logical; 658 u64 logical;
663 int ret; 659 int ret;
664 int i; 660 int i;
665 int need_kick = 0;
666 661
667 spin_lock(&fs_info->reada_lock); 662 spin_lock(&fs_info->reada_lock);
668 if (dev->reada_curr_zone == NULL) { 663 if (dev->reada_curr_zone == NULL) {
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
679 */ 674 */
680 ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, 675 ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
681 dev->reada_next >> PAGE_CACHE_SHIFT, 1); 676 dev->reada_next >> PAGE_CACHE_SHIFT, 1);
682 if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { 677 if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
683 ret = reada_pick_zone(dev); 678 ret = reada_pick_zone(dev);
684 if (!ret) { 679 if (!ret) {
685 spin_unlock(&fs_info->reada_lock); 680 spin_unlock(&fs_info->reada_lock);
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
698 693
699 spin_unlock(&fs_info->reada_lock); 694 spin_unlock(&fs_info->reada_lock);
700 695
696 spin_lock(&re->lock);
697 if (re->scheduled || list_empty(&re->extctl)) {
698 spin_unlock(&re->lock);
699 reada_extent_put(fs_info, re);
700 return 0;
701 }
702 re->scheduled = 1;
703 spin_unlock(&re->lock);
704
701 /* 705 /*
702 * find mirror num 706 * find mirror num
703 */ 707 */
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
709 } 713 }
710 logical = re->logical; 714 logical = re->logical;
711 715
712 spin_lock(&re->lock);
713 if (re->scheduled_for == NULL) {
714 re->scheduled_for = dev;
715 need_kick = 1;
716 }
717 spin_unlock(&re->lock);
718
719 reada_extent_put(fs_info, re);
720
721 if (!need_kick)
722 return 0;
723
724 atomic_inc(&dev->reada_in_flight); 716 atomic_inc(&dev->reada_in_flight);
725 ret = reada_tree_block_flagged(fs_info->extent_root, logical, 717 ret = reada_tree_block_flagged(fs_info->extent_root, logical,
726 mirror_num, &eb); 718 mirror_num, &eb);
727 if (ret) 719 if (ret)
728 __readahead_hook(fs_info->extent_root, NULL, logical, ret); 720 __readahead_hook(fs_info, re, NULL, logical, ret);
729 else if (eb) 721 else if (eb)
730 __readahead_hook(fs_info->extent_root, eb, eb->start, ret); 722 __readahead_hook(fs_info, re, eb, eb->start, ret);
731 723
732 if (eb) 724 if (eb)
733 free_extent_buffer(eb); 725 free_extent_buffer(eb);
734 726
727 atomic_dec(&dev->reada_in_flight);
728 reada_extent_put(fs_info, re);
729
735 return 1; 730 return 1;
736 731
737} 732}
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
752 set_task_ioprio(current, BTRFS_IOPRIO_READA); 747 set_task_ioprio(current, BTRFS_IOPRIO_READA);
753 __reada_start_machine(fs_info); 748 __reada_start_machine(fs_info);
754 set_task_ioprio(current, old_ioprio); 749 set_task_ioprio(current, old_ioprio);
750
751 atomic_dec(&fs_info->reada_works_cnt);
755} 752}
756 753
757static void __reada_start_machine(struct btrfs_fs_info *fs_info) 754static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
783 * enqueue to workers to finish it. This will distribute the load to 780 * enqueue to workers to finish it. This will distribute the load to
784 * the cores. 781 * the cores.
785 */ 782 */
786 for (i = 0; i < 2; ++i) 783 for (i = 0; i < 2; ++i) {
787 reada_start_machine(fs_info); 784 reada_start_machine(fs_info);
785 if (atomic_read(&fs_info->reada_works_cnt) >
786 BTRFS_MAX_MIRRORS * 2)
787 break;
788 }
788} 789}
789 790
790static void reada_start_machine(struct btrfs_fs_info *fs_info) 791static void reada_start_machine(struct btrfs_fs_info *fs_info)
791{ 792{
792 struct reada_machine_work *rmw; 793 struct reada_machine_work *rmw;
793 794
794 rmw = kzalloc(sizeof(*rmw), GFP_NOFS); 795 rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);
795 if (!rmw) { 796 if (!rmw) {
796 /* FIXME we cannot handle this properly right now */ 797 /* FIXME we cannot handle this properly right now */
797 BUG(); 798 BUG();
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
801 rmw->fs_info = fs_info; 802 rmw->fs_info = fs_info;
802 803
803 btrfs_queue_work(fs_info->readahead_workers, &rmw->work); 804 btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
805 atomic_inc(&fs_info->reada_works_cnt);
804} 806}
805 807
806#ifdef DEBUG 808#ifdef DEBUG
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
848 if (ret == 0) 850 if (ret == 0)
849 break; 851 break;
850 printk(KERN_DEBUG 852 printk(KERN_DEBUG
851 " re: logical %llu size %u empty %d for %lld", 853 " re: logical %llu size %u empty %d scheduled %d",
852 re->logical, fs_info->tree_root->nodesize, 854 re->logical, fs_info->tree_root->nodesize,
853 list_empty(&re->extctl), re->scheduled_for ? 855 list_empty(&re->extctl), re->scheduled);
854 re->scheduled_for->devid : -1);
855 856
856 for (i = 0; i < re->nzones; ++i) { 857 for (i = 0; i < re->nzones; ++i) {
857 printk(KERN_CONT " zone %llu-%llu devs", 858 printk(KERN_CONT " zone %llu-%llu devs",
@@ -878,27 +879,21 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
878 index, 1); 879 index, 1);
879 if (ret == 0) 880 if (ret == 0)
880 break; 881 break;
881 if (!re->scheduled_for) { 882 if (!re->scheduled) {
882 index = (re->logical >> PAGE_CACHE_SHIFT) + 1; 883 index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
883 continue; 884 continue;
884 } 885 }
885 printk(KERN_DEBUG 886 printk(KERN_DEBUG
886 "re: logical %llu size %u list empty %d for %lld", 887 "re: logical %llu size %u list empty %d scheduled %d",
887 re->logical, fs_info->tree_root->nodesize, 888 re->logical, fs_info->tree_root->nodesize,
888 list_empty(&re->extctl), 889 list_empty(&re->extctl), re->scheduled);
889 re->scheduled_for ? re->scheduled_for->devid : -1);
890 for (i = 0; i < re->nzones; ++i) { 890 for (i = 0; i < re->nzones; ++i) {
891 printk(KERN_CONT " zone %llu-%llu devs", 891 printk(KERN_CONT " zone %llu-%llu devs",
892 re->zones[i]->start, 892 re->zones[i]->start,
893 re->zones[i]->end); 893 re->zones[i]->end);
894 for (i = 0; i < re->nzones; ++i) { 894 for (j = 0; j < re->zones[i]->ndevs; ++j) {
895 printk(KERN_CONT " zone %llu-%llu devs", 895 printk(KERN_CONT " %lld",
896 re->zones[i]->start, 896 re->zones[i]->devs[j]->devid);
897 re->zones[i]->end);
898 for (j = 0; j < re->zones[i]->ndevs; ++j) {
899 printk(KERN_CONT " %lld",
900 re->zones[i]->devs[j]->devid);
901 }
902 } 897 }
903 } 898 }
904 printk(KERN_CONT "\n"); 899 printk(KERN_CONT "\n");
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
917 struct reada_control *rc; 912 struct reada_control *rc;
918 u64 start; 913 u64 start;
919 u64 generation; 914 u64 generation;
920 int level;
921 int ret; 915 int ret;
922 struct extent_buffer *node; 916 struct extent_buffer *node;
923 static struct btrfs_key max_key = { 917 static struct btrfs_key max_key = {
@@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
926 .offset = (u64)-1 920 .offset = (u64)-1
927 }; 921 };
928 922
929 rc = kzalloc(sizeof(*rc), GFP_NOFS); 923 rc = kzalloc(sizeof(*rc), GFP_KERNEL);
930 if (!rc) 924 if (!rc)
931 return ERR_PTR(-ENOMEM); 925 return ERR_PTR(-ENOMEM);
932 926
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
940 934
941 node = btrfs_root_node(root); 935 node = btrfs_root_node(root);
942 start = node->start; 936 start = node->start;
943 level = btrfs_header_level(node);
944 generation = btrfs_header_generation(node); 937 generation = btrfs_header_generation(node);
945 free_extent_buffer(node); 938 free_extent_buffer(node);
946 939
947 ret = reada_add_block(rc, start, &max_key, level, generation); 940 ret = reada_add_block(rc, start, &max_key, generation);
948 if (ret) { 941 if (ret) {
949 kfree(rc); 942 kfree(rc);
950 return ERR_PTR(ret); 943 return ERR_PTR(ret);
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
959int btrfs_reada_wait(void *handle) 952int btrfs_reada_wait(void *handle)
960{ 953{
961 struct reada_control *rc = handle; 954 struct reada_control *rc = handle;
955 struct btrfs_fs_info *fs_info = rc->root->fs_info;
962 956
963 while (atomic_read(&rc->elems)) { 957 while (atomic_read(&rc->elems)) {
958 if (!atomic_read(&fs_info->reada_works_cnt))
959 reada_start_machine(fs_info);
964 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 960 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
965 5 * HZ); 961 5 * HZ);
966 dump_devs(rc->root->fs_info, 962 dump_devs(rc->root->fs_info,
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
977int btrfs_reada_wait(void *handle) 973int btrfs_reada_wait(void *handle)
978{ 974{
979 struct reada_control *rc = handle; 975 struct reada_control *rc = handle;
976 struct btrfs_fs_info *fs_info = rc->root->fs_info;
980 977
981 while (atomic_read(&rc->elems)) { 978 while (atomic_read(&rc->elems)) {
982 wait_event(rc->wait, atomic_read(&rc->elems) == 0); 979 if (!atomic_read(&fs_info->reada_works_cnt))
980 reada_start_machine(fs_info);
981 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
982 (HZ + 9) / 10);
983 } 983 }
984 984
985 kref_put(&rc->refcnt, reada_control_release); 985 kref_put(&rc->refcnt, reada_control_release);