diff options
Diffstat (limited to 'fs/btrfs/reada.c')
-rw-r--r-- | fs/btrfs/reada.c | 268 |
1 files changed, 134 insertions, 134 deletions
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 619f92963e27..b892914968c1 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -72,7 +72,7 @@ struct reada_extent { | |||
72 | spinlock_t lock; | 72 | spinlock_t lock; |
73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; | 73 | struct reada_zone *zones[BTRFS_MAX_MIRRORS]; |
74 | int nzones; | 74 | int nzones; |
75 | struct btrfs_device *scheduled_for; | 75 | int scheduled; |
76 | }; | 76 | }; |
77 | 77 | ||
78 | struct reada_zone { | 78 | struct reada_zone { |
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info); | |||
101 | static void __reada_start_machine(struct btrfs_fs_info *fs_info); | 101 | static void __reada_start_machine(struct btrfs_fs_info *fs_info); |
102 | 102 | ||
103 | static int reada_add_block(struct reada_control *rc, u64 logical, | 103 | static int reada_add_block(struct reada_control *rc, u64 logical, |
104 | struct btrfs_key *top, int level, u64 generation); | 104 | struct btrfs_key *top, u64 generation); |
105 | 105 | ||
106 | /* recurses */ | 106 | /* recurses */ |
107 | /* in case of err, eb might be NULL */ | 107 | /* in case of err, eb might be NULL */ |
108 | static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 108 | static void __readahead_hook(struct btrfs_fs_info *fs_info, |
109 | u64 start, int err) | 109 | struct reada_extent *re, struct extent_buffer *eb, |
110 | u64 start, int err) | ||
110 | { | 111 | { |
111 | int level = 0; | 112 | int level = 0; |
112 | int nritems; | 113 | int nritems; |
113 | int i; | 114 | int i; |
114 | u64 bytenr; | 115 | u64 bytenr; |
115 | u64 generation; | 116 | u64 generation; |
116 | struct reada_extent *re; | ||
117 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
118 | struct list_head list; | 117 | struct list_head list; |
119 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
120 | struct btrfs_device *for_dev; | ||
121 | 118 | ||
122 | if (eb) | 119 | if (eb) |
123 | level = btrfs_header_level(eb); | 120 | level = btrfs_header_level(eb); |
124 | 121 | ||
125 | /* find extent */ | ||
126 | spin_lock(&fs_info->reada_lock); | ||
127 | re = radix_tree_lookup(&fs_info->reada_tree, index); | ||
128 | if (re) | ||
129 | re->refcnt++; | ||
130 | spin_unlock(&fs_info->reada_lock); | ||
131 | |||
132 | if (!re) | ||
133 | return -1; | ||
134 | |||
135 | spin_lock(&re->lock); | 122 | spin_lock(&re->lock); |
136 | /* | 123 | /* |
137 | * just take the full list from the extent. afterwards we | 124 | * just take the full list from the extent. afterwards we |
138 | * don't need the lock anymore | 125 | * don't need the lock anymore |
139 | */ | 126 | */ |
140 | list_replace_init(&re->extctl, &list); | 127 | list_replace_init(&re->extctl, &list); |
141 | for_dev = re->scheduled_for; | 128 | re->scheduled = 0; |
142 | re->scheduled_for = NULL; | ||
143 | spin_unlock(&re->lock); | 129 | spin_unlock(&re->lock); |
144 | 130 | ||
145 | if (err == 0) { | 131 | /* |
146 | nritems = level ? btrfs_header_nritems(eb) : 0; | 132 | * this is the error case, the extent buffer has not been |
147 | generation = btrfs_header_generation(eb); | 133 | * read correctly. We won't access anything from it and |
148 | /* | 134 | * just cleanup our data structures. Effectively this will |
149 | * FIXME: currently we just set nritems to 0 if this is a leaf, | 135 | * cut the branch below this node from read ahead. |
150 | * effectively ignoring the content. In a next step we could | 136 | */ |
151 | * trigger more readahead depending from the content, e.g. | 137 | if (err) |
152 | * fetch the checksums for the extents in the leaf. | 138 | goto cleanup; |
153 | */ | ||
154 | } else { | ||
155 | /* | ||
156 | * this is the error case, the extent buffer has not been | ||
157 | * read correctly. We won't access anything from it and | ||
158 | * just cleanup our data structures. Effectively this will | ||
159 | * cut the branch below this node from read ahead. | ||
160 | */ | ||
161 | nritems = 0; | ||
162 | generation = 0; | ||
163 | } | ||
164 | 139 | ||
140 | /* | ||
141 | * FIXME: currently we just set nritems to 0 if this is a leaf, | ||
142 | * effectively ignoring the content. In a next step we could | ||
143 | * trigger more readahead depending from the content, e.g. | ||
144 | * fetch the checksums for the extents in the leaf. | ||
145 | */ | ||
146 | if (!level) | ||
147 | goto cleanup; | ||
148 | |||
149 | nritems = btrfs_header_nritems(eb); | ||
150 | generation = btrfs_header_generation(eb); | ||
165 | for (i = 0; i < nritems; i++) { | 151 | for (i = 0; i < nritems; i++) { |
166 | struct reada_extctl *rec; | 152 | struct reada_extctl *rec; |
167 | u64 n_gen; | 153 | u64 n_gen; |
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
188 | */ | 174 | */ |
189 | #ifdef DEBUG | 175 | #ifdef DEBUG |
190 | if (rec->generation != generation) { | 176 | if (rec->generation != generation) { |
191 | btrfs_debug(root->fs_info, | 177 | btrfs_debug(fs_info, |
192 | "generation mismatch for (%llu,%d,%llu) %llu != %llu", | 178 | "generation mismatch for (%llu,%d,%llu) %llu != %llu", |
193 | key.objectid, key.type, key.offset, | 179 | key.objectid, key.type, key.offset, |
194 | rec->generation, generation); | 180 | rec->generation, generation); |
195 | } | 181 | } |
196 | #endif | 182 | #endif |
197 | if (rec->generation == generation && | 183 | if (rec->generation == generation && |
198 | btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && | 184 | btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 && |
199 | btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) | 185 | btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0) |
200 | reada_add_block(rc, bytenr, &next_key, | 186 | reada_add_block(rc, bytenr, &next_key, n_gen); |
201 | level - 1, n_gen); | ||
202 | } | 187 | } |
203 | } | 188 | } |
189 | |||
190 | cleanup: | ||
204 | /* | 191 | /* |
205 | * free extctl records | 192 | * free extctl records |
206 | */ | 193 | */ |
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | |||
222 | 209 | ||
223 | reada_extent_put(fs_info, re); /* one ref for each entry */ | 210 | reada_extent_put(fs_info, re); /* one ref for each entry */ |
224 | } | 211 | } |
225 | reada_extent_put(fs_info, re); /* our ref */ | ||
226 | if (for_dev) | ||
227 | atomic_dec(&for_dev->reada_in_flight); | ||
228 | 212 | ||
229 | return 0; | 213 | return; |
230 | } | 214 | } |
231 | 215 | ||
232 | /* | 216 | /* |
233 | * start is passed separately in case eb is NULL, which may be the case with | 217 | * start is passed separately in case eb is NULL, which may be the case with |
234 | * failed I/O | 218 | * failed I/O |
235 | */ | 219 | */ |
236 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 220 | int btree_readahead_hook(struct btrfs_fs_info *fs_info, |
237 | u64 start, int err) | 221 | struct extent_buffer *eb, u64 start, int err) |
238 | { | 222 | { |
239 | int ret; | 223 | int ret = 0; |
224 | struct reada_extent *re; | ||
240 | 225 | ||
241 | ret = __readahead_hook(root, eb, start, err); | 226 | /* find extent */ |
227 | spin_lock(&fs_info->reada_lock); | ||
228 | re = radix_tree_lookup(&fs_info->reada_tree, | ||
229 | start >> PAGE_CACHE_SHIFT); | ||
230 | if (re) | ||
231 | re->refcnt++; | ||
232 | spin_unlock(&fs_info->reada_lock); | ||
233 | if (!re) { | ||
234 | ret = -1; | ||
235 | goto start_machine; | ||
236 | } | ||
242 | 237 | ||
243 | reada_start_machine(root->fs_info); | 238 | __readahead_hook(fs_info, re, eb, start, err); |
239 | reada_extent_put(fs_info, re); /* our ref */ | ||
244 | 240 | ||
241 | start_machine: | ||
242 | reada_start_machine(fs_info); | ||
245 | return ret; | 243 | return ret; |
246 | } | 244 | } |
247 | 245 | ||
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
260 | spin_lock(&fs_info->reada_lock); | 258 | spin_lock(&fs_info->reada_lock); |
261 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | 259 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, |
262 | logical >> PAGE_CACHE_SHIFT, 1); | 260 | logical >> PAGE_CACHE_SHIFT, 1); |
263 | if (ret == 1) | 261 | if (ret == 1 && logical >= zone->start && logical <= zone->end) { |
264 | kref_get(&zone->refcnt); | 262 | kref_get(&zone->refcnt); |
265 | spin_unlock(&fs_info->reada_lock); | ||
266 | |||
267 | if (ret == 1) { | ||
268 | if (logical >= zone->start && logical < zone->end) | ||
269 | return zone; | ||
270 | spin_lock(&fs_info->reada_lock); | ||
271 | kref_put(&zone->refcnt, reada_zone_release); | ||
272 | spin_unlock(&fs_info->reada_lock); | 263 | spin_unlock(&fs_info->reada_lock); |
264 | return zone; | ||
273 | } | 265 | } |
274 | 266 | ||
267 | spin_unlock(&fs_info->reada_lock); | ||
268 | |||
275 | cache = btrfs_lookup_block_group(fs_info, logical); | 269 | cache = btrfs_lookup_block_group(fs_info, logical); |
276 | if (!cache) | 270 | if (!cache) |
277 | return NULL; | 271 | return NULL; |
@@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
280 | end = start + cache->key.offset - 1; | 274 | end = start + cache->key.offset - 1; |
281 | btrfs_put_block_group(cache); | 275 | btrfs_put_block_group(cache); |
282 | 276 | ||
283 | zone = kzalloc(sizeof(*zone), GFP_NOFS); | 277 | zone = kzalloc(sizeof(*zone), GFP_KERNEL); |
284 | if (!zone) | 278 | if (!zone) |
285 | return NULL; | 279 | return NULL; |
286 | 280 | ||
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
307 | kfree(zone); | 301 | kfree(zone); |
308 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | 302 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, |
309 | logical >> PAGE_CACHE_SHIFT, 1); | 303 | logical >> PAGE_CACHE_SHIFT, 1); |
310 | if (ret == 1) | 304 | if (ret == 1 && logical >= zone->start && logical <= zone->end) |
311 | kref_get(&zone->refcnt); | 305 | kref_get(&zone->refcnt); |
306 | else | ||
307 | zone = NULL; | ||
312 | } | 308 | } |
313 | spin_unlock(&fs_info->reada_lock); | 309 | spin_unlock(&fs_info->reada_lock); |
314 | 310 | ||
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
317 | 313 | ||
318 | static struct reada_extent *reada_find_extent(struct btrfs_root *root, | 314 | static struct reada_extent *reada_find_extent(struct btrfs_root *root, |
319 | u64 logical, | 315 | u64 logical, |
320 | struct btrfs_key *top, int level) | 316 | struct btrfs_key *top) |
321 | { | 317 | { |
322 | int ret; | 318 | int ret; |
323 | struct reada_extent *re = NULL; | 319 | struct reada_extent *re = NULL; |
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
330 | u64 length; | 326 | u64 length; |
331 | int real_stripes; | 327 | int real_stripes; |
332 | int nzones = 0; | 328 | int nzones = 0; |
333 | int i; | ||
334 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | 329 | unsigned long index = logical >> PAGE_CACHE_SHIFT; |
335 | int dev_replace_is_ongoing; | 330 | int dev_replace_is_ongoing; |
331 | int have_zone = 0; | ||
336 | 332 | ||
337 | spin_lock(&fs_info->reada_lock); | 333 | spin_lock(&fs_info->reada_lock); |
338 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 334 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
@@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
343 | if (re) | 339 | if (re) |
344 | return re; | 340 | return re; |
345 | 341 | ||
346 | re = kzalloc(sizeof(*re), GFP_NOFS); | 342 | re = kzalloc(sizeof(*re), GFP_KERNEL); |
347 | if (!re) | 343 | if (!re) |
348 | return NULL; | 344 | return NULL; |
349 | 345 | ||
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
375 | struct reada_zone *zone; | 371 | struct reada_zone *zone; |
376 | 372 | ||
377 | dev = bbio->stripes[nzones].dev; | 373 | dev = bbio->stripes[nzones].dev; |
374 | |||
375 | /* cannot read ahead on missing device. */ | ||
376 | if (!dev->bdev) | ||
377 | continue; | ||
378 | |||
378 | zone = reada_find_zone(fs_info, dev, logical, bbio); | 379 | zone = reada_find_zone(fs_info, dev, logical, bbio); |
379 | if (!zone) | 380 | if (!zone) |
380 | break; | 381 | continue; |
381 | 382 | ||
382 | re->zones[nzones] = zone; | 383 | re->zones[re->nzones++] = zone; |
383 | spin_lock(&zone->lock); | 384 | spin_lock(&zone->lock); |
384 | if (!zone->elems) | 385 | if (!zone->elems) |
385 | kref_get(&zone->refcnt); | 386 | kref_get(&zone->refcnt); |
@@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
389 | kref_put(&zone->refcnt, reada_zone_release); | 390 | kref_put(&zone->refcnt, reada_zone_release); |
390 | spin_unlock(&fs_info->reada_lock); | 391 | spin_unlock(&fs_info->reada_lock); |
391 | } | 392 | } |
392 | re->nzones = nzones; | 393 | if (re->nzones == 0) { |
393 | if (nzones == 0) { | ||
394 | /* not a single zone found, error and out */ | 394 | /* not a single zone found, error and out */ |
395 | goto error; | 395 | goto error; |
396 | } | 396 | } |
397 | 397 | ||
398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | 398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ |
399 | btrfs_dev_replace_lock(&fs_info->dev_replace); | 399 | btrfs_dev_replace_lock(&fs_info->dev_replace, 0); |
400 | spin_lock(&fs_info->reada_lock); | 400 | spin_lock(&fs_info->reada_lock); |
401 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | 401 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); |
402 | if (ret == -EEXIST) { | 402 | if (ret == -EEXIST) { |
@@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
404 | BUG_ON(!re_exist); | 404 | BUG_ON(!re_exist); |
405 | re_exist->refcnt++; | 405 | re_exist->refcnt++; |
406 | spin_unlock(&fs_info->reada_lock); | 406 | spin_unlock(&fs_info->reada_lock); |
407 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 407 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
408 | goto error; | 408 | goto error; |
409 | } | 409 | } |
410 | if (ret) { | 410 | if (ret) { |
411 | spin_unlock(&fs_info->reada_lock); | 411 | spin_unlock(&fs_info->reada_lock); |
412 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 412 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
413 | goto error; | 413 | goto error; |
414 | } | 414 | } |
415 | prev_dev = NULL; | 415 | prev_dev = NULL; |
416 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( | 416 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( |
417 | &fs_info->dev_replace); | 417 | &fs_info->dev_replace); |
418 | for (i = 0; i < nzones; ++i) { | 418 | for (nzones = 0; nzones < re->nzones; ++nzones) { |
419 | dev = bbio->stripes[i].dev; | 419 | dev = re->zones[nzones]->device; |
420 | |||
420 | if (dev == prev_dev) { | 421 | if (dev == prev_dev) { |
421 | /* | 422 | /* |
422 | * in case of DUP, just add the first zone. As both | 423 | * in case of DUP, just add the first zone. As both |
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
427 | */ | 428 | */ |
428 | continue; | 429 | continue; |
429 | } | 430 | } |
430 | if (!dev->bdev) { | 431 | if (!dev->bdev) |
431 | /* | 432 | continue; |
432 | * cannot read ahead on missing device, but for RAID5/6, | 433 | |
433 | * REQ_GET_READ_MIRRORS return 1. So don't skip missing | ||
434 | * device for such case. | ||
435 | */ | ||
436 | if (nzones > 1) | ||
437 | continue; | ||
438 | } | ||
439 | if (dev_replace_is_ongoing && | 434 | if (dev_replace_is_ongoing && |
440 | dev == fs_info->dev_replace.tgtdev) { | 435 | dev == fs_info->dev_replace.tgtdev) { |
441 | /* | 436 | /* |
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
447 | prev_dev = dev; | 442 | prev_dev = dev; |
448 | ret = radix_tree_insert(&dev->reada_extents, index, re); | 443 | ret = radix_tree_insert(&dev->reada_extents, index, re); |
449 | if (ret) { | 444 | if (ret) { |
450 | while (--i >= 0) { | 445 | while (--nzones >= 0) { |
451 | dev = bbio->stripes[i].dev; | 446 | dev = re->zones[nzones]->device; |
452 | BUG_ON(dev == NULL); | 447 | BUG_ON(dev == NULL); |
453 | /* ignore whether the entry was inserted */ | 448 | /* ignore whether the entry was inserted */ |
454 | radix_tree_delete(&dev->reada_extents, index); | 449 | radix_tree_delete(&dev->reada_extents, index); |
@@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
456 | BUG_ON(fs_info == NULL); | 451 | BUG_ON(fs_info == NULL); |
457 | radix_tree_delete(&fs_info->reada_tree, index); | 452 | radix_tree_delete(&fs_info->reada_tree, index); |
458 | spin_unlock(&fs_info->reada_lock); | 453 | spin_unlock(&fs_info->reada_lock); |
459 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 454 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
460 | goto error; | 455 | goto error; |
461 | } | 456 | } |
457 | have_zone = 1; | ||
462 | } | 458 | } |
463 | spin_unlock(&fs_info->reada_lock); | 459 | spin_unlock(&fs_info->reada_lock); |
464 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | 460 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
461 | |||
462 | if (!have_zone) | ||
463 | goto error; | ||
465 | 464 | ||
466 | btrfs_put_bbio(bbio); | 465 | btrfs_put_bbio(bbio); |
467 | return re; | 466 | return re; |
468 | 467 | ||
469 | error: | 468 | error: |
470 | while (nzones) { | 469 | for (nzones = 0; nzones < re->nzones; ++nzones) { |
471 | struct reada_zone *zone; | 470 | struct reada_zone *zone; |
472 | 471 | ||
473 | --nzones; | ||
474 | zone = re->zones[nzones]; | 472 | zone = re->zones[nzones]; |
475 | kref_get(&zone->refcnt); | 473 | kref_get(&zone->refcnt); |
476 | spin_lock(&zone->lock); | 474 | spin_lock(&zone->lock); |
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info, | |||
531 | kref_put(&zone->refcnt, reada_zone_release); | 529 | kref_put(&zone->refcnt, reada_zone_release); |
532 | spin_unlock(&fs_info->reada_lock); | 530 | spin_unlock(&fs_info->reada_lock); |
533 | } | 531 | } |
534 | if (re->scheduled_for) | ||
535 | atomic_dec(&re->scheduled_for->reada_in_flight); | ||
536 | 532 | ||
537 | kfree(re); | 533 | kfree(re); |
538 | } | 534 | } |
@@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref) | |||
556 | } | 552 | } |
557 | 553 | ||
558 | static int reada_add_block(struct reada_control *rc, u64 logical, | 554 | static int reada_add_block(struct reada_control *rc, u64 logical, |
559 | struct btrfs_key *top, int level, u64 generation) | 555 | struct btrfs_key *top, u64 generation) |
560 | { | 556 | { |
561 | struct btrfs_root *root = rc->root; | 557 | struct btrfs_root *root = rc->root; |
562 | struct reada_extent *re; | 558 | struct reada_extent *re; |
563 | struct reada_extctl *rec; | 559 | struct reada_extctl *rec; |
564 | 560 | ||
565 | re = reada_find_extent(root, logical, top, level); /* takes one ref */ | 561 | re = reada_find_extent(root, logical, top); /* takes one ref */ |
566 | if (!re) | 562 | if (!re) |
567 | return -1; | 563 | return -1; |
568 | 564 | ||
569 | rec = kzalloc(sizeof(*rec), GFP_NOFS); | 565 | rec = kzalloc(sizeof(*rec), GFP_KERNEL); |
570 | if (!rec) { | 566 | if (!rec) { |
571 | reada_extent_put(root->fs_info, re); | 567 | reada_extent_put(root->fs_info, re); |
572 | return -ENOMEM; | 568 | return -ENOMEM; |
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
662 | u64 logical; | 658 | u64 logical; |
663 | int ret; | 659 | int ret; |
664 | int i; | 660 | int i; |
665 | int need_kick = 0; | ||
666 | 661 | ||
667 | spin_lock(&fs_info->reada_lock); | 662 | spin_lock(&fs_info->reada_lock); |
668 | if (dev->reada_curr_zone == NULL) { | 663 | if (dev->reada_curr_zone == NULL) { |
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
679 | */ | 674 | */ |
680 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, | 675 | ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re, |
681 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); | 676 | dev->reada_next >> PAGE_CACHE_SHIFT, 1); |
682 | if (ret == 0 || re->logical >= dev->reada_curr_zone->end) { | 677 | if (ret == 0 || re->logical > dev->reada_curr_zone->end) { |
683 | ret = reada_pick_zone(dev); | 678 | ret = reada_pick_zone(dev); |
684 | if (!ret) { | 679 | if (!ret) { |
685 | spin_unlock(&fs_info->reada_lock); | 680 | spin_unlock(&fs_info->reada_lock); |
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
698 | 693 | ||
699 | spin_unlock(&fs_info->reada_lock); | 694 | spin_unlock(&fs_info->reada_lock); |
700 | 695 | ||
696 | spin_lock(&re->lock); | ||
697 | if (re->scheduled || list_empty(&re->extctl)) { | ||
698 | spin_unlock(&re->lock); | ||
699 | reada_extent_put(fs_info, re); | ||
700 | return 0; | ||
701 | } | ||
702 | re->scheduled = 1; | ||
703 | spin_unlock(&re->lock); | ||
704 | |||
701 | /* | 705 | /* |
702 | * find mirror num | 706 | * find mirror num |
703 | */ | 707 | */ |
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info, | |||
709 | } | 713 | } |
710 | logical = re->logical; | 714 | logical = re->logical; |
711 | 715 | ||
712 | spin_lock(&re->lock); | ||
713 | if (re->scheduled_for == NULL) { | ||
714 | re->scheduled_for = dev; | ||
715 | need_kick = 1; | ||
716 | } | ||
717 | spin_unlock(&re->lock); | ||
718 | |||
719 | reada_extent_put(fs_info, re); | ||
720 | |||
721 | if (!need_kick) | ||
722 | return 0; | ||
723 | |||
724 | atomic_inc(&dev->reada_in_flight); | 716 | atomic_inc(&dev->reada_in_flight); |
725 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, | 717 | ret = reada_tree_block_flagged(fs_info->extent_root, logical, |
726 | mirror_num, &eb); | 718 | mirror_num, &eb); |
727 | if (ret) | 719 | if (ret) |
728 | __readahead_hook(fs_info->extent_root, NULL, logical, ret); | 720 | __readahead_hook(fs_info, re, NULL, logical, ret); |
729 | else if (eb) | 721 | else if (eb) |
730 | __readahead_hook(fs_info->extent_root, eb, eb->start, ret); | 722 | __readahead_hook(fs_info, re, eb, eb->start, ret); |
731 | 723 | ||
732 | if (eb) | 724 | if (eb) |
733 | free_extent_buffer(eb); | 725 | free_extent_buffer(eb); |
734 | 726 | ||
727 | atomic_dec(&dev->reada_in_flight); | ||
728 | reada_extent_put(fs_info, re); | ||
729 | |||
735 | return 1; | 730 | return 1; |
736 | 731 | ||
737 | } | 732 | } |
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work) | |||
752 | set_task_ioprio(current, BTRFS_IOPRIO_READA); | 747 | set_task_ioprio(current, BTRFS_IOPRIO_READA); |
753 | __reada_start_machine(fs_info); | 748 | __reada_start_machine(fs_info); |
754 | set_task_ioprio(current, old_ioprio); | 749 | set_task_ioprio(current, old_ioprio); |
750 | |||
751 | atomic_dec(&fs_info->reada_works_cnt); | ||
755 | } | 752 | } |
756 | 753 | ||
757 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | 754 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) |
@@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) | |||
783 | * enqueue to workers to finish it. This will distribute the load to | 780 | * enqueue to workers to finish it. This will distribute the load to |
784 | * the cores. | 781 | * the cores. |
785 | */ | 782 | */ |
786 | for (i = 0; i < 2; ++i) | 783 | for (i = 0; i < 2; ++i) { |
787 | reada_start_machine(fs_info); | 784 | reada_start_machine(fs_info); |
785 | if (atomic_read(&fs_info->reada_works_cnt) > | ||
786 | BTRFS_MAX_MIRRORS * 2) | ||
787 | break; | ||
788 | } | ||
788 | } | 789 | } |
789 | 790 | ||
790 | static void reada_start_machine(struct btrfs_fs_info *fs_info) | 791 | static void reada_start_machine(struct btrfs_fs_info *fs_info) |
791 | { | 792 | { |
792 | struct reada_machine_work *rmw; | 793 | struct reada_machine_work *rmw; |
793 | 794 | ||
794 | rmw = kzalloc(sizeof(*rmw), GFP_NOFS); | 795 | rmw = kzalloc(sizeof(*rmw), GFP_KERNEL); |
795 | if (!rmw) { | 796 | if (!rmw) { |
796 | /* FIXME we cannot handle this properly right now */ | 797 | /* FIXME we cannot handle this properly right now */ |
797 | BUG(); | 798 | BUG(); |
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
801 | rmw->fs_info = fs_info; | 802 | rmw->fs_info = fs_info; |
802 | 803 | ||
803 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); | 804 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
805 | atomic_inc(&fs_info->reada_works_cnt); | ||
804 | } | 806 | } |
805 | 807 | ||
806 | #ifdef DEBUG | 808 | #ifdef DEBUG |
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) | |||
848 | if (ret == 0) | 850 | if (ret == 0) |
849 | break; | 851 | break; |
850 | printk(KERN_DEBUG | 852 | printk(KERN_DEBUG |
851 | " re: logical %llu size %u empty %d for %lld", | 853 | " re: logical %llu size %u empty %d scheduled %d", |
852 | re->logical, fs_info->tree_root->nodesize, | 854 | re->logical, fs_info->tree_root->nodesize, |
853 | list_empty(&re->extctl), re->scheduled_for ? | 855 | list_empty(&re->extctl), re->scheduled); |
854 | re->scheduled_for->devid : -1); | ||
855 | 856 | ||
856 | for (i = 0; i < re->nzones; ++i) { | 857 | for (i = 0; i < re->nzones; ++i) { |
857 | printk(KERN_CONT " zone %llu-%llu devs", | 858 | printk(KERN_CONT " zone %llu-%llu devs", |
@@ -878,27 +879,21 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all) | |||
878 | index, 1); | 879 | index, 1); |
879 | if (ret == 0) | 880 | if (ret == 0) |
880 | break; | 881 | break; |
881 | if (!re->scheduled_for) { | 882 | if (!re->scheduled) { |
882 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; | 883 | index = (re->logical >> PAGE_CACHE_SHIFT) + 1; |
883 | continue; | 884 | continue; |
884 | } | 885 | } |
885 | printk(KERN_DEBUG | 886 | printk(KERN_DEBUG |
886 | "re: logical %llu size %u list empty %d for %lld", | 887 | "re: logical %llu size %u list empty %d scheduled %d", |
887 | re->logical, fs_info->tree_root->nodesize, | 888 | re->logical, fs_info->tree_root->nodesize, |
888 | list_empty(&re->extctl), | 889 | list_empty(&re->extctl), re->scheduled); |
889 | re->scheduled_for ? re->scheduled_for->devid : -1); | ||
890 | for (i = 0; i < re->nzones; ++i) { | 890 | for (i = 0; i < re->nzones; ++i) { |
891 | printk(KERN_CONT " zone %llu-%llu devs", | 891 | printk(KERN_CONT " zone %llu-%llu devs", |
892 | re->zones[i]->start, | 892 | re->zones[i]->start, |
893 | re->zones[i]->end); | 893 | re->zones[i]->end); |
894 | for (i = 0; i < re->nzones; ++i) { | 894 | for (j = 0; j < re->zones[i]->ndevs; ++j) { |
895 | printk(KERN_CONT " zone %llu-%llu devs", | 895 | printk(KERN_CONT " %lld", |
896 | re->zones[i]->start, | 896 | re->zones[i]->devs[j]->devid); |
897 | re->zones[i]->end); | ||
898 | for (j = 0; j < re->zones[i]->ndevs; ++j) { | ||
899 | printk(KERN_CONT " %lld", | ||
900 | re->zones[i]->devs[j]->devid); | ||
901 | } | ||
902 | } | 897 | } |
903 | } | 898 | } |
904 | printk(KERN_CONT "\n"); | 899 | printk(KERN_CONT "\n"); |
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
917 | struct reada_control *rc; | 912 | struct reada_control *rc; |
918 | u64 start; | 913 | u64 start; |
919 | u64 generation; | 914 | u64 generation; |
920 | int level; | ||
921 | int ret; | 915 | int ret; |
922 | struct extent_buffer *node; | 916 | struct extent_buffer *node; |
923 | static struct btrfs_key max_key = { | 917 | static struct btrfs_key max_key = { |
@@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
926 | .offset = (u64)-1 | 920 | .offset = (u64)-1 |
927 | }; | 921 | }; |
928 | 922 | ||
929 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 923 | rc = kzalloc(sizeof(*rc), GFP_KERNEL); |
930 | if (!rc) | 924 | if (!rc) |
931 | return ERR_PTR(-ENOMEM); | 925 | return ERR_PTR(-ENOMEM); |
932 | 926 | ||
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
940 | 934 | ||
941 | node = btrfs_root_node(root); | 935 | node = btrfs_root_node(root); |
942 | start = node->start; | 936 | start = node->start; |
943 | level = btrfs_header_level(node); | ||
944 | generation = btrfs_header_generation(node); | 937 | generation = btrfs_header_generation(node); |
945 | free_extent_buffer(node); | 938 | free_extent_buffer(node); |
946 | 939 | ||
947 | ret = reada_add_block(rc, start, &max_key, level, generation); | 940 | ret = reada_add_block(rc, start, &max_key, generation); |
948 | if (ret) { | 941 | if (ret) { |
949 | kfree(rc); | 942 | kfree(rc); |
950 | return ERR_PTR(ret); | 943 | return ERR_PTR(ret); |
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
959 | int btrfs_reada_wait(void *handle) | 952 | int btrfs_reada_wait(void *handle) |
960 | { | 953 | { |
961 | struct reada_control *rc = handle; | 954 | struct reada_control *rc = handle; |
955 | struct btrfs_fs_info *fs_info = rc->root->fs_info; | ||
962 | 956 | ||
963 | while (atomic_read(&rc->elems)) { | 957 | while (atomic_read(&rc->elems)) { |
958 | if (!atomic_read(&fs_info->reada_works_cnt)) | ||
959 | reada_start_machine(fs_info); | ||
964 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, | 960 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, |
965 | 5 * HZ); | 961 | 5 * HZ); |
966 | dump_devs(rc->root->fs_info, | 962 | dump_devs(rc->root->fs_info, |
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle) | |||
977 | int btrfs_reada_wait(void *handle) | 973 | int btrfs_reada_wait(void *handle) |
978 | { | 974 | { |
979 | struct reada_control *rc = handle; | 975 | struct reada_control *rc = handle; |
976 | struct btrfs_fs_info *fs_info = rc->root->fs_info; | ||
980 | 977 | ||
981 | while (atomic_read(&rc->elems)) { | 978 | while (atomic_read(&rc->elems)) { |
982 | wait_event(rc->wait, atomic_read(&rc->elems) == 0); | 979 | if (!atomic_read(&fs_info->reada_works_cnt)) |
980 | reada_start_machine(fs_info); | ||
981 | wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, | ||
982 | (HZ + 9) / 10); | ||
983 | } | 983 | } |
984 | 984 | ||
985 | kref_put(&rc->refcnt, reada_control_release); | 985 | kref_put(&rc->refcnt, reada_control_release); |