diff options
author | Alexandre Oliva <oliva@gnu.org> | 2013-05-15 11:38:55 -0400 |
---|---|---|
committer | Josef Bacik <jbacik@fusionio.com> | 2013-05-17 21:40:35 -0400 |
commit | 17a5adccf3fd01added91f3bf9aa7ee9aa28843b (patch) | |
tree | 88db46d064c88cfee010cd870ca3a8ff262a4505 /fs/btrfs | |
parent | b216cbfb52c08300c203abf06ea9519d15d10045 (diff) |
btrfs: do away with non-whole_page extent I/O
end_bio_extent_readpage computes whole_page based on bv_offset and
bv_len, without taking into account that blk_update_request may modify
them when some of the blocks to be read into a page produce a read
error. The read completion would then unlock only part of the file
range associated with the page, leaving the entire page locked. That
would not only keep the process blocked instead of returning -EIO to
it, but also prevent any further access to the file.
It turns out that btrfs always issues whole-page reads and writes.
The special handling of non-whole_page appears to be a mistake or a
left-over from a time when this wasn't the case. Indeed,
end_bio_extent_writepage distinguished between whole_page and
non-whole_page writes but behaved identically in both cases!
I've replaced the whole_page computations with warnings, just to be
sure that we're not issuing partial page reads or writes. The
warnings should probably be removed at some point.
Signed-off-by: Alexandre Oliva <oliva@gnu.org>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/extent_io.c | 85 |
1 file changed, 30 insertions, 55 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e6e410002e5..ca4355ddea06 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1948,28 +1948,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | |||
1948 | } | 1948 | } |
1949 | 1949 | ||
1950 | /* | 1950 | /* |
1951 | * helper function to unlock a page if all the extents in the tree | ||
1952 | * for that page are unlocked | ||
1953 | */ | ||
1954 | static void check_page_locked(struct extent_io_tree *tree, struct page *page) | ||
1955 | { | ||
1956 | u64 start = page_offset(page); | ||
1957 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
1958 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) | ||
1959 | unlock_page(page); | ||
1960 | } | ||
1961 | |||
1962 | /* | ||
1963 | * helper function to end page writeback if all the extents | ||
1964 | * in the tree for that page are done with writeback | ||
1965 | */ | ||
1966 | static void check_page_writeback(struct extent_io_tree *tree, | ||
1967 | struct page *page) | ||
1968 | { | ||
1969 | end_page_writeback(page); | ||
1970 | } | ||
1971 | |||
1972 | /* | ||
1973 | * When IO fails, either with EIO or csum verification fails, we | 1951 | * When IO fails, either with EIO or csum verification fails, we |
1974 | * try other mirrors that might have a good copy of the data. This | 1952 | * try other mirrors that might have a good copy of the data. This |
1975 | * io_failure_record is used to record state as we go through all the | 1953 | * io_failure_record is used to record state as we go through all the |
@@ -2398,19 +2376,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
2398 | struct extent_io_tree *tree; | 2376 | struct extent_io_tree *tree; |
2399 | u64 start; | 2377 | u64 start; |
2400 | u64 end; | 2378 | u64 end; |
2401 | int whole_page; | ||
2402 | 2379 | ||
2403 | do { | 2380 | do { |
2404 | struct page *page = bvec->bv_page; | 2381 | struct page *page = bvec->bv_page; |
2405 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2382 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
2406 | 2383 | ||
2407 | start = page_offset(page) + bvec->bv_offset; | 2384 | /* We always issue full-page reads, but if some block |
2408 | end = start + bvec->bv_len - 1; | 2385 | * in a page fails to read, blk_update_request() will |
2386 | * advance bv_offset and adjust bv_len to compensate. | ||
2387 | * Print a warning for nonzero offsets, and an error | ||
2388 | * if they don't add up to a full page. */ | ||
2389 | if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) | ||
2390 | printk("%s page write in btrfs with offset %u and length %u\n", | ||
2391 | bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE | ||
2392 | ? KERN_ERR "partial" : KERN_INFO "incomplete", | ||
2393 | bvec->bv_offset, bvec->bv_len); | ||
2409 | 2394 | ||
2410 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) | 2395 | start = page_offset(page); |
2411 | whole_page = 1; | 2396 | end = start + bvec->bv_offset + bvec->bv_len - 1; |
2412 | else | ||
2413 | whole_page = 0; | ||
2414 | 2397 | ||
2415 | if (--bvec >= bio->bi_io_vec) | 2398 | if (--bvec >= bio->bi_io_vec) |
2416 | prefetchw(&bvec->bv_page->flags); | 2399 | prefetchw(&bvec->bv_page->flags); |
@@ -2418,10 +2401,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
2418 | if (end_extent_writepage(page, err, start, end)) | 2401 | if (end_extent_writepage(page, err, start, end)) |
2419 | continue; | 2402 | continue; |
2420 | 2403 | ||
2421 | if (whole_page) | 2404 | end_page_writeback(page); |
2422 | end_page_writeback(page); | ||
2423 | else | ||
2424 | check_page_writeback(tree, page); | ||
2425 | } while (bvec >= bio->bi_io_vec); | 2405 | } while (bvec >= bio->bi_io_vec); |
2426 | 2406 | ||
2427 | bio_put(bio); | 2407 | bio_put(bio); |
@@ -2446,7 +2426,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2446 | struct extent_io_tree *tree; | 2426 | struct extent_io_tree *tree; |
2447 | u64 start; | 2427 | u64 start; |
2448 | u64 end; | 2428 | u64 end; |
2449 | int whole_page; | ||
2450 | int mirror; | 2429 | int mirror; |
2451 | int ret; | 2430 | int ret; |
2452 | 2431 | ||
@@ -2463,13 +2442,19 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2463 | (long int)bio->bi_bdev); | 2442 | (long int)bio->bi_bdev); |
2464 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2443 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
2465 | 2444 | ||
2466 | start = page_offset(page) + bvec->bv_offset; | 2445 | /* We always issue full-page reads, but if some block |
2467 | end = start + bvec->bv_len - 1; | 2446 | * in a page fails to read, blk_update_request() will |
2447 | * advance bv_offset and adjust bv_len to compensate. | ||
2448 | * Print a warning for nonzero offsets, and an error | ||
2449 | * if they don't add up to a full page. */ | ||
2450 | if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) | ||
2451 | printk("%s page read in btrfs with offset %u and length %u\n", | ||
2452 | bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE | ||
2453 | ? KERN_ERR "partial" : KERN_INFO "incomplete", | ||
2454 | bvec->bv_offset, bvec->bv_len); | ||
2468 | 2455 | ||
2469 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) | 2456 | start = page_offset(page); |
2470 | whole_page = 1; | 2457 | end = start + bvec->bv_offset + bvec->bv_len - 1; |
2471 | else | ||
2472 | whole_page = 0; | ||
2473 | 2458 | ||
2474 | if (++bvec <= bvec_end) | 2459 | if (++bvec <= bvec_end) |
2475 | prefetchw(&bvec->bv_page->flags); | 2460 | prefetchw(&bvec->bv_page->flags); |
@@ -2528,23 +2513,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2528 | } | 2513 | } |
2529 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); | 2514 | unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); |
2530 | 2515 | ||
2531 | if (whole_page) { | 2516 | if (uptodate) { |
2532 | if (uptodate) { | 2517 | SetPageUptodate(page); |
2533 | SetPageUptodate(page); | ||
2534 | } else { | ||
2535 | ClearPageUptodate(page); | ||
2536 | SetPageError(page); | ||
2537 | } | ||
2538 | unlock_page(page); | ||
2539 | } else { | 2518 | } else { |
2540 | if (uptodate) { | 2519 | ClearPageUptodate(page); |
2541 | check_page_uptodate(tree, page); | 2520 | SetPageError(page); |
2542 | } else { | ||
2543 | ClearPageUptodate(page); | ||
2544 | SetPageError(page); | ||
2545 | } | ||
2546 | check_page_locked(tree, page); | ||
2547 | } | 2521 | } |
2522 | unlock_page(page); | ||
2548 | } while (bvec <= bvec_end); | 2523 | } while (bvec <= bvec_end); |
2549 | 2524 | ||
2550 | bio_put(bio); | 2525 | bio_put(bio); |