aboutsummaryrefslogtreecommitdiffstats
path: root/fs/buffer.c
diff options
context:
space:
mode:
authorAndreas Gruenbacher <agruenba@redhat.com>2017-06-29 14:43:20 -0400
committerDarrick J. Wong <darrick.wong@oracle.com>2017-07-03 01:46:13 -0400
commit334fd34d76f237c0ee58dfc400d2c4e34d660544 (patch)
tree66f3e447de5bc9efc02a7366145ba7730ed49c62 /fs/buffer.c
parent7175a11214f02e6184690c17cf5366012b667531 (diff)
vfs: Add page_cache_seek_hole_data helper
Both ext4 and xfs implement seeking for the next hole or piece of data in unwritten extents by scanning the page cache, and both versions share the same bug when iterating the buffers of a page: the start offset into the page isn't taken into account, so when a page fits more than two filesystem blocks, things will go wrong. For example, on a filesystem with a block size of 1k, the following command will fail:

  xfs_io -f -c "falloc 0 4k" \
            -c "pwrite 1k 1k" \
            -c "pwrite 3k 1k" \
            -c "seek -a -r 0" foo

In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048, SEEK_DATA) will return the correct result.

Introduce a generic vfs helper for seeking in the page cache that gets this right. The next commits will replace the filesystem specific implementations.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: dropped the export]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c124
1 files changed, 124 insertions, 0 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..b3674eb7c9c0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3492,6 +3492,130 @@ int bh_submit_read(struct buffer_head *bh)
3492} 3492}
3493EXPORT_SYMBOL(bh_submit_read); 3493EXPORT_SYMBOL(bh_submit_read);
3494 3494
3495/*
3496 * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
3497 *
3498 * Returns the offset within the file on success, and -ENOENT otherwise.
3499 */
3500static loff_t
3501page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
3502{
3503 loff_t offset = page_offset(page);
3504 struct buffer_head *bh, *head;
3505 bool seek_data = whence == SEEK_DATA;
3506
3507 if (lastoff < offset)
3508 lastoff = offset;
3509
3510 bh = head = page_buffers(page);
3511 do {
3512 offset += bh->b_size;
3513 if (lastoff >= offset)
3514 continue;
3515
3516 /*
3517 * Unwritten extents that have data in the page cache covering
3518 * them can be identified by the BH_Unwritten state flag.
3519 * Pages with multiple buffers might have a mix of holes, data
3520 * and unwritten extents - any buffer with valid data in it
3521 * should have BH_Uptodate flag set on it.
3522 */
3523
3524 if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
3525 return lastoff;
3526
3527 lastoff = offset;
3528 } while ((bh = bh->b_this_page) != head);
3529 return -ENOENT;
3530}
3531
3532/*
3533 * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
3534 *
3535 * Within unwritten extents, the page cache determines which parts are holes
3536 * and which are data: unwritten and uptodate buffer heads count as data;
3537 * everything else counts as a hole.
3538 *
3539 * Returns the resulting offset on successs, and -ENOENT otherwise.
3540 */
3541loff_t
3542page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
3543 int whence)
3544{
3545 pgoff_t index = offset >> PAGE_SHIFT;
3546 pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
3547 loff_t lastoff = offset;
3548 struct pagevec pvec;
3549
3550 if (length <= 0)
3551 return -ENOENT;
3552
3553 pagevec_init(&pvec, 0);
3554
3555 do {
3556 unsigned want, nr_pages, i;
3557
3558 want = min_t(unsigned, end - index, PAGEVEC_SIZE);
3559 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
3560 if (nr_pages == 0)
3561 break;
3562
3563 for (i = 0; i < nr_pages; i++) {
3564 struct page *page = pvec.pages[i];
3565
3566 /*
3567 * At this point, the page may be truncated or
3568 * invalidated (changing page->mapping to NULL), or
3569 * even swizzled back from swapper_space to tmpfs file
3570 * mapping. However, page->index will not change
3571 * because we have a reference on the page.
3572 *
3573 * If current page offset is beyond where we've ended,
3574 * we've found a hole.
3575 */
3576 if (whence == SEEK_HOLE &&
3577 lastoff < page_offset(page))
3578 goto check_range;
3579
3580 /* Searching done if the page index is out of range. */
3581 if (page->index >= end)
3582 goto not_found;
3583
3584 lock_page(page);
3585 if (likely(page->mapping == inode->i_mapping) &&
3586 page_has_buffers(page)) {
3587 lastoff = page_seek_hole_data(page, lastoff, whence);
3588 if (lastoff >= 0) {
3589 unlock_page(page);
3590 goto check_range;
3591 }
3592 }
3593 unlock_page(page);
3594 lastoff = page_offset(page) + PAGE_SIZE;
3595 }
3596
3597 /* Searching done if fewer pages returned than wanted. */
3598 if (nr_pages < want)
3599 break;
3600
3601 index = pvec.pages[i - 1]->index + 1;
3602 pagevec_release(&pvec);
3603 } while (index < end);
3604
3605 /* When no page at lastoff and we are not done, we found a hole. */
3606 if (whence != SEEK_HOLE)
3607 goto not_found;
3608
3609check_range:
3610 if (lastoff < offset + length)
3611 goto out;
3612not_found:
3613 lastoff = -ENOENT;
3614out:
3615 pagevec_release(&pvec);
3616 return lastoff;
3617}
3618
3495void __init buffer_init(void) 3619void __init buffer_init(void)
3496{ 3620{
3497 unsigned long nrpages; 3621 unsigned long nrpages;