diff options
author | Jan Kara <jack@suse.cz> | 2016-03-09 23:11:13 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2016-03-09 23:11:13 -0500 |
commit | 2d90c160e5f1d784e180f1e1458d56eee4d7f4f4 (patch) | |
tree | 883dc6e8433bbe315fa8fce00154d51ac52b3dd7 | |
parent | e3fb8eb14eafd2847c04cf48b52a705c36f4db98 (diff) |
ext4: more efficient SEEK_DATA implementation
Using SEEK_DATA in a huge sparse file can easily lead to softlockups as
ext4_seek_data() iterates over the hole block-by-block. Fix the problem by
using the hole size returned from ext4_map_blocks() and thus skip the hole
in one go.
Also update the SEEK_HOLE implementation to follow the same pattern as
SEEK_DATA to make future maintenance easier.
Furthermore, we add cond_resched() to both ext4_seek_data() and
ext4_seek_hole() to avoid softlockups in case an evil user creates a huge
fragmented file and we have to go through lots of extents.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- | fs/ext4/ext4.h | 3 | ||||
-rw-r--r-- | fs/ext4/file.c | 97 | ||||
-rw-r--r-- | fs/ext4/inode.c | 67 |
3 files changed, 106 insertions, 61 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 70b8e0409566..5623eec7fd22 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode, | |||
2546 | int used, int quota_claim); | 2546 | int used, int quota_claim); |
2547 | extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, | 2547 | extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, |
2548 | ext4_fsblk_t pblk, ext4_lblk_t len); | 2548 | ext4_fsblk_t pblk, ext4_lblk_t len); |
2549 | extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, | ||
2550 | unsigned int map_len, | ||
2551 | struct extent_status *result); | ||
2549 | 2552 | ||
2550 | /* indirect.c */ | 2553 | /* indirect.c */ |
2551 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | 2554 | extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4a1153561580..e93a7efaf78f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
426 | */ | 426 | */ |
427 | static int ext4_find_unwritten_pgoff(struct inode *inode, | 427 | static int ext4_find_unwritten_pgoff(struct inode *inode, |
428 | int whence, | 428 | int whence, |
429 | struct ext4_map_blocks *map, | 429 | ext4_lblk_t end_blk, |
430 | loff_t *offset) | 430 | loff_t *offset) |
431 | { | 431 | { |
432 | struct pagevec pvec; | 432 | struct pagevec pvec; |
@@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, | |||
441 | blkbits = inode->i_sb->s_blocksize_bits; | 441 | blkbits = inode->i_sb->s_blocksize_bits; |
442 | startoff = *offset; | 442 | startoff = *offset; |
443 | lastoff = startoff; | 443 | lastoff = startoff; |
444 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; | 444 | endoff = (loff_t)end_blk << blkbits; |
445 | 445 | ||
446 | index = startoff >> PAGE_CACHE_SHIFT; | 446 | index = startoff >> PAGE_CACHE_SHIFT; |
447 | end = endoff >> PAGE_CACHE_SHIFT; | 447 | end = endoff >> PAGE_CACHE_SHIFT; |
@@ -559,12 +559,11 @@ out: | |||
559 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | 559 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) |
560 | { | 560 | { |
561 | struct inode *inode = file->f_mapping->host; | 561 | struct inode *inode = file->f_mapping->host; |
562 | struct ext4_map_blocks map; | ||
563 | struct extent_status es; | 562 | struct extent_status es; |
564 | ext4_lblk_t start, last, end; | 563 | ext4_lblk_t start, last, end; |
565 | loff_t dataoff, isize; | 564 | loff_t dataoff, isize; |
566 | int blkbits; | 565 | int blkbits; |
567 | int ret = 0; | 566 | int ret; |
568 | 567 | ||
569 | inode_lock(inode); | 568 | inode_lock(inode); |
570 | 569 | ||
@@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
581 | dataoff = offset; | 580 | dataoff = offset; |
582 | 581 | ||
583 | do { | 582 | do { |
584 | map.m_lblk = last; | 583 | ret = ext4_get_next_extent(inode, last, end - last + 1, &es); |
585 | map.m_len = end - last + 1; | 584 | if (ret <= 0) { |
586 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 585 | /* No extent found -> no data */ |
587 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 586 | if (ret == 0) |
588 | if (last != start) | 587 | ret = -ENXIO; |
589 | dataoff = (loff_t)last << blkbits; | 588 | inode_unlock(inode); |
590 | break; | 589 | return ret; |
591 | } | 590 | } |
592 | 591 | ||
593 | /* | 592 | last = es.es_lblk; |
594 | * If there is a delay extent at this offset, | 593 | if (last != start) |
595 | * it will be as a data. | 594 | dataoff = (loff_t)last << blkbits; |
596 | */ | 595 | if (!ext4_es_is_unwritten(&es)) |
597 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | ||
598 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
599 | if (last != start) | ||
600 | dataoff = (loff_t)last << blkbits; | ||
601 | break; | 596 | break; |
602 | } | ||
603 | 597 | ||
604 | /* | 598 | /* |
605 | * If there is a unwritten extent at this offset, | 599 | * If there is a unwritten extent at this offset, |
606 | * it will be as a data or a hole according to page | 600 | * it will be as a data or a hole according to page |
607 | * cache that has data or not. | 601 | * cache that has data or not. |
608 | */ | 602 | */ |
609 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | 603 | if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, |
610 | int unwritten; | 604 | es.es_lblk + es.es_len, &dataoff)) |
611 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, | 605 | break; |
612 | &map, &dataoff); | 606 | last += es.es_len; |
613 | if (unwritten) | ||
614 | break; | ||
615 | } | ||
616 | |||
617 | last++; | ||
618 | dataoff = (loff_t)last << blkbits; | 607 | dataoff = (loff_t)last << blkbits; |
608 | cond_resched(); | ||
619 | } while (last <= end); | 609 | } while (last <= end); |
620 | 610 | ||
621 | inode_unlock(inode); | 611 | inode_unlock(inode); |
@@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
632 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | 622 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) |
633 | { | 623 | { |
634 | struct inode *inode = file->f_mapping->host; | 624 | struct inode *inode = file->f_mapping->host; |
635 | struct ext4_map_blocks map; | ||
636 | struct extent_status es; | 625 | struct extent_status es; |
637 | ext4_lblk_t start, last, end; | 626 | ext4_lblk_t start, last, end; |
638 | loff_t holeoff, isize; | 627 | loff_t holeoff, isize; |
639 | int blkbits; | 628 | int blkbits; |
640 | int ret = 0; | 629 | int ret; |
641 | 630 | ||
642 | inode_lock(inode); | 631 | inode_lock(inode); |
643 | 632 | ||
@@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
654 | holeoff = offset; | 643 | holeoff = offset; |
655 | 644 | ||
656 | do { | 645 | do { |
657 | map.m_lblk = last; | 646 | ret = ext4_get_next_extent(inode, last, end - last + 1, &es); |
658 | map.m_len = end - last + 1; | 647 | if (ret < 0) { |
659 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 648 | inode_unlock(inode); |
660 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 649 | return ret; |
661 | last += ret; | ||
662 | holeoff = (loff_t)last << blkbits; | ||
663 | continue; | ||
664 | } | 650 | } |
665 | 651 | /* Found a hole? */ | |
666 | /* | 652 | if (ret == 0 || es.es_lblk > last) { |
667 | * If there is a delay extent at this offset, | 653 | if (last != start) |
668 | * we will skip this extent. | 654 | holeoff = (loff_t)last << blkbits; |
669 | */ | 655 | break; |
670 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | ||
671 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
672 | last = es.es_lblk + es.es_len; | ||
673 | holeoff = (loff_t)last << blkbits; | ||
674 | continue; | ||
675 | } | 656 | } |
676 | |||
677 | /* | 657 | /* |
678 | * If there is a unwritten extent at this offset, | 658 | * If there is a unwritten extent at this offset, |
679 | * it will be as a data or a hole according to page | 659 | * it will be as a data or a hole according to page |
680 | * cache that has data or not. | 660 | * cache that has data or not. |
681 | */ | 661 | */ |
682 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | 662 | if (ext4_es_is_unwritten(&es) && |
683 | int unwritten; | 663 | ext4_find_unwritten_pgoff(inode, SEEK_HOLE, |
684 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | 664 | last + es.es_len, &holeoff)) |
685 | &map, &holeoff); | 665 | break; |
686 | if (!unwritten) { | ||
687 | last += ret; | ||
688 | holeoff = (loff_t)last << blkbits; | ||
689 | continue; | ||
690 | } | ||
691 | } | ||
692 | 666 | ||
693 | /* find a hole */ | 667 | last += es.es_len; |
694 | break; | 668 | holeoff = (loff_t)last << blkbits; |
669 | cond_resched(); | ||
695 | } while (last <= end); | 670 | } while (last <= end); |
696 | 671 | ||
697 | inode_unlock(inode); | 672 | inode_unlock(inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fddc6ddc53a8..ce2c4c62386f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5596 | 5596 | ||
5597 | return err; | 5597 | return err; |
5598 | } | 5598 | } |
5599 | |||
5600 | /* | ||
5601 | * Find the first extent at or after @lblk in an inode that is not a hole. | ||
5602 | * Search for @map_len blocks at most. The extent is returned in @result. | ||
5603 | * | ||
5604 | * The function returns 1 if we found an extent. The function returns 0 in | ||
5605 | * case there is no extent at or after @lblk and in that case also sets | ||
5606 | * @result->es_len to 0. In case of error, the error code is returned. | ||
5607 | */ | ||
5608 | int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk, | ||
5609 | unsigned int map_len, struct extent_status *result) | ||
5610 | { | ||
5611 | struct ext4_map_blocks map; | ||
5612 | struct extent_status es = {}; | ||
5613 | int ret; | ||
5614 | |||
5615 | map.m_lblk = lblk; | ||
5616 | map.m_len = map_len; | ||
5617 | |||
5618 | /* | ||
5619 | * For non-extent based files this loop may iterate several times since | ||
5620 | * we do not determine full hole size. | ||
5621 | */ | ||
5622 | while (map.m_len > 0) { | ||
5623 | ret = ext4_map_blocks(NULL, inode, &map, 0); | ||
5624 | if (ret < 0) | ||
5625 | return ret; | ||
5626 | /* There's extent covering m_lblk? Just return it. */ | ||
5627 | if (ret > 0) { | ||
5628 | int status; | ||
5629 | |||
5630 | ext4_es_store_pblock(result, map.m_pblk); | ||
5631 | result->es_lblk = map.m_lblk; | ||
5632 | result->es_len = map.m_len; | ||
5633 | if (map.m_flags & EXT4_MAP_UNWRITTEN) | ||
5634 | status = EXTENT_STATUS_UNWRITTEN; | ||
5635 | else | ||
5636 | status = EXTENT_STATUS_WRITTEN; | ||
5637 | ext4_es_store_status(result, status); | ||
5638 | return 1; | ||
5639 | } | ||
5640 | ext4_es_find_delayed_extent_range(inode, map.m_lblk, | ||
5641 | map.m_lblk + map.m_len - 1, | ||
5642 | &es); | ||
5643 | /* Is delalloc data before next block in extent tree? */ | ||
5644 | if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) { | ||
5645 | ext4_lblk_t offset = 0; | ||
5646 | |||
5647 | if (es.es_lblk < lblk) | ||
5648 | offset = lblk - es.es_lblk; | ||
5649 | result->es_lblk = es.es_lblk + offset; | ||
5650 | ext4_es_store_pblock(result, | ||
5651 | ext4_es_pblock(&es) + offset); | ||
5652 | result->es_len = es.es_len - offset; | ||
5653 | ext4_es_store_status(result, ext4_es_status(&es)); | ||
5654 | |||
5655 | return 1; | ||
5656 | } | ||
5657 | /* There's a hole at m_lblk, advance us after it */ | ||
5658 | map.m_lblk += map.m_len; | ||
5659 | map_len -= map.m_len; | ||
5660 | map.m_len = map_len; | ||
5661 | cond_resched(); | ||
5662 | } | ||
5663 | result->es_len = 0; | ||
5664 | return 0; | ||
5665 | } | ||