aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2016-03-09 23:11:13 -0500
committerTheodore Ts'o <tytso@mit.edu>2016-03-09 23:11:13 -0500
commit2d90c160e5f1d784e180f1e1458d56eee4d7f4f4 (patch)
tree883dc6e8433bbe315fa8fce00154d51ac52b3dd7
parente3fb8eb14eafd2847c04cf48b52a705c36f4db98 (diff)
ext4: more efficient SEEK_DATA implementation
Using SEEK_DATA in a huge sparse file can easily lead to softlockups, as ext4_seek_data() iterates over a hole block by block. Fix the problem by using the hole size returned by ext4_map_blocks() and thus skipping the hole in one go. Also update the SEEK_HOLE implementation to follow the same pattern as SEEK_DATA to make future maintenance easier. Furthermore, we add cond_resched() to both ext4_seek_data() and ext4_seek_hole() to avoid softlockups in case an evil user creates a huge fragmented file and we have to go through lots of extents. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/file.c97
-rw-r--r--fs/ext4/inode.c67
3 files changed, 106 insertions, 61 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 70b8e0409566..5623eec7fd22 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
2546 int used, int quota_claim); 2546 int used, int quota_claim);
2547extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, 2547extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
2548 ext4_fsblk_t pblk, ext4_lblk_t len); 2548 ext4_fsblk_t pblk, ext4_lblk_t len);
2549extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
2550 unsigned int map_len,
2551 struct extent_status *result);
2549 2552
2550/* indirect.c */ 2553/* indirect.c */
2551extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 2554extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4a1153561580..e93a7efaf78f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
426 */ 426 */
427static int ext4_find_unwritten_pgoff(struct inode *inode, 427static int ext4_find_unwritten_pgoff(struct inode *inode,
428 int whence, 428 int whence,
429 struct ext4_map_blocks *map, 429 ext4_lblk_t end_blk,
430 loff_t *offset) 430 loff_t *offset)
431{ 431{
432 struct pagevec pvec; 432 struct pagevec pvec;
@@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
441 blkbits = inode->i_sb->s_blocksize_bits; 441 blkbits = inode->i_sb->s_blocksize_bits;
442 startoff = *offset; 442 startoff = *offset;
443 lastoff = startoff; 443 lastoff = startoff;
444 endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; 444 endoff = (loff_t)end_blk << blkbits;
445 445
446 index = startoff >> PAGE_CACHE_SHIFT; 446 index = startoff >> PAGE_CACHE_SHIFT;
447 end = endoff >> PAGE_CACHE_SHIFT; 447 end = endoff >> PAGE_CACHE_SHIFT;
@@ -559,12 +559,11 @@ out:
559static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) 559static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
560{ 560{
561 struct inode *inode = file->f_mapping->host; 561 struct inode *inode = file->f_mapping->host;
562 struct ext4_map_blocks map;
563 struct extent_status es; 562 struct extent_status es;
564 ext4_lblk_t start, last, end; 563 ext4_lblk_t start, last, end;
565 loff_t dataoff, isize; 564 loff_t dataoff, isize;
566 int blkbits; 565 int blkbits;
567 int ret = 0; 566 int ret;
568 567
569 inode_lock(inode); 568 inode_lock(inode);
570 569
@@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
581 dataoff = offset; 580 dataoff = offset;
582 581
583 do { 582 do {
584 map.m_lblk = last; 583 ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
585 map.m_len = end - last + 1; 584 if (ret <= 0) {
586 ret = ext4_map_blocks(NULL, inode, &map, 0); 585 /* No extent found -> no data */
587 if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { 586 if (ret == 0)
588 if (last != start) 587 ret = -ENXIO;
589 dataoff = (loff_t)last << blkbits; 588 inode_unlock(inode);
590 break; 589 return ret;
591 } 590 }
592 591
593 /* 592 last = es.es_lblk;
594 * If there is a delay extent at this offset, 593 if (last != start)
595 * it will be as a data. 594 dataoff = (loff_t)last << blkbits;
596 */ 595 if (!ext4_es_is_unwritten(&es))
597 ext4_es_find_delayed_extent_range(inode, last, last, &es);
598 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
599 if (last != start)
600 dataoff = (loff_t)last << blkbits;
601 break; 596 break;
602 }
603 597
604 /* 598 /*
605 * If there is a unwritten extent at this offset, 599 * If there is a unwritten extent at this offset,
606 * it will be as a data or a hole according to page 600 * it will be as a data or a hole according to page
607 * cache that has data or not. 601 * cache that has data or not.
608 */ 602 */
609 if (map.m_flags & EXT4_MAP_UNWRITTEN) { 603 if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
610 int unwritten; 604 es.es_lblk + es.es_len, &dataoff))
611 unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, 605 break;
612 &map, &dataoff); 606 last += es.es_len;
613 if (unwritten)
614 break;
615 }
616
617 last++;
618 dataoff = (loff_t)last << blkbits; 607 dataoff = (loff_t)last << blkbits;
608 cond_resched();
619 } while (last <= end); 609 } while (last <= end);
620 610
621 inode_unlock(inode); 611 inode_unlock(inode);
@@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
632static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) 622static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
633{ 623{
634 struct inode *inode = file->f_mapping->host; 624 struct inode *inode = file->f_mapping->host;
635 struct ext4_map_blocks map;
636 struct extent_status es; 625 struct extent_status es;
637 ext4_lblk_t start, last, end; 626 ext4_lblk_t start, last, end;
638 loff_t holeoff, isize; 627 loff_t holeoff, isize;
639 int blkbits; 628 int blkbits;
640 int ret = 0; 629 int ret;
641 630
642 inode_lock(inode); 631 inode_lock(inode);
643 632
@@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
654 holeoff = offset; 643 holeoff = offset;
655 644
656 do { 645 do {
657 map.m_lblk = last; 646 ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
658 map.m_len = end - last + 1; 647 if (ret < 0) {
659 ret = ext4_map_blocks(NULL, inode, &map, 0); 648 inode_unlock(inode);
660 if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { 649 return ret;
661 last += ret;
662 holeoff = (loff_t)last << blkbits;
663 continue;
664 } 650 }
665 651 /* Found a hole? */
666 /* 652 if (ret == 0 || es.es_lblk > last) {
667 * If there is a delay extent at this offset, 653 if (last != start)
668 * we will skip this extent. 654 holeoff = (loff_t)last << blkbits;
669 */ 655 break;
670 ext4_es_find_delayed_extent_range(inode, last, last, &es);
671 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
672 last = es.es_lblk + es.es_len;
673 holeoff = (loff_t)last << blkbits;
674 continue;
675 } 656 }
676
677 /* 657 /*
678 * If there is a unwritten extent at this offset, 658 * If there is a unwritten extent at this offset,
679 * it will be as a data or a hole according to page 659 * it will be as a data or a hole according to page
680 * cache that has data or not. 660 * cache that has data or not.
681 */ 661 */
682 if (map.m_flags & EXT4_MAP_UNWRITTEN) { 662 if (ext4_es_is_unwritten(&es) &&
683 int unwritten; 663 ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
684 unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, 664 last + es.es_len, &holeoff))
685 &map, &holeoff); 665 break;
686 if (!unwritten) {
687 last += ret;
688 holeoff = (loff_t)last << blkbits;
689 continue;
690 }
691 }
692 666
693 /* find a hole */ 667 last += es.es_len;
694 break; 668 holeoff = (loff_t)last << blkbits;
669 cond_resched();
695 } while (last <= end); 670 } while (last <= end);
696 671
697 inode_unlock(inode); 672 inode_unlock(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fddc6ddc53a8..ce2c4c62386f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
5596 5596
5597 return err; 5597 return err;
5598} 5598}
5599
5600/*
5601 * Find the first extent at or after @lblk in an inode that is not a hole.
5602 * Search for @map_len blocks at most. The extent is returned in @result.
5603 *
5604 * The function returns 1 if we found an extent. The function returns 0 in
5605 * case there is no extent at or after @lblk and in that case also sets
5606 * @result->es_len to 0. In case of error, the error code is returned.
5607 */
5608int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
5609 unsigned int map_len, struct extent_status *result)
5610{
5611 struct ext4_map_blocks map;
5612 struct extent_status es = {};
5613 int ret;
5614
5615 map.m_lblk = lblk;
5616 map.m_len = map_len;
5617
5618 /*
5619 * For non-extent based files this loop may iterate several times since
5620 * we do not determine full hole size.
5621 */
5622 while (map.m_len > 0) {
5623 ret = ext4_map_blocks(NULL, inode, &map, 0);
5624 if (ret < 0)
5625 return ret;
5626 /* There's extent covering m_lblk? Just return it. */
5627 if (ret > 0) {
5628 int status;
5629
5630 ext4_es_store_pblock(result, map.m_pblk);
5631 result->es_lblk = map.m_lblk;
5632 result->es_len = map.m_len;
5633 if (map.m_flags & EXT4_MAP_UNWRITTEN)
5634 status = EXTENT_STATUS_UNWRITTEN;
5635 else
5636 status = EXTENT_STATUS_WRITTEN;
5637 ext4_es_store_status(result, status);
5638 return 1;
5639 }
5640 ext4_es_find_delayed_extent_range(inode, map.m_lblk,
5641 map.m_lblk + map.m_len - 1,
5642 &es);
5643 /* Is delalloc data before next block in extent tree? */
5644 if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
5645 ext4_lblk_t offset = 0;
5646
5647 if (es.es_lblk < lblk)
5648 offset = lblk - es.es_lblk;
5649 result->es_lblk = es.es_lblk + offset;
5650 ext4_es_store_pblock(result,
5651 ext4_es_pblock(&es) + offset);
5652 result->es_len = es.es_len - offset;
5653 ext4_es_store_status(result, ext4_es_status(&es));
5654
5655 return 1;
5656 }
5657 /* There's a hole at m_lblk, advance us after it */
5658 map.m_lblk += map.m_len;
5659 map_len -= map.m_len;
5660 map.m_len = map_len;
5661 cond_resched();
5662 }
5663 result->es_len = 0;
5664 return 0;
5665}