aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Yongqiang Yang <xiaoqiangnk@gmail.com> 2011-02-27 17:25:47 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2011-02-27 17:25:47 -0500
commit: 6d9c85eb700bd3ac59e63bb9de463dea1aca084c (patch)
tree: c87def25fd7c97b94e8d78700dd0794abe5c8a50 /fs
parent: 4dd89fc6251a6bda2c18e71e7d266e983806579d (diff)
ext4: make FIEMAP and delayed allocation play well together
Fix the FIEMAP ioctl so that it returns all of the page ranges which are still subject to delayed allocation. We were missing some cases if the file was sparse.

Reported by Chris Mason <chris.mason@oracle.com>:

>We've had reports on btrfs that cp is giving us files full of zeros
>instead of actually copying them. It was tracked down to a bug with
>the btrfs fiemap implementation where it was returning holes for
>delalloc ranges.
>
>Newer versions of cp are trusting fiemap to tell it where the holes
>are, which does seem like a pretty neat trick.
>
>I decided to give xfs and ext4 a shot with a few tests cases too, xfs
>passed with all the ones btrfs was getting wrong, and ext4 got the basic
>delalloc case right.
>$ mkfs.ext4 /dev/xxx
>$ mount /dev/xxx /mnt
>$ dd if=/dev/zero of=/mnt/foo bs=1M count=1
>$ fiemap-test foo
>ext: 0 logical: [ 0.. 255] phys: 0.. 255
>flags: 0x007 tot: 256
>
>Horray! But once we throw a hole in, things go bad:
>$ mkfs.ext4 /dev/xxx
>$ mount /dev/xxx /mnt
>$ dd if=/dev/zero of=/mnt/foo bs=1M count=1 seek=1
>$ fiemap-test foo
>< no output >
>
>We've got a delalloc extent after the hole and ext4 fiemap didn't find
>it. If I run sync to kick the delalloc out:
>$sync
>$ fiemap-test foo
>ext: 0 logical: [ 256.. 511] phys: 34048.. 34303
>flags: 0x001 tot: 256
>
>fiemap-test is sitting in my /usr/local/bin, and I have no idea how it
>got there. It's full of pretty comments so I know it isn't mine, but
>you can grab it here:
>
>http://oss.oracle.com/~mason/fiemap-test.c
>
>xfsqa has a fiemap program too.

After the fix, test results are as follows:

ext: 0 logical: [ 256.. 511] phys: 0.. 255
flags: 0x007 tot: 256

ext: 0 logical: [ 256.. 511] phys: 33280.. 33535
flags: 0x001 tot: 256

$ mkfs.ext4 /dev/xxx
$ mount /dev/xxx /mnt
$ dd if=/dev/zero of=/mnt/foo bs=1M count=1 seek=1
$ sync
$ dd if=/dev/zero of=/mnt/foo bs=1M count=1 seek=3
$ dd if=/dev/zero of=/mnt/foo bs=1M count=1 seek=5
$ fiemap-test foo
ext: 0 logical: [ 256.. 511] phys: 33280.. 33535
flags: 0x000 tot: 256
ext: 1 logical: [ 768.. 1023] phys: 0.. 255
flags: 0x006 tot: 256
ext: 2 logical: [ 1280.. 1535] phys: 0.. 255
flags: 0x007 tot: 256

Tested-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext4/extents.c 187
1 files changed, 148 insertions, 39 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d16f6b5a140b..9ea1bc64ca6d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3775,6 +3775,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3775 } 3775 }
3776 return ret > 0 ? ret2 : ret; 3776 return ret > 0 ? ret2 : ret;
3777} 3777}
3778
3778/* 3779/*
3779 * Callback function called for each extent to gather FIEMAP information. 3780 * Callback function called for each extent to gather FIEMAP information.
3780 */ 3781 */
@@ -3782,38 +3783,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3782 struct ext4_ext_cache *newex, struct ext4_extent *ex, 3783 struct ext4_ext_cache *newex, struct ext4_extent *ex,
3783 void *data) 3784 void *data)
3784{ 3785{
3785 struct fiemap_extent_info *fieinfo = data;
3786 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
3787 __u64 logical; 3786 __u64 logical;
3788 __u64 physical; 3787 __u64 physical;
3789 __u64 length; 3788 __u64 length;
3789 loff_t size;
3790 __u32 flags = 0; 3790 __u32 flags = 0;
3791 int error; 3791 int ret = 0;
3792 struct fiemap_extent_info *fieinfo = data;
3793 unsigned char blksize_bits;
3792 3794
3793 logical = (__u64)newex->ec_block << blksize_bits; 3795 blksize_bits = inode->i_sb->s_blocksize_bits;
3796 logical = (__u64)newex->ec_block << blksize_bits;
3794 3797
3795 if (newex->ec_start == 0) { 3798 if (newex->ec_start == 0) {
3796 pgoff_t offset; 3799 /*
3797 struct page *page; 3800 * No extent in extent-tree contains block @newex->ec_start,
3801 * then the block may stay in 1)a hole or 2)delayed-extent.
3802 *
3803 * Holes or delayed-extents are processed as follows.
3804 * 1. lookup dirty pages with specified range in pagecache.
3805 * If no page is got, then there is no delayed-extent and
3806 * return with EXT_CONTINUE.
3807 * 2. find the 1st mapped buffer,
3808 * 3. check if the mapped buffer is both in the request range
3809 * and a delayed buffer. If not, there is no delayed-extent,
3810 * then return.
3811 * 4. a delayed-extent is found, the extent will be collected.
3812 */
3813 ext4_lblk_t end = 0;
3814 pgoff_t last_offset;
3815 pgoff_t offset;
3816 pgoff_t index;
3817 struct page **pages = NULL;
3798 struct buffer_head *bh = NULL; 3818 struct buffer_head *bh = NULL;
3819 struct buffer_head *head = NULL;
3820 unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
3821
3822 pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
3823 if (pages == NULL)
3824 return -ENOMEM;
3799 3825
3800 offset = logical >> PAGE_SHIFT; 3826 offset = logical >> PAGE_SHIFT;
3801 page = find_get_page(inode->i_mapping, offset); 3827repeat:
3802 if (!page || !page_has_buffers(page)) 3828 last_offset = offset;
3803 return EXT_CONTINUE; 3829 head = NULL;
3830 ret = find_get_pages_tag(inode->i_mapping, &offset,
3831 PAGECACHE_TAG_DIRTY, nr_pages, pages);
3832
3833 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
3834 /* First time, try to find a mapped buffer. */
3835 if (ret == 0) {
3836out:
3837 for (index = 0; index < ret; index++)
3838 page_cache_release(pages[index]);
3839 /* just a hole. */
3840 kfree(pages);
3841 return EXT_CONTINUE;
3842 }
3804 3843
3805 bh = page_buffers(page); 3844 /* Try to find the 1st mapped buffer. */
3845 end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
3846 blksize_bits;
3847 if (!page_has_buffers(pages[0]))
3848 goto out;
3849 head = page_buffers(pages[0]);
3850 if (!head)
3851 goto out;
3806 3852
3807 if (!bh) 3853 bh = head;
3808 return EXT_CONTINUE; 3854 do {
3855 if (buffer_mapped(bh)) {
3856 /* get the 1st mapped buffer. */
3857 if (end > newex->ec_block +
3858 newex->ec_len)
3859 /* The buffer is out of
3860 * the request range.
3861 */
3862 goto out;
3863 goto found_mapped_buffer;
3864 }
3865 bh = bh->b_this_page;
3866 end++;
3867 } while (bh != head);
3809 3868
3810 if (buffer_delay(bh)) { 3869 /* No mapped buffer found. */
3811 flags |= FIEMAP_EXTENT_DELALLOC; 3870 goto out;
3812 page_cache_release(page);
3813 } else { 3871 } else {
3814 page_cache_release(page); 3872 /*Find contiguous delayed buffers. */
3815 return EXT_CONTINUE; 3873 if (ret > 0 && pages[0]->index == last_offset)
3874 head = page_buffers(pages[0]);
3875 bh = head;
3876 }
3877
3878found_mapped_buffer:
3879 if (bh != NULL && buffer_delay(bh)) {
3880 /* 1st or contiguous delayed buffer found. */
3881 if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
3882 /*
3883 * 1st delayed buffer found, record
3884 * the start of extent.
3885 */
3886 flags |= FIEMAP_EXTENT_DELALLOC;
3887 newex->ec_block = end;
3888 logical = (__u64)end << blksize_bits;
3889 }
3890 /* Find contiguous delayed buffers. */
3891 do {
3892 if (!buffer_delay(bh))
3893 goto found_delayed_extent;
3894 bh = bh->b_this_page;
3895 end++;
3896 } while (bh != head);
3897
3898 for (index = 1; index < ret; index++) {
3899 if (!page_has_buffers(pages[index])) {
3900 bh = NULL;
3901 break;
3902 }
3903 head = page_buffers(pages[index]);
3904 if (!head) {
3905 bh = NULL;
3906 break;
3907 }
3908 if (pages[index]->index !=
3909 pages[0]->index + index) {
3910 /* Blocks are not contiguous. */
3911 bh = NULL;
3912 break;
3913 }
3914 bh = head;
3915 do {
3916 if (!buffer_delay(bh))
3917 /* Delayed-extent ends. */
3918 goto found_delayed_extent;
3919 bh = bh->b_this_page;
3920 end++;
3921 } while (bh != head);
3922 }
3923 } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
3924 /* a hole found. */
3925 goto out;
3926
3927found_delayed_extent:
3928 newex->ec_len = min(end - newex->ec_block,
3929 (ext4_lblk_t)EXT_INIT_MAX_LEN);
3930 if (ret == nr_pages && bh != NULL &&
3931 newex->ec_len < EXT_INIT_MAX_LEN &&
3932 buffer_delay(bh)) {
3933 /* Have not collected an extent and continue. */
3934 for (index = 0; index < ret; index++)
3935 page_cache_release(pages[index]);
3936 goto repeat;
3816 } 3937 }
3938
3939 for (index = 0; index < ret; index++)
3940 page_cache_release(pages[index]);
3941 kfree(pages);
3817 } 3942 }
3818 3943
3819 physical = (__u64)newex->ec_start << blksize_bits; 3944 physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3947,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3822 if (ex && ext4_ext_is_uninitialized(ex)) 3947 if (ex && ext4_ext_is_uninitialized(ex))
3823 flags |= FIEMAP_EXTENT_UNWRITTEN; 3948 flags |= FIEMAP_EXTENT_UNWRITTEN;
3824 3949
3825 /* 3950 size = i_size_read(inode);
3826 * If this extent reaches EXT_MAX_BLOCK, it must be last. 3951 if (logical + length >= size)
3827 *
3828 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3829 * this also indicates no more allocated blocks.
3830 *
3831 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3832 */
3833 if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
3834 newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
3835 loff_t size = i_size_read(inode);
3836 loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
3837
3838 flags |= FIEMAP_EXTENT_LAST; 3952 flags |= FIEMAP_EXTENT_LAST;
3839 if ((flags & FIEMAP_EXTENT_DELALLOC) &&
3840 logical+length > size)
3841 length = (size - logical + bs - 1) & ~(bs-1);
3842 }
3843 3953
3844 error = fiemap_fill_next_extent(fieinfo, logical, physical, 3954 ret = fiemap_fill_next_extent(fieinfo, logical, physical,
3845 length, flags); 3955 length, flags);
3846 if (error < 0) 3956 if (ret < 0)
3847 return error; 3957 return ret;
3848 if (error == 1) 3958 if (ret == 1)
3849 return EXT_BREAK; 3959 return EXT_BREAK;
3850
3851 return EXT_CONTINUE; 3960 return EXT_CONTINUE;
3852} 3961}
3853 3962