From 1db913823c0f8360fccbd24ca67eb073966a5ffd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 22 Jan 2010 17:06:20 -0500 Subject: ext4: Handle -EDQUOT error on write We need to release the journal before we do a write_inode. Otherwise we could deadlock. Signed-off-by: Aneesh Kumar K.V --- fs/ext4/inode.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c818972c8302..60b3a19e9927 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1835,24 +1835,12 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, md_needed + 1)) { - /* - * We tend to badly over-estimate the amount of - * metadata blocks which are needed, so if we have - * reserved any metadata blocks, try to force out the - * inode and see if we have any better luck. - */ - if (md_reserved && retries++ <= 3) - goto retry; + if (vfs_dq_reserve_block(inode, md_needed + 1)) return -EDQUOT; - } if (ext4_claim_free_blocks(sbi, md_needed + 1)) { vfs_dq_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - retry: - if (md_reserved) - write_inode_now(inode, (retries == 3)); yield(); goto repeat; } @@ -3032,7 +3020,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret, retries = 0; + int ret, retries = 0, quota_retries = 0; struct page *page; pgoff_t index; unsigned from, to; @@ -3091,6 +3079,22 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; + + if ((ret == -EDQUOT) && + EXT4_I(inode)->i_reserved_meta_blocks && + (quota_retries++ < 3)) { + /* + * Since we often over-estimate the number of meta + * data blocks required, we may sometimes get a + * spurios out of quota error even though there would + * be enough space once we write the data blocks and + * find out how many meta data blocks were _really_ + * required. So try forcing the inode write to see if + * that helps. + */ + write_inode_now(inode, (quota_retries == 3)); + goto retry; + } out: return ret; } -- cgit v1.2.2 From 5f634d064c709ea02c3cdaa850a08323a4a4bf28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Jan 2010 04:00:31 -0500 Subject: ext4: Fix quota accounting error with fallocate When we fallocate a region of the file which we had recently written, and which is still in the page cache marked as delayed allocated blocks we need to make sure we don't do the quota update on writepage path. This is because the needed quota updated would have already be done by fallocate. Signed-off-by: Aneesh Kumar K.V --- fs/ext4/ext4.h | 2 ++ fs/ext4/extents.c | 21 +++++++++++++++++++++ fs/ext4/inode.c | 44 +++++++++++++++++++++++++++++++------------- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af7b62699ea9..b98de17e542a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1443,6 +1443,8 @@ extern int ext4_block_truncate_page(handle_t *handle, extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); +extern void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7d7b74e94687..3b6ff72026f0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3132,7 +3132,19 @@ out: unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, newblock + max_blocks, allocated - max_blocks); + allocated = max_blocks; } + + /* + * If we have done fallocate with the offset that is already + * delayed allocated, we would have block reservation + * and quota reservation done in the delayed write path. + * But fallocate would have already updated quota and block + * count for this offset. So cancel these reservation + */ + if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + ext4_da_update_reserve_space(inode, allocated, 0); + map_out: set_buffer_mapped(bh_result); out1: @@ -3368,8 +3380,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); + if (allocated > max_blocks) + allocated = max_blocks; set_buffer_new(bh_result); + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + ext4_da_update_reserve_space(inode, allocated, 1); + /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an uninitialized extent. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60b3a19e9927..c955f6490b78 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1053,11 +1053,12 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) * Called with i_data_sem down, which is important since we can call * ext4_discard_preallocations() from here. */ -static void ext4_da_update_reserve_space(struct inode *inode, int used) +void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - int mdb_free = 0; + int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); if (unlikely(used > ei->i_reserved_data_blocks)) { @@ -1073,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) ei->i_reserved_data_blocks -= used; used += ei->i_allocated_meta_blocks; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + allocated_meta_blocks = ei->i_allocated_meta_blocks; ei->i_allocated_meta_blocks = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); @@ -1090,9 +1092,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* Update quota subsystem */ - vfs_dq_claim_block(inode, used); - if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + if (quota_claim) { + vfs_dq_claim_block(inode, used); + if (mdb_free) + vfs_dq_release_reservation_block(inode, mdb_free); + } else { + /* + * We did fallocate with an offset that is already delayed + * allocated. So on delayed allocated writeback we should + * not update the quota for allocated blocks. But then + * converting an fallocate region to initialized region would + * have caused a metadata allocation. So claim quota for + * that + */ + if (allocated_meta_blocks) + vfs_dq_claim_block(inode, allocated_meta_blocks); + vfs_dq_release_reservation_block(inode, mdb_free + used); + } /* * If we have done all the pending block allocations and if @@ -1292,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, */ EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; } - } + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. We don't + * support fallocate for non extent files. So we can update + * reserve space here. + */ + if ((retval > 0) && + (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + ext4_da_update_reserve_space(inode, retval, 1); + } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. - */ - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) - ext4_da_update_reserve_space(inode, retval); - up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { int ret = check_block_validity(inode, "file system " -- cgit v1.2.2 From 1296cc85c26e94eb865d03f82140f27d598de467 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 15 Jan 2010 01:27:59 -0500 Subject: ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag We should update reserve space if it is delalloc buffer and that is indicated by EXT4_GET_BLOCKS_DELALLOC_RESERVE flag. So use EXT4_GET_BLOCKS_DELALLOC_RESERVE in place of EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE Signed-off-by: Aneesh Kumar K.V --- fs/ext4/ext4.h | 7 ++----- fs/ext4/extents.c | 4 ++-- fs/ext4/inode.c | 8 ++++---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b98de17e542a..874d169a193e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -361,14 +361,11 @@ struct ext4_new_group_data { so set the magic i_delalloc_reserve_flag after taking the inode allocation semaphore for */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 - /* Call ext4_da_update_reserve_space() after successfully - allocating the blocks */ -#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0010 -#define EXT4_GET_BLOCKS_CONVERT 0x0020 +#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* Convert extent to initialized after direct IO complete */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3b6ff72026f0..765a4826b118 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3142,7 +3142,7 @@ out: * But fallocate would have already updated quota and block * count for this offset. So cancel these reservation */ - if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ext4_da_update_reserve_space(inode, allocated, 0); map_out: @@ -3388,7 +3388,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * Update reserved blocks/metadata blocks after successful * block allocation which had been deferred till now. */ - if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ext4_da_update_reserve_space(inode, allocated, 1); /* diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c955f6490b78..e11952404e02 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1316,7 +1316,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * reserve space here. */ if ((retval > 0) && - (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) ext4_da_update_reserve_space(inode, retval, 1); } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) @@ -2219,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * variables are updated after the blocks have been allocated. */ new.b_state = 0; - get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | - EXT4_GET_BLOCKS_DELALLOC_RESERVE); + get_blocks_flags = EXT4_GET_BLOCKS_CREATE; if (mpd->b_state & (1 << BH_Delay)) - get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; + get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; + blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, &new, get_blocks_flags); if (blks < 0) { -- cgit v1.2.2