author		Aditya Kali <adityakali@google.com>	2011-09-09 19:04:51 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2011-09-09 19:04:51 -0400
commit		7b415bf60f6afb0499fd3dc0ee33444f54e28567 (patch)
tree		9c64fef2b8d60ce64865af6e4c2cc6008026e28c /fs/ext4/extents.c
parent		27baebb849d46d901e756e6502b0a65a62e43771 (diff)
ext4: Fix bigalloc quota accounting and i_blocks value

With bigalloc changes, the i_blocks value was not correctly set (it was
still set to the number of blocks being used; but in the bigalloc case,
we want i_blocks to represent the number of clusters being used). Since
the quota subsystem sets the i_blocks value, this patch fixes the quota
accounting and makes sure that the i_blocks value is set correctly.

Signed-off-by: Aditya Kali <adityakali@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
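Bigalloc tracks on-disk allocation in clusters of 2^s_cluster_bits blocks, so quota must be charged per cluster rather than per block, and the patch converts between the two units throughout. A minimal standalone sketch of that conversion arithmetic, assuming a power-of-two cluster ratio as ext4 requires (the helper names b2c/c2b and the sample values are illustrative stand-ins for the kernel's EXT4_B2C()/EXT4_C2B() macros, not the macros themselves):

#include <stdio.h>

/* Illustrative stand-ins for EXT4_B2C()/EXT4_C2B(): with 1 << cluster_bits
 * blocks per cluster, a block number maps to its containing cluster by a
 * right shift, and a cluster number maps back to its first block by a
 * left shift. */
static unsigned int b2c(unsigned int block, unsigned int cluster_bits)
{
	return block >> cluster_bits;
}

static unsigned int c2b(unsigned int cluster, unsigned int cluster_bits)
{
	return cluster << cluster_bits;
}

int main(void)
{
	unsigned int cluster_bits = 2;	/* 4 blocks per cluster (example) */

	/* Blocks 0-3 live in cluster 0, blocks 4-7 in cluster 1, ... */
	printf("block 10 -> cluster %u\n", b2c(10, cluster_bits));	/* 2 */
	printf("cluster 2 starts at block %u\n", c2b(2, cluster_bits));	/* 8 */
	return 0;
}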
Diffstat (limited to 'fs/ext4/extents.c')
 fs/ext4/extents.c | 306 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 302 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd4479c08031..c4e005864534 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2686,6 +2686,21 @@ again:
 		}
 	}
 
+	/* If we still have something in the partial cluster and we have removed
+	 * even the first extent, then we should free the blocks in the partial
+	 * cluster as well. */
+	if (partial_cluster && path->p_hdr->eh_entries == 0) {
+		int flags = EXT4_FREE_BLOCKS_FORGET;
+
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			flags |= EXT4_FREE_BLOCKS_METADATA;
+
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
+				 EXT4_SB(sb)->s_cluster_ratio, flags);
+		partial_cluster = 0;
+	}
+
 	/* TODO: flexible tree reduction should be here */
 	if (path->p_hdr->eh_entries == 0) {
 		/*
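The hunk above frees the leftover partial cluster as one whole cluster: EXT4_C2B() converts the cluster number back to its first block, and s_cluster_ratio blocks are freed from there. A hedged sketch of just that range computation (the ratio and cluster number are made-up example values, and the multiply stands in for the kernel's shift-based EXT4_C2B()):

#include <stdio.h>

int main(void)
{
	unsigned int cluster_ratio = 4;	  /* blocks per cluster (example) */
	unsigned int partial_cluster = 7; /* cluster left over after removal (example) */

	/* EXT4_C2B(sbi, cluster): the first block of the cluster. */
	unsigned int first_block = partial_cluster * cluster_ratio;

	/* ext4_free_blocks() in the hunk is asked to free cluster_ratio
	 * blocks from that point, i.e. the whole cluster. */
	printf("freeing blocks %u-%u\n",
	       first_block, first_block + cluster_ratio - 1);
	return 0;
}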
@@ -3233,6 +3248,195 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	return ext4_mark_inode_dirty(handle, inode);
 }
 
+/**
+ * ext4_find_delalloc_range: find delayed allocated block in the given range.
+ *
+ * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
+ * whether there are any buffers marked for delayed allocation. It returns '1'
+ * on the first delalloc'ed buffer head found. If no buffer head in the given
+ * range is marked for delalloc, it returns 0.
+ * lblk_start should always be <= lblk_end.
+ * search_hint_reverse is to indicate that searching in reverse from lblk_end to
+ * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
+ * block sooner). This is useful when blocks are truncated sequentially from
+ * lblk_start towards lblk_end.
+ */
+static int ext4_find_delalloc_range(struct inode *inode,
+				    ext4_lblk_t lblk_start,
+				    ext4_lblk_t lblk_end,
+				    int search_hint_reverse)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct buffer_head *head, *bh = NULL;
+	struct page *page;
+	ext4_lblk_t i, pg_lblk;
+	pgoff_t index;
+
+	/* reverse search won't work if fs block size is less than page size */
+	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
+		search_hint_reverse = 0;
+
+	if (search_hint_reverse)
+		i = lblk_end;
+	else
+		i = lblk_start;
+
+	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	while ((i >= lblk_start) && (i <= lblk_end)) {
+		page = find_get_page(mapping, index);
+		if (!page || !PageDirty(page))
+			goto nextpage;
+
+		if (PageWriteback(page)) {
+			/*
+			 * This might be a race with allocation and writeout. In
+			 * this case we just assume that the rest of the range
+			 * will eventually be written and there won't be any
+			 * delalloc blocks left.
+			 * TODO: the above assumption is troublesome, but might
+			 * work better in practice. Another option could be to
+			 * note somewhere that the cluster is getting written
+			 * out and to detect that here.
+			 */
+			page_cache_release(page);
+			return 0;
+		}
+
+		if (!page_has_buffers(page))
+			goto nextpage;
+
+		head = page_buffers(page);
+		if (!head)
+			goto nextpage;
+
+		bh = head;
+		pg_lblk = index << (PAGE_CACHE_SHIFT -
+						inode->i_blkbits);
+		do {
+			if (unlikely(pg_lblk < lblk_start)) {
+				/*
+				 * This is possible when fs block size is less
+				 * than page size and our cluster starts/ends in
+				 * middle of the page. So we need to skip the
+				 * initial few blocks till we reach the 'lblk'
+				 */
+				pg_lblk++;
+				continue;
+			}
+
+			if (buffer_delay(bh)) {
+				page_cache_release(page);
+				return 1;
+			}
+			if (search_hint_reverse)
+				i--;
+			else
+				i++;
+		} while ((i >= lblk_start) && (i <= lblk_end) &&
+				((bh = bh->b_this_page) != head));
+nextpage:
+		if (page)
+			page_cache_release(page);
+		/*
+		 * Move to next page. 'i' will be the first lblk in the next
+		 * page.
+		 */
+		if (search_hint_reverse)
+			index--;
+		else
+			index++;
+		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	}
+
+	return 0;
+}
+
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+			       int search_hint_reverse)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t lblk_start, lblk_end;
+	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
+
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+					search_hint_reverse);
+}
+
+/**
+ * Determines how many complete clusters (out of those specified by the 'map')
+ * are under delalloc and were reserved quota for.
+ * This function is called when we are writing out the blocks that were
+ * originally written with their allocation delayed, but then the space was
+ * allocated using fallocate() before the delayed allocation could be resolved.
+ * The cases to look for are:
+ * ('=' indicates delayed allocated blocks
+ *  '-' indicates non-delayed allocated blocks)
+ * (a) partial clusters towards beginning and/or end outside of allocated range
+ *     are not delalloc'ed.
+ *	Ex:
+ *	|----c---=|====c====|====c====|===-c----|
+ *	         |++++++ allocated ++++++|
+ *	==> 4 complete clusters in above example
+ *
+ * (b) partial cluster (outside of allocated range) towards either end is
+ *     marked for delayed allocation. In this case, we will exclude that
+ *     cluster.
+ *	Ex:
+ *	|----====c========|========c========|
+ *	         |++++++ allocated ++++++|
+ *	==> 1 complete cluster in above example
+ *
+ *	Ex:
+ *	|================c================|
+ *	         |++++++ allocated ++++++|
+ *	==> 0 complete clusters in above example
+ *
+ * The ext4_da_update_reserve_space will be called only if we
+ * determine here that there were some "entire" clusters that span
+ * this 'allocated' range.
+ * In the non-bigalloc case, this function will just end up returning num_blks
+ * without ever calling ext4_find_delalloc_range.
+ */
+static unsigned int
+get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
+			   unsigned int num_blks)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
+	ext4_lblk_t lblk_from, lblk_to, c_offset;
+	unsigned int allocated_clusters = 0;
+
+	alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
+	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
+
+	/* max possible clusters for this allocation */
+	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
+
+	/* Check towards left side */
+	c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+	if (c_offset) {
+		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+		lblk_to = lblk_from + c_offset - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	/* Now check towards right. */
+	c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+	if (allocated_clusters && c_offset) {
+		lblk_from = lblk_start + num_blks;
+		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	return allocated_clusters;
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 					struct ext4_map_blocks *map,
@@ -3338,8 +3542,15 @@ out:
 	 * But fallocate would have already updated quota and block
 	 * count for this offset. So cancel these reservation
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 0);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		unsigned int reserved_clusters;
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+				map->m_lblk, map->m_len);
+		if (reserved_clusters)
+			ext4_da_update_reserve_space(inode,
+						     reserved_clusters,
+						     0);
+	}
 
 map_out:
 	map->m_flags |= EXT4_MAP_MAPPED;
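The left/right checks in get_reserved_cluster_alloc() above are easiest to see with concrete numbers. A hedged standalone sketch of the same cluster math (the delalloc lookup is stubbed with a hard-coded answer, since page-cache state cannot be reproduced in user space; all values are example inputs):

#include <stdio.h>

/* Stand-in for ext4_find_delalloc_range(): pretend blocks 8-9 are
 * under delayed allocation (arbitrary example answer). */
static int fake_find_delalloc_range(unsigned int from, unsigned int to)
{
	return from <= 9 && to >= 8;
}

int main(void)
{
	unsigned int ratio = 4;				/* blocks per cluster */
	unsigned int lblk_start = 10, num_blks = 6;	/* blocks 10-15 */

	/* EXT4_B2C(): maximum clusters touched by the range. */
	unsigned int first_c = lblk_start / ratio;			/* 2 */
	unsigned int last_c = (lblk_start + num_blks - 1) / ratio;	/* 3 */
	unsigned int clusters = last_c - first_c + 1;			/* 2 */

	/* Left side: blocks 8-9 share cluster 2 with the allocation and
	 * are delalloc'ed, so that cluster keeps its own reservation. */
	unsigned int c_offset = lblk_start & (ratio - 1);
	if (c_offset &&
	    fake_find_delalloc_range(lblk_start & ~(ratio - 1),
				     (lblk_start & ~(ratio - 1)) + c_offset - 1))
		clusters--;

	/* Right side: 10 + 6 = 16 is cluster-aligned, so c_offset is 0
	 * and the right-hand check is skipped entirely. */
	c_offset = (lblk_start + num_blks) & (ratio - 1);
	if (clusters && c_offset &&
	    fake_find_delalloc_range(lblk_start + num_blks,
				     lblk_start + num_blks + (ratio - c_offset) - 1))
		clusters--;

	printf("clusters whose reservation we may claim: %u\n", clusters); /* 1 */
	return 0;
}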
@@ -3484,6 +3695,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t newblock = 0;
 	int free_on_err = 0, err = 0, depth, ret;
 	unsigned int allocated = 0, offset = 0;
+	unsigned int allocated_clusters = 0, reserved_clusters = 0;
 	unsigned int punched_out = 0;
 	unsigned int result = 0;
 	struct ext4_allocation_request ar;
@@ -3499,6 +3711,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
 	    ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
+			if ((sbi->s_cluster_ratio > 1) &&
+			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
@@ -3509,6 +3725,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			/* we should allocate requested block */
 		} else {
 			/* block is already allocated */
+			if (sbi->s_cluster_ratio > 1)
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 			newblock = map->m_lblk
 				   - le32_to_cpu(newex.ee_block)
 				   + ext4_ext_pblock(&newex);
@@ -3665,6 +3883,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		}
 	}
 
+	if ((sbi->s_cluster_ratio > 1) &&
+	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
 	/*
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
@@ -3681,6 +3903,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * Okay, we need to do block allocation.
 	 */
+	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
 	newex.ee_block = cpu_to_le32(map->m_lblk);
 	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
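The cluster_offset computation above relies on s_cluster_ratio being a power of two, so map->m_lblk & (sbi->s_cluster_ratio-1) is the block's offset within its cluster. A quick illustration with made-up values:

#include <stdio.h>

int main(void)
{
	unsigned int ratio = 4;	/* s_cluster_ratio (example) */

	/* With a power-of-two ratio, ratio - 1 is a low-bit mask, so
	 * lblk & (ratio - 1) yields the offset inside the cluster. */
	for (unsigned int lblk = 8; lblk <= 11; lblk++)
		printf("lblk %u -> offset %u in cluster %u\n",
		       lblk, lblk & (ratio - 1), lblk / ratio);
	/* Prints offsets 0, 1, 2, 3 inside cluster 2. */
	return 0;
}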
@@ -3692,6 +3915,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(sbi, map, ex, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		goto got_allocated_blocks;
 	}
 
@@ -3712,6 +3936,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(sbi, map, ex2, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		goto got_allocated_blocks;
 	}
 
@@ -3765,6 +3990,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ext_debug("allocate new block: goal %llu, found %llu/%u\n",
 		  ar.goal, newblock, allocated);
 	free_on_err = 1;
+	allocated_clusters = ar.len;
 	ar.len = EXT4_C2B(sbi, ar.len) - offset;
 	if (ar.len > allocated)
 		ar.len = allocated;
@@ -3822,8 +4048,80 @@ got_allocated_blocks:
 	 * Update reserved blocks/metadata blocks after successful
 	 * block allocation which had been deferred till now.
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 1);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		/*
+		 * Check how many clusters we had reserved for this allocated
+		 * range.
+		 */
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+						map->m_lblk, allocated);
+		if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+			if (reserved_clusters) {
+				/*
+				 * We have clusters reserved for this range.
+				 * But since we are not doing actual allocation
+				 * and are simply using blocks from a previously
+				 * allocated cluster, we should release the
+				 * reservation and not claim quota.
+				 */
+				ext4_da_update_reserve_space(inode,
+						reserved_clusters, 0);
+			}
+		} else {
+			BUG_ON(allocated_clusters < reserved_clusters);
+			/* We will claim quota for all newly allocated blocks.*/
+			ext4_da_update_reserve_space(inode, allocated_clusters,
+							1);
+			if (reserved_clusters < allocated_clusters) {
+				int reservation = allocated_clusters -
+						  reserved_clusters;
+				/*
+				 * It seems we claimed a few clusters outside
+				 * of the range of this allocation. We should
+				 * give them back to the reservation pool. This
+				 * can happen in the following case:
+				 *
+				 * * Suppose s_cluster_ratio is 4 (i.e., each
+				 *   cluster has 4 blocks). Thus, the clusters
+				 *   are [0-3],[4-7],[8-11]...
+				 * * First comes a delayed allocation write for
+				 *   logical blocks 10 & 11. Since there were
+				 *   no previous delayed allocated blocks in
+				 *   the range [8-11], we would reserve 1
+				 *   cluster for this write.
+				 * * Next comes a write for logical blocks 3 to
+				 *   8. In this case, we will reserve 2
+				 *   clusters (for [0-3] and [4-7]; not for
+				 *   [8-11], as that range already has delayed
+				 *   allocated blocks). Thus the total reserved
+				 *   cluster count now becomes 3.
+				 * * Now, at delayed allocation writeout time,
+				 *   we will first write blocks [3-8] and
+				 *   allocate 3 clusters for writing these
+				 *   blocks. Also, we would claim all these
+				 *   three clusters above.
+				 * * Now when we come here to write out blocks
+				 *   [10-11], we would expect to claim the
+				 *   reservation of 1 cluster we had made (and
+				 *   we would claim it, since there are no more
+				 *   delayed allocated blocks in the range
+				 *   [8-11]). But our reserved cluster count
+				 *   has already gone to 0.
+				 *
+				 * Thus, at the writeout step above, when we
+				 * determine that there are still some
+				 * unwritten delayed allocated blocks outside
+				 * of our current block range, we should
+				 * increment the reserved clusters count so
+				 * that when the remaining blocks finally get
+				 * written, we can claim them.
+				 */
+				while (reservation) {
+					ext4_da_reserve_space(inode,
+							      map->m_lblk);
+					reservation--;
+				}
+			}
+		}
+	}
 
 	/*
 	 * Cache the extent and update transaction to commit on fdatasync only
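The four-bullet scenario in the comment above can be checked numerically. A toy model of the reserved-cluster counter under those steps (plain integers standing in for the kernel's actual accounting; values follow the comment's example, s_cluster_ratio = 4 with clusters [0-3], [4-7], [8-11]):

#include <stdio.h>

int main(void)
{
	int reserved = 0;

	reserved += 1;	/* delalloc write to blocks 10-11: reserve [8-11] */
	reserved += 2;	/* delalloc write to blocks 3-8: reserve [0-3], [4-7] */
	printf("after both buffered writes: %d\n", reserved);	/* 3 */

	/* Writeout of blocks 3-8 allocates and claims three clusters
	 * ([0-3], [4-7], [8-11]), although only two were reserved for
	 * this range. */
	int allocated_clusters = 3, reserved_clusters = 2;
	reserved -= allocated_clusters;		/* counter would hit 0 */

	/* The fix in the hunk above: re-reserve the difference, because
	 * blocks 10-11 still carry an unclaimed delalloc reservation. */
	reserved += allocated_clusters - reserved_clusters;
	printf("after writing out 3-8: %d\n", reserved);	/* 1 */

	reserved -= 1;	/* writeout of blocks 10-11 claims its cluster */
	printf("after writing out 10-11: %d\n", reserved);	/* 0 */
	return 0;
}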