author	Aditya Kali <adityakali@google.com>	2011-09-09 19:04:51 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2011-09-09 19:04:51 -0400
commit	7b415bf60f6afb0499fd3dc0ee33444f54e28567 (patch)
tree	9c64fef2b8d60ce64865af6e4c2cc6008026e28c
parent	27baebb849d46d901e756e6502b0a65a62e43771 (diff)
ext4: Fix bigalloc quota accounting and i_blocks value
With the bigalloc changes, the i_blocks value was not correctly set: it was still set to the number of blocks being used, but with bigalloc we want i_blocks to represent the number of clusters being used. Since the quota subsystem sets the i_blocks value, this patch fixes the quota accounting and makes sure that the i_blocks value is set correctly.

Signed-off-by: Aditya Kali <adityakali@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
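For background: with bigalloc, ext4's internal reservation counters (i_reserved_data_blocks and friends) are kept in units of clusters, where one cluster is 2^s_cluster_bits blocks, while the quota subsystem keeps accounting in filesystem blocks. That is why the call sites below convert with EXT4_C2B() or EXT4_NUM_B2C() before touching quota. The following userspace sketch shows the conversion arithmetic; the macro bodies are paraphrased rather than copied from ext4.h, so treat them as illustrative:

#include <stdio.h>

/* Paraphrases of the bigalloc conversion helpers (illustrative only):
 *   C2B:     cluster count -> block count
 *   B2C:     block number  -> cluster number (round down)
 *   NUM_B2C: block count   -> cluster count (round up)
 */
#define C2B(bits, clusters)   ((unsigned long long)(clusters) << (bits))
#define B2C(bits, block)      ((unsigned long long)(block) >> (bits))
#define NUM_B2C(bits, blocks) (((unsigned long long)(blocks) + \
				(1ULL << (bits)) - 1) >> (bits))

int main(void)
{
	unsigned int cluster_bits = 4;	/* 2^4 = 16 blocks per cluster */

	/* Quota is charged in blocks, so 3 allocated clusters are billed
	 * as 48 blocks at the dquot_* call sites in this patch. */
	printf("C2B(3)      = %llu blocks\n", C2B(cluster_bits, 3));
	printf("B2C(40)     = cluster %llu\n", B2C(cluster_bits, 40));
	printf("NUM_B2C(40) = %llu clusters\n", NUM_B2C(cluster_bits, 40));
	return 0;
}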
-rw-r--r--	fs/ext4/balloc.c	5
-rw-r--r--	fs/ext4/ext4.h	16
-rw-r--r--	fs/ext4/ext4_extents.h	2
-rw-r--r--	fs/ext4/extents.c	306
-rw-r--r--	fs/ext4/inode.c	54
-rw-r--r--	fs/ext4/mballoc.c	5
-rw-r--r--	fs/ext4/super.c	3
7 files changed, 366 insertions, 25 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9080a857cda9..bf42b3219e3c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -485,7 +485,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * @handle: handle to this transaction
  * @inode: file inode
  * @goal: given target block(filesystem wide)
- * @count: pointer to total number of blocks needed
+ * @count: pointer to total number of clusters needed
  * @errp: error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -517,7 +517,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		dquot_alloc_block_nofail(inode, ar.len);
+		dquot_alloc_block_nofail(inode,
+				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
 	}
 	return ret;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d2584224c89a..a6307f7c9807 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
 #define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear on buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT)
+				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -1884,6 +1892,7 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
+extern int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2284,6 +2293,11 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
 	 = BH_JBDPrivateStart,
+	BH_AllocFromCluster,	/* allocated blocks were part of already
+				 * allocated cluster. Note that this flag will
+				 * never, ever appear in a buffer_head's state
+				 * flag. See EXT4_MAP_FROM_CLUSTER to see where
+				 * this is used. */
 };
 
 BUFFER_FNS(Uninit, uninit)
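The EXT4_MAP_FROM_CLUSTER flag added above is purely an out-parameter convention between ext4_map_blocks() and its callers; the backing bit never reaches a buffer_head. A minimal sketch of how a delalloc write path consumes it follows; the bit value and struct here are stand-ins, and only the flag-test pattern mirrors the patch:

#include <stdio.h>

#define MAP_FROM_CLUSTER (1u << 5)	/* hypothetical bit for the sketch */

struct map_sketch {
	unsigned int m_flags;
};

/* Reserve space only when the block's cluster has not already been
 * accounted for by an earlier mapping or delalloc reservation. */
static int need_reservation(const struct map_sketch *map)
{
	return !(map->m_flags & MAP_FROM_CLUSTER);
}

int main(void)
{
	struct map_sketch map = { .m_flags = MAP_FROM_CLUSTER };
	printf("reserve again? %s\n", need_reservation(&map) ? "yes" : "no");
	return 0;
}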
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f3b612..a52db3a69a30 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
 							struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+				      int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd4479c08031..c4e005864534 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2686,6 +2686,21 @@ again:
 		}
 	}
 
+	/* If we still have something in the partial cluster and we have removed
+	 * even the first extent, then we should free the blocks in the partial
+	 * cluster as well. */
+	if (partial_cluster && path->p_hdr->eh_entries == 0) {
+		int flags = EXT4_FREE_BLOCKS_FORGET;
+
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			flags |= EXT4_FREE_BLOCKS_METADATA;
+
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
+				 EXT4_SB(sb)->s_cluster_ratio, flags);
+		partial_cluster = 0;
+	}
+
 	/* TODO: flexible tree reduction should be here */
 	if (path->p_hdr->eh_entries == 0) {
 		/*
@@ -3233,6 +3248,195 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	return ext4_mark_inode_dirty(handle, inode);
 }
 
+/**
+ * ext4_find_delalloc_range: find delayed allocated block in the given range.
+ *
+ * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
+ * whether there are any buffers marked for delayed allocation. It returns '1'
+ * on the first delalloc'ed buffer head found. If no buffer head in the given
+ * range is marked for delalloc, it returns 0.
+ * lblk_start should always be <= lblk_end.
+ * search_hint_reverse is to indicate that searching in reverse from lblk_end to
+ * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
+ * block sooner). This is useful when blocks are truncated sequentially from
+ * lblk_start towards lblk_end.
+ */
+static int ext4_find_delalloc_range(struct inode *inode,
+				    ext4_lblk_t lblk_start,
+				    ext4_lblk_t lblk_end,
+				    int search_hint_reverse)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct buffer_head *head, *bh = NULL;
+	struct page *page;
+	ext4_lblk_t i, pg_lblk;
+	pgoff_t index;
+
+	/* reverse search wont work if fs block size is less than page size */
+	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
+		search_hint_reverse = 0;
+
+	if (search_hint_reverse)
+		i = lblk_end;
+	else
+		i = lblk_start;
+
+	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	while ((i >= lblk_start) && (i <= lblk_end)) {
+		page = find_get_page(mapping, index);
+		if (!page || !PageDirty(page))
+			goto nextpage;
+
+		if (PageWriteback(page)) {
+			/*
+			 * This might be a race with allocation and writeout. In
+			 * this case we just assume that the rest of the range
+			 * will eventually be written and there wont be any
+			 * delalloc blocks left.
+			 * TODO: the above assumption is troublesome, but might
+			 * work better in practice. other option could be note
+			 * somewhere that the cluster is getting written out and
+			 * detect that here.
+			 */
+			page_cache_release(page);
+			return 0;
+		}
+
+		if (!page_has_buffers(page))
+			goto nextpage;
+
+		head = page_buffers(page);
+		if (!head)
+			goto nextpage;
+
+		bh = head;
+		pg_lblk = index << (PAGE_CACHE_SHIFT -
+						inode->i_blkbits);
+		do {
+			if (unlikely(pg_lblk < lblk_start)) {
+				/*
+				 * This is possible when fs block size is less
+				 * than page size and our cluster starts/ends in
+				 * middle of the page. So we need to skip the
+				 * initial few blocks till we reach the 'lblk'
+				 */
+				pg_lblk++;
+				continue;
+			}
+
+			if (buffer_delay(bh)) {
+				page_cache_release(page);
+				return 1;
+			}
+			if (search_hint_reverse)
+				i--;
+			else
+				i++;
+		} while ((i >= lblk_start) && (i <= lblk_end) &&
+				((bh = bh->b_this_page) != head));
+nextpage:
+		if (page)
+			page_cache_release(page);
+		/*
+		 * Move to next page. 'i' will be the first lblk in the next
+		 * page.
+		 */
+		if (search_hint_reverse)
+			index--;
+		else
+			index++;
+		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	}
+
+	return 0;
+}
+
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+			       int search_hint_reverse)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t lblk_start, lblk_end;
+	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
+
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+					search_hint_reverse);
+}
+
+/**
+ * Determines how many complete clusters (out of those specified by the 'map')
+ * are under delalloc and were reserved quota for.
+ * This function is called when we are writing out the blocks that were
+ * originally written with their allocation delayed, but then the space was
+ * allocated using fallocate() before the delayed allocation could be resolved.
+ * The cases to look for are:
+ * ('=' indicated delayed allocated blocks
+ *  '-' indicates non-delayed allocated blocks)
+ * (a) partial clusters towards beginning and/or end outside of allocated range
+ *     are not delalloc'ed.
+ *	Ex:
+ *	|----c---=|====c====|====c====|===-c----|
+ *	         |++++++ allocated ++++++|
+ *	==> 4 complete clusters in above example
+ *
+ * (b) partial cluster (outside of allocated range) towards either end is
+ *     marked for delayed allocation. In this case, we will exclude that
+ *     cluster.
+ *	Ex:
+ *	|----====c========|========c========|
+ *	         |++++++ allocated ++++++|
+ *	==> 1 complete clusters in above example
+ *
+ *	Ex:
+ *	|================c================|
+ *	         |++++++ allocated ++++++|
+ *	==> 0 complete clusters in above example
+ *
+ * The ext4_da_update_reserve_space will be called only if we
+ * determine here that there were some "entire" clusters that span
+ * this 'allocated' range.
+ * In the non-bigalloc case, this function will just end up returning num_blks
+ * without ever calling ext4_find_delalloc_range.
+ */
+static unsigned int
+get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
+			   unsigned int num_blks)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
+	ext4_lblk_t lblk_from, lblk_to, c_offset;
+	unsigned int allocated_clusters = 0;
+
+	alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
+	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
+
+	/* max possible clusters for this allocation */
+	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
+
+	/* Check towards left side */
+	c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+	if (c_offset) {
+		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+		lblk_to = lblk_from + c_offset - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	/* Now check towards right. */
+	c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+	if (allocated_clusters && c_offset) {
+		lblk_from = lblk_start + num_blks;
+		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	return allocated_clusters;
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 					struct ext4_map_blocks *map,
@@ -3338,8 +3542,15 @@ out:
 		 * But fallocate would have already updated quota and block
 		 * count for this offset. So cancel these reservation
 		 */
-		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-			ext4_da_update_reserve_space(inode, allocated, 0);
+		if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+			unsigned int reserved_clusters;
+			reserved_clusters = get_reserved_cluster_alloc(inode,
+					map->m_lblk, map->m_len);
+			if (reserved_clusters)
+				ext4_da_update_reserve_space(inode,
+						reserved_clusters,
+						0);
+		}
 
 map_out:
 	map->m_flags |= EXT4_MAP_MAPPED;
@@ -3484,6 +3695,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ext4_fsblk_t newblock = 0;
 	int free_on_err = 0, err = 0, depth, ret;
 	unsigned int allocated = 0, offset = 0;
+	unsigned int allocated_clusters = 0, reserved_clusters = 0;
 	unsigned int punched_out = 0;
 	unsigned int result = 0;
 	struct ext4_allocation_request ar;
@@ -3499,6 +3711,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
 		ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
+			if ((sbi->s_cluster_ratio > 1) &&
+			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
 				 * block isn't allocated yet and
@@ -3509,6 +3725,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			/* we should allocate requested block */
 		} else {
 			/* block is already allocated */
+			if (sbi->s_cluster_ratio > 1)
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 			newblock = map->m_lblk
 				   - le32_to_cpu(newex.ee_block)
 				   + ext4_ext_pblock(&newex);
@@ -3665,6 +3883,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		}
 	}
 
+	if ((sbi->s_cluster_ratio > 1) &&
+	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
 	/*
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
@@ -3681,6 +3903,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * Okay, we need to do block allocation.
 	 */
+	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
 	newex.ee_block = cpu_to_le32(map->m_lblk);
 	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
@@ -3692,6 +3915,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(sbi, map, ex, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		goto got_allocated_blocks;
 	}
 
@@ -3712,6 +3936,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(sbi, map, ex2, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 		goto got_allocated_blocks;
 	}
 
@@ -3765,6 +3990,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ext_debug("allocate new block: goal %llu, found %llu/%u\n",
 		  ar.goal, newblock, allocated);
 	free_on_err = 1;
+	allocated_clusters = ar.len;
 	ar.len = EXT4_C2B(sbi, ar.len) - offset;
 	if (ar.len > allocated)
 		ar.len = allocated;
@@ -3822,8 +4048,80 @@ got_allocated_blocks:
 	 * Update reserved blocks/metadata blocks after successful
 	 * block allocation which had been deferred till now.
 	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 1);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		/*
+		 * Check how many clusters we had reserved this allocted range.
+		 */
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+						map->m_lblk, allocated);
+		if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+			if (reserved_clusters) {
+				/*
+				 * We have clusters reserved for this range.
+				 * But since we are not doing actual allocation
+				 * and are simply using blocks from previously
+				 * allocated cluster, we should release the
+				 * reservation and not claim quota.
+				 */
+				ext4_da_update_reserve_space(inode,
+						reserved_clusters, 0);
+			}
+		} else {
+			BUG_ON(allocated_clusters < reserved_clusters);
+			/* We will claim quota for all newly allocated blocks.*/
+			ext4_da_update_reserve_space(inode, allocated_clusters,
+							1);
+			if (reserved_clusters < allocated_clusters) {
+				int reservation = allocated_clusters -
+						  reserved_clusters;
+				/*
+				 * It seems we claimed few clusters outside of
+				 * the range of this allocation. We should give
+				 * it back to the reservation pool. This can
+				 * happen in the following case:
+				 *
+				 * * Suppose s_cluster_ratio is 4 (i.e., each
+				 *   cluster has 4 blocks. Thus, the clusters
+				 *   are [0-3],[4-7],[8-11]...
+				 * * First comes delayed allocation write for
+				 *   logical blocks 10 & 11. Since there were no
+				 *   previous delayed allocated blocks in the
+				 *   range [8-11], we would reserve 1 cluster
+				 *   for this write.
+				 * * Next comes write for logical blocks 3 to 8.
+				 *   In this case, we will reserve 2 clusters
+				 *   (for [0-3] and [4-7]; and not for [8-11] as
+				 *   that range has a delayed allocated blocks.
+				 *   Thus total reserved clusters now becomes 3.
+				 * * Now, during the delayed allocation writeout
+				 *   time, we will first write blocks [3-8] and
+				 *   allocate 3 clusters for writing these
+				 *   blocks. Also, we would claim all these
+				 *   three clusters above.
+				 * * Now when we come here to writeout the
+				 *   blocks [10-11], we would expect to claim
+				 *   the reservation of 1 cluster we had made
+				 *   (and we would claim it since there are no
+				 *   more delayed allocated blocks in the range
+				 *   [8-11]. But our reserved cluster count had
+				 *   already gone to 0.
+				 *
+				 * Thus, at the step 4 above when we determine
+				 * that there are still some unwritten delayed
+				 * allocated blocks outside of our current
+				 * block range, we should increment the
+				 * reserved clusters count so that when the
+				 * remaining blocks finally gets written, we
+				 * could claim them.
+				 */
+				while (reservation) {
+					ext4_da_reserve_space(inode,
+							      map->m_lblk);
+					reservation--;
+				}
+			}
+		}
+	}
 
 	/*
 	 * Cache the extent and update transaction to commit on fdatasync only
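To make the cluster accounting above concrete, here is a small userspace sketch of the boundary arithmetic in get_reserved_cluster_alloc(). The page-cache walk done by ext4_find_delalloc_range() is stubbed out with a predicate, and the power-of-two masking is replaced by plain division and modulo; all names and values here are illustrative, not kernel API:

#include <stdio.h>

/* Stub for ext4_find_delalloc_range(): reports whether any block in
 * [from, to] is delayed-allocated. Always 0 here, i.e. case (a) in the
 * comment above (no delalloc'ed partial clusters outside the range). */
static int range_has_delalloc(unsigned int from, unsigned int to)
{
	(void)from;
	(void)to;
	return 0;
}

static unsigned int count_reserved_clusters(unsigned int lblk_start,
					    unsigned int num_blks,
					    unsigned int ratio)
{
	unsigned int first_c = lblk_start / ratio;
	unsigned int last_c = (lblk_start + num_blks - 1) / ratio;
	unsigned int clusters = last_c - first_c + 1;	/* max possible */
	unsigned int c_offset;

	/* Partial cluster on the left: if the blocks before lblk_start in
	 * the same cluster are delalloc'ed, that cluster's reservation
	 * still belongs to them, so do not count it here. */
	c_offset = lblk_start % ratio;
	if (c_offset &&
	    range_has_delalloc(lblk_start - c_offset, lblk_start - 1))
		clusters--;

	/* Partial cluster on the right, symmetric check. */
	c_offset = (lblk_start + num_blks) % ratio;
	if (clusters && c_offset &&
	    range_has_delalloc(lblk_start + num_blks,
			       lblk_start + num_blks + (ratio - c_offset) - 1))
		clusters--;

	return clusters;
}

int main(void)
{
	/* With ratio 4, blocks 2..17 touch clusters 0..4: all five are
	 * counted because the stub reports no neighbouring delalloc. */
	printf("%u\n", count_reserved_clusters(2, 16, 4));
	return 0;
}

Flipping the stub to return 1 exercises the (b) cases: each delalloc'ed partial cluster at an edge drops the count by one, exactly the adjustment the hunks above rely on before releasing or claiming quota.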
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 40f51aae42fe..d1c17e47c1c6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -300,14 +300,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
 	/* Update quota subsystem for data blocks */
 	if (quota_claim)
-		dquot_claim_block(inode, used);
+		dquot_claim_block(inode, EXT4_C2B(sbi, used));
 	else {
 		/*
 		 * We did fallocate with an offset that is already delayed
 		 * allocated. So on delayed allocated writeback we should
 		 * not re-claim the quota for fallocated blocks.
 		 */
-		dquot_release_reservation_block(inode, used);
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
 	}
 
 	/*
@@ -1037,14 +1037,14 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
- * Reserve a single block located at lblock
+ * Reserve a single cluster located at lblock
  */
-static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
+int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
 	int retries = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long md_needed;
+	unsigned int md_needed;
 	int ret;
 
 	/*
@@ -1054,7 +1054,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 	 */
 repeat:
 	spin_lock(&ei->i_block_reservation_lock);
-	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	md_needed = EXT4_NUM_B2C(sbi,
+				 ext4_calc_metadata_amount(inode, lblock));
 	trace_ext4_da_reserve_space(inode, md_needed);
 	spin_unlock(&ei->i_block_reservation_lock);
 
@@ -1063,7 +1064,7 @@ repeat:
 	 * us from metadata over-estimation, though we may go over by
 	 * a small amount in the end. Here we just reserve for data.
 	 */
-	ret = dquot_reserve_block(inode, 1);
+	ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
 	if (ret)
 		return ret;
 	/*
@@ -1071,7 +1072,7 @@ repeat:
 	 * we cannot afford to run out of free blocks.
 	 */
 	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-		dquot_release_reservation_block(inode, 1);
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
@@ -1118,6 +1119,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 		 * We can release all of the reserved metadata blocks
 		 * only when we have written all of the delayed
 		 * allocation blocks.
+		 * Note that in case of bigalloc, i_reserved_meta_blocks,
+		 * i_reserved_data_blocks, etc. refer to number of clusters.
 		 */
 		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 				   ei->i_reserved_meta_blocks);
@@ -1130,7 +1133,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	dquot_release_reservation_block(inode, to_free);
+	dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1142,9 @@ static void ext4_da_page_release_reservation(struct page *page,
 	int to_release = 0;
 	struct buffer_head *head, *bh;
 	unsigned int curr_off = 0;
+	struct inode *inode = page->mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int num_clusters;
 
 	head = page_buffers(page);
 	bh = head;
@@ -1151,7 +1157,20 @@ static void ext4_da_page_release_reservation(struct page *page,
 		}
 		curr_off = next_off;
 	} while ((bh = bh->b_this_page) != head);
-	ext4_da_release_space(page->mapping->host, to_release);
+
+	/* If we have released all the blocks belonging to a cluster, then we
+	 * need to release the reserved space for that cluster. */
+	num_clusters = EXT4_NUM_B2C(sbi, to_release);
+	while (num_clusters > 0) {
+		ext4_fsblk_t lblk;
+		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+			((num_clusters - 1) << sbi->s_cluster_bits);
+		if (sbi->s_cluster_ratio == 1 ||
+		    !ext4_find_delalloc_cluster(inode, lblk, 1))
+			ext4_da_release_space(inode, 1);
+
+		num_clusters--;
+	}
 }
 
 /*
@@ -1352,7 +1371,8 @@ static void ext4_print_free_blocks(struct inode *inode)
 	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
 		percpu_counter_sum(&sbi->s_freeclusters_counter)));
 	printk(KERN_CRIT "dirty_blocks=%lld\n",
-	       (long long) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 	printk(KERN_CRIT "Block reservation details\n");
 	printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
 	       EXT4_I(inode)->i_reserved_data_blocks);
@@ -1626,10 +1646,14 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 		/*
 		 * XXX: __block_write_begin() unmaps passed block, is it OK?
 		 */
-		ret = ext4_da_reserve_space(inode, iblock);
-		if (ret)
-			/* not enough space to reserve */
-			return ret;
+		/* If the block was allocated from previously allocated cluster,
+		 * then we dont need to reserve it again. */
+		if (!(map.m_flags & EXT4_MAP_FROM_CLUSTER)) {
+			ret = ext4_da_reserve_space(inode, iblock);
+			if (ret)
+				/* not enough space to reserve */
+				return ret;
+		}
 
 		map_bh(bh, inode->i_sb, invalid_block);
 		set_buffer_new(bh);
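The new release path in ext4_da_page_release_reservation() above walks cluster by cluster instead of releasing to_release blocks in one call. A sketch of the index arithmetic follows, with assumed page, block, and cluster sizes (the constants are hypothetical, not read from a real superblock):

#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12;	/* 4k page cache pages */
	unsigned int blkbits = 10;	/* 1k filesystem blocks, 4 per page */
	unsigned int cluster_bits = 1;	/* 2 blocks per cluster */
	unsigned long page_index = 5;	/* page covering logical blocks 20..23 */
	unsigned int to_release = 4;	/* delalloc buffers dropped on this page */

	/* Round released blocks up to clusters, as EXT4_NUM_B2C() does. */
	unsigned int num_clusters =
		(to_release + (1u << cluster_bits) - 1) >> cluster_bits;

	/* For each candidate cluster, derive the logical block handed to
	 * ext4_find_delalloc_cluster(); the reservation is dropped only if
	 * no other delalloc block still lives in that cluster. */
	while (num_clusters > 0) {
		unsigned long lblk = (page_index << (page_shift - blkbits)) +
			((unsigned long)(num_clusters - 1) << cluster_bits);
		printf("probe cluster containing lblk %lu\n", lblk);
		num_clusters--;
	}
	return 0;
}

With these assumed sizes the sketch probes logical blocks 22 and then 20, i.e. the two clusters backing the released page, mirroring the loop in the hunk above.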
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 63dd56703342..5e1215d38331 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4718,6 +4718,9 @@ do_more:
 
 	freed += count;
 
+	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4736,8 +4739,6 @@ do_more:
 	}
 	ext4_mark_super_dirty(sb);
 error_return:
-	if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
-		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
 	return;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6810957e0ac7..66b8cfa15636 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2473,7 +2473,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
 					      char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			(s64) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+		(s64) EXT4_C2B(sbi,
+			percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 }
 
 static ssize_t session_write_kbytes_show(struct ext4_attr *a,