author		Aditya Kali <adityakali@google.com>	2011-09-09 19:04:51 -0400
committer	Theodore Ts'o <tytso@mit.edu>		2011-09-09 19:04:51 -0400
commit		7b415bf60f6afb0499fd3dc0ee33444f54e28567 (patch)
tree		9c64fef2b8d60ce64865af6e4c2cc6008026e28c
parent		27baebb849d46d901e756e6502b0a65a62e43771 (diff)
ext4: Fix bigalloc quota accounting and i_blocks value
With the bigalloc changes, the i_blocks value was not correctly set (it was
still set to the number of blocks being used, but with bigalloc we want
i_blocks to represent the number of clusters being used). Since the quota
subsystem sets the i_blocks value, this patch fixes the quota accounting and
makes sure that the i_blocks value is set correctly.
Signed-off-by: Aditya Kali <adityakali@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--	fs/ext4/balloc.c	|   5
-rw-r--r--	fs/ext4/ext4.h		|  16
-rw-r--r--	fs/ext4/ext4_extents.h	|   2
-rw-r--r--	fs/ext4/extents.c	| 306
-rw-r--r--	fs/ext4/inode.c		|  54
-rw-r--r--	fs/ext4/mballoc.c	|   5
-rw-r--r--	fs/ext4/super.c		|   3
7 files changed, 366 insertions(+), 25 deletions(-)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9080a857cda9..bf42b3219e3c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -485,7 +485,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  * @handle: handle to this transaction
  * @inode: file inode
  * @goal: given target block(filesystem wide)
- * @count: pointer to total number of blocks needed
+ * @count: pointer to total number of clusters needed
  * @errp: error code
  *
  * Return 1st allocated block number on success, *count stores total account
@@ -517,7 +517,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-		dquot_alloc_block_nofail(inode, ar.len);
+		dquot_alloc_block_nofail(inode,
+				EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
 	}
 	return ret;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d2584224c89a..a6307f7c9807 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
 #define EXT4_MAP_UNINIT		(1 << BH_Uninit)
+/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
+ * ext4_map_blocks wants to know whether or not the underlying cluster has
+ * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
+ * the requested mapping was from a previously mapped (or delayed allocated)
+ * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
+ * should never appear in a buffer_head's state flags.
+ */
+#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT)
+				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -1884,6 +1892,7 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
+extern int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2284,6 +2293,11 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 enum ext4_state_bits {
 	BH_Uninit	/* blocks are allocated but uninitialized on disk */
	  = BH_JBDPrivateStart,
+	BH_AllocFromCluster,	/* allocated blocks were part of an already
+				 * allocated cluster. Note that this flag will
+				 * never, ever appear in a buffer_head's state
+				 * flags. See EXT4_MAP_FROM_CLUSTER to see where
+				 * this is used. */
 };
 
 BUFFER_FNS(Uninit, uninit)
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f3b612..a52db3a69a30 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
						  struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+				      int search_hint_reverse);
 #endif /* _EXT4_EXTENTS */
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd4479c08031..c4e005864534 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2686,6 +2686,21 @@ again:
 		}
 	}
 
+	/* If we still have something in the partial cluster and we have removed
+	 * even the first extent, then we should free the blocks in the partial
+	 * cluster as well. */
+	if (partial_cluster && path->p_hdr->eh_entries == 0) {
+		int flags = EXT4_FREE_BLOCKS_FORGET;
+
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+			flags |= EXT4_FREE_BLOCKS_METADATA;
+
+		ext4_free_blocks(handle, inode, NULL,
+				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
+				 EXT4_SB(sb)->s_cluster_ratio, flags);
+		partial_cluster = 0;
+	}
+
 	/* TODO: flexible tree reduction should be here */
 	if (path->p_hdr->eh_entries == 0) {
 		/*
@@ -3233,6 +3248,195 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 	return ext4_mark_inode_dirty(handle, inode);
 }
 
+/**
+ * ext4_find_delalloc_range: find delayed allocated block in the given range.
+ *
+ * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
+ * whether there are any buffers marked for delayed allocation. It returns '1'
+ * on the first delalloc'ed buffer head found. If no buffer head in the given
+ * range is marked for delalloc, it returns 0.
+ * lblk_start should always be <= lblk_end.
+ * search_hint_reverse is to indicate that searching in reverse from lblk_end to
+ * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
+ * block sooner). This is useful when blocks are truncated sequentially from
+ * lblk_start towards lblk_end.
+ */
+static int ext4_find_delalloc_range(struct inode *inode,
+				    ext4_lblk_t lblk_start,
+				    ext4_lblk_t lblk_end,
+				    int search_hint_reverse)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct buffer_head *head, *bh = NULL;
+	struct page *page;
+	ext4_lblk_t i, pg_lblk;
+	pgoff_t index;
+
+	/* reverse search won't work if fs block size is less than page size */
+	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
+		search_hint_reverse = 0;
+
+	if (search_hint_reverse)
+		i = lblk_end;
+	else
+		i = lblk_start;
+
+	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	while ((i >= lblk_start) && (i <= lblk_end)) {
+		page = find_get_page(mapping, index);
+		if (!page || !PageDirty(page))
+			goto nextpage;
+
+		if (PageWriteback(page)) {
+			/*
+			 * This might be a race with allocation and writeout. In
+			 * this case we just assume that the rest of the range
+			 * will eventually be written and there won't be any
+			 * delalloc blocks left.
+			 * TODO: the above assumption is troublesome, but might
+			 * work better in practice. Another option could be to
+			 * note somewhere that the cluster is getting written
+			 * out and detect that here.
+			 */
+			page_cache_release(page);
+			return 0;
+		}
+
+		if (!page_has_buffers(page))
+			goto nextpage;
+
+		head = page_buffers(page);
+		if (!head)
+			goto nextpage;
+
+		bh = head;
+		pg_lblk = index << (PAGE_CACHE_SHIFT -
+						inode->i_blkbits);
+		do {
+			if (unlikely(pg_lblk < lblk_start)) {
+				/*
+				 * This is possible when fs block size is less
+				 * than page size and our cluster starts/ends in
+				 * the middle of the page. So we need to skip
+				 * the initial few blocks till we reach 'lblk'.
+				 */
+				pg_lblk++;
+				continue;
+			}
+
+			if (buffer_delay(bh)) {
+				page_cache_release(page);
+				return 1;
+			}
+			if (search_hint_reverse)
+				i--;
+			else
+				i++;
+		} while ((i >= lblk_start) && (i <= lblk_end) &&
+			 ((bh = bh->b_this_page) != head));
+nextpage:
+		if (page)
+			page_cache_release(page);
+		/*
+		 * Move to next page. 'i' will be the first lblk in the next
+		 * page.
+		 */
+		if (search_hint_reverse)
+			index--;
+		else
+			index++;
+		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	}
+
+	return 0;
+}
+
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
+			       int search_hint_reverse)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t lblk_start, lblk_end;
+	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
+
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
+					search_hint_reverse);
+}
+
+/**
+ * Determines how many complete clusters (out of those specified by the 'map')
+ * are under delalloc and for which quota was reserved.
+ * This function is called when we are writing out the blocks that were
+ * originally written with their allocation delayed, but then the space was
+ * allocated using fallocate() before the delayed allocation could be resolved.
+ * The cases to look for are:
+ * ('=' indicates delayed allocated blocks
+ *  '-' indicates non-delayed allocated blocks)
+ * (a) partial clusters towards beginning and/or end outside of allocated range
+ *     are not delalloc'ed.
+ *	Ex:
+ *	|----c---=|====c====|====c====|===-c----|
+ *	         |++++++ allocated ++++++|
+ *	==> 4 complete clusters in above example
+ *
+ * (b) partial cluster (outside of allocated range) towards either end is
+ *     marked for delayed allocation. In this case, we will exclude that
+ *     cluster.
+ *	Ex:
+ *	|----====c========|========c========|
+ *	     |++++++ allocated ++++++|
+ *	==> 1 complete cluster in above example
+ *
+ *	Ex:
+ *	|================c================|
+ *	   |++++++ allocated ++++++|
+ *	==> 0 complete clusters in above example
+ *
+ * ext4_da_update_reserve_space will be called only if we
+ * determine here that there were some "entire" clusters that span
+ * this 'allocated' range.
+ * In the non-bigalloc case, this function will just end up returning num_blks
+ * without ever calling ext4_find_delalloc_range.
+ */
+static unsigned int
+get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
+			   unsigned int num_blks)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
+	ext4_lblk_t lblk_from, lblk_to, c_offset;
+	unsigned int allocated_clusters = 0;
+
+	alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
+	alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
+
+	/* max possible clusters for this allocation */
+	allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
+
+	/* Check towards left side */
+	c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+	if (c_offset) {
+		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+		lblk_to = lblk_from + c_offset - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	/* Now check towards right. */
+	c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+	if (allocated_clusters && c_offset) {
+		lblk_from = lblk_start + num_blks;
+		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
+
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+			allocated_clusters--;
+	}
+
+	return allocated_clusters;
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
					struct ext4_map_blocks *map,
@@ -3338,8 +3542,15 @@ out:
	 * But fallocate would have already updated quota and block
	 * count for this offset. So cancel these reservation
	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 0);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		unsigned int reserved_clusters;
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+				map->m_lblk, map->m_len);
+		if (reserved_clusters)
+			ext4_da_update_reserve_space(inode,
+						     reserved_clusters,
+						     0);
+	}
 
 map_out:
	map->m_flags |= EXT4_MAP_MAPPED;
@@ -3484,6 +3695,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	ext4_fsblk_t newblock = 0;
	int free_on_err = 0, err = 0, depth, ret;
	unsigned int allocated = 0, offset = 0;
+	unsigned int allocated_clusters = 0, reserved_clusters = 0;
	unsigned int punched_out = 0;
	unsigned int result = 0;
	struct ext4_allocation_request ar;
@@ -3499,6 +3711,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
	    ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
		if (!newex.ee_start_lo && !newex.ee_start_hi) {
+			if ((sbi->s_cluster_ratio > 1) &&
+			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
				/*
				 * block isn't allocated yet and
@@ -3509,6 +3725,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			/* we should allocate requested block */
		} else {
			/* block is already allocated */
+			if (sbi->s_cluster_ratio > 1)
+				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
			newblock = map->m_lblk
				   - le32_to_cpu(newex.ee_block)
				   + ext4_ext_pblock(&newex);
@@ -3665,6 +3883,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		}
	}
 
+	if ((sbi->s_cluster_ratio > 1) &&
+	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+
	/*
	 * requested block isn't allocated yet;
	 * we couldn't try to create block if create flag is zero
@@ -3681,6 +3903,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	/*
	 * Okay, we need to do block allocation.
	 */
+	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
	newex.ee_block = cpu_to_le32(map->m_lblk);
	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
 
@@ -3692,6 +3915,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	    get_implied_cluster_alloc(sbi, map, ex, path)) {
		ar.len = allocated = map->m_len;
		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
		goto got_allocated_blocks;
	}
 
@@ -3712,6 +3936,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	    get_implied_cluster_alloc(sbi, map, ex2, path)) {
		ar.len = allocated = map->m_len;
		newblock = map->m_pblk;
+		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
		goto got_allocated_blocks;
	}
 
@@ -3765,6 +3990,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	ext_debug("allocate new block: goal %llu, found %llu/%u\n",
		  ar.goal, newblock, allocated);
	free_on_err = 1;
+	allocated_clusters = ar.len;
	ar.len = EXT4_C2B(sbi, ar.len) - offset;
	if (ar.len > allocated)
		ar.len = allocated;
@@ -3822,8 +4048,80 @@ got_allocated_blocks:
	 * Update reserved blocks/metadata blocks after successful
	 * block allocation which had been deferred till now.
	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_da_update_reserve_space(inode, allocated, 1);
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
+		/*
+		 * Check how many clusters we had reserved for this allocated range.
+		 */
+		reserved_clusters = get_reserved_cluster_alloc(inode,
+						map->m_lblk, allocated);
+		if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+			if (reserved_clusters) {
+				/*
+				 * We have clusters reserved for this range.
+				 * But since we are not doing actual allocation
+				 * and are simply using blocks from a previously
+				 * allocated cluster, we should release the
+				 * reservation and not claim quota.
+				 */
+				ext4_da_update_reserve_space(inode,
+						reserved_clusters, 0);
+			}
+		} else {
+			BUG_ON(allocated_clusters < reserved_clusters);
+			/* We will claim quota for all newly allocated blocks. */
+			ext4_da_update_reserve_space(inode, allocated_clusters,
+							1);
+			if (reserved_clusters < allocated_clusters) {
+				int reservation = allocated_clusters -
+						  reserved_clusters;
+				/*
+				 * It seems we claimed a few clusters outside of
+				 * the range of this allocation. We should give
+				 * them back to the reservation pool. This can
+				 * happen in the following case:
+				 *
+				 * * Suppose s_cluster_ratio is 4 (i.e., each
+				 *   cluster has 4 blocks). Thus, the clusters
+				 *   are [0-3],[4-7],[8-11]...
+				 * * First comes a delayed allocation write for
+				 *   logical blocks 10 & 11. Since there were no
+				 *   previous delayed allocated blocks in the
+				 *   range [8-11], we would reserve 1 cluster
+				 *   for this write.
+				 * * Next comes a write for logical blocks 3 to 8.
+				 *   In this case, we will reserve 2 clusters
+				 *   (for [0-3] and [4-7]); not for [8-11], as
+				 *   that range has delayed allocated blocks.
+				 *   Thus total reserved clusters now becomes 3.
+				 * * Now, during the delayed allocation writeout
+				 *   time, we will first write blocks [3-8] and
+				 *   allocate 3 clusters for writing these
+				 *   blocks. Also, we would claim all these
+				 *   three clusters above.
+				 * * Now when we come here to write out the
+				 *   blocks [10-11], we would expect to claim
+				 *   the reservation of 1 cluster we had made
+				 *   (and we would claim it since there are no
+				 *   more delayed allocated blocks in the range
+				 *   [8-11]). But our reserved cluster count had
+				 *   already gone to 0.
+				 *
+				 * Thus, at step 4 above, when we determine
+				 * that there are still some unwritten delayed
+				 * allocated blocks outside of our current
+				 * block range, we should increment the
+				 * reserved clusters count so that when the
+				 * remaining blocks finally get written, we
+				 * can claim them.
+				 */
+				while (reservation) {
+					ext4_da_reserve_space(inode,
+							      map->m_lblk);
+					reservation--;
+				}
+			}
+		}
+	}
 
	/*
	 * Cache the extent and update transaction to commit on fdatasync only
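
The scenario in the long comment above is easy to replay. Below is an
illustrative, self-contained rework of get_reserved_cluster_alloc()'s
arithmetic with a stub in place of ext4_find_delalloc_range(); the constants
and helper names here are assumptions for the sketch, not kernel interfaces:

#include <stdio.h>

#define CLUSTER_RATIO 4u	/* blocks per cluster (assumed) */

/* stub: blocks 10 and 11 are still delayed-allocated */
static int has_delalloc(unsigned int from, unsigned int to)
{
	return from <= 11 && to >= 10;
}

static unsigned int reserved_cluster_alloc(unsigned int lblk_start,
					   unsigned int num_blks)
{
	unsigned int start_c = lblk_start / CLUSTER_RATIO;
	unsigned int end_c = (lblk_start + num_blks - 1) / CLUSTER_RATIO;
	unsigned int clusters = end_c - start_c + 1;
	unsigned int c_off = lblk_start % CLUSTER_RATIO;

	/* exclude a left partial cluster that still holds delalloc blocks */
	if (c_off && has_delalloc(lblk_start - c_off, lblk_start - 1))
		clusters--;

	/* exclude a right partial cluster that still holds delalloc blocks */
	c_off = (lblk_start + num_blks) % CLUSTER_RATIO;
	if (clusters && c_off &&
	    has_delalloc(lblk_start + num_blks,
			 lblk_start + num_blks + (CLUSTER_RATIO - c_off) - 1))
		clusters--;

	return clusters;
}

int main(void)
{
	/* Writing out blocks [3-8]: cluster [8-11] still holds delalloc
	 * blocks 10-11, so only 2 of the 3 allocated clusters count as
	 * previously reserved for this range. */
	unsigned int allocated = 3;
	unsigned int reserved = reserved_cluster_alloc(3, 6);

	printf("reserved=%u, re-reserve %u cluster(s)\n",
	       reserved, allocated - reserved);	/* reserved=2, re-reserve 1 */
	return 0;
}

Running it confirms the comment's bookkeeping: the writeout claims 3 clusters
but only 2 were effectively reserved for this range, so 1 cluster's worth of
reservation is put back via ext4_da_reserve_space() for the still-delayed
blocks [10-11].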
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 40f51aae42fe..d1c17e47c1c6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -300,14 +300,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
 
	/* Update quota subsystem for data blocks */
	if (quota_claim)
-		dquot_claim_block(inode, used);
+		dquot_claim_block(inode, EXT4_C2B(sbi, used));
	else {
		/*
		 * We did fallocate with an offset that is already delayed
		 * allocated. So on delayed allocated writeback we should
		 * not re-claim the quota for fallocated blocks.
		 */
-		dquot_release_reservation_block(inode, used);
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
	}
 
	/*
@@ -1037,14 +1037,14 @@ static int ext4_journalled_write_end(struct file *file,
 }
 
 /*
- * Reserve a single block located at lblock
+ * Reserve a single cluster located at lblock
  */
-static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
+int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 {
	int retries = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long md_needed;
+	unsigned int md_needed;
	int ret;
 
@@ -1054,7 +1054,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
	 */
 repeat:
	spin_lock(&ei->i_block_reservation_lock);
-	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	md_needed = EXT4_NUM_B2C(sbi,
+				 ext4_calc_metadata_amount(inode, lblock));
	trace_ext4_da_reserve_space(inode, md_needed);
	spin_unlock(&ei->i_block_reservation_lock);
 
@@ -1063,7 +1064,7 @@ repeat:
	 * us from metadata over-estimation, though we may go over by
	 * a small amount in the end. Here we just reserve for data.
	 */
-	ret = dquot_reserve_block(inode, 1);
+	ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
	if (ret)
		return ret;
	/*
@@ -1071,7 +1072,7 @@ repeat:
	 * we cannot afford to run out of free blocks.
	 */
	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
-		dquot_release_reservation_block(inode, 1);
+		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
			yield();
			goto repeat;
@@ -1118,6 +1119,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
		 * We can release all of the reserved metadata blocks
		 * only when we have written all of the delayed
		 * allocation blocks.
+		 * Note that in case of bigalloc, i_reserved_meta_blocks,
+		 * i_reserved_data_blocks, etc. refer to the number of clusters.
		 */
		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
				   ei->i_reserved_meta_blocks);
@@ -1130,7 +1133,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	dquot_release_reservation_block(inode, to_free);
+	dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
 }
 
@@ -1139,6 +1142,9 @@ static void ext4_da_page_release_reservation(struct page *page,
	int to_release = 0;
	struct buffer_head *head, *bh;
	unsigned int curr_off = 0;
+	struct inode *inode = page->mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	int num_clusters;
 
	head = page_buffers(page);
	bh = head;
@@ -1151,7 +1157,20 @@ static void ext4_da_page_release_reservation(struct page *page,
		}
		curr_off = next_off;
	} while ((bh = bh->b_this_page) != head);
-	ext4_da_release_space(page->mapping->host, to_release);
+
+	/* If we have released all the blocks belonging to a cluster, then we
+	 * need to release the reserved space for that cluster. */
+	num_clusters = EXT4_NUM_B2C(sbi, to_release);
+	while (num_clusters > 0) {
+		ext4_fsblk_t lblk;
+		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+			((num_clusters - 1) << sbi->s_cluster_bits);
+		if (sbi->s_cluster_ratio == 1 ||
+		    !ext4_find_delalloc_cluster(inode, lblk, 1))
+			ext4_da_release_space(inode, 1);
+
+		num_clusters--;
+	}
 }
 
@@ -1352,7 +1371,8 @@ static void ext4_print_free_blocks(struct inode *inode)
	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
		percpu_counter_sum(&sbi->s_freeclusters_counter)));
	printk(KERN_CRIT "dirty_blocks=%lld\n",
-	       (long long) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+	       (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
+		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
	printk(KERN_CRIT "Block reservation details\n");
	printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
	       EXT4_I(inode)->i_reserved_data_blocks);
@@ -1626,10 +1646,14 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
		/*
		 * XXX: __block_write_begin() unmaps passed block, is it OK?
		 */
-		ret = ext4_da_reserve_space(inode, iblock);
-		if (ret)
-			/* not enough space to reserve */
-			return ret;
+		/* If the block was allocated from a previously allocated
+		 * cluster, then we don't need to reserve it again. */
+		if (!(map.m_flags & EXT4_MAP_FROM_CLUSTER)) {
+			ret = ext4_da_reserve_space(inode, iblock);
+			if (ret)
+				/* not enough space to reserve */
+				return ret;
+		}
 
		map_bh(bh, inode->i_sb, invalid_block);
		set_buffer_new(bh);
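
The per-cluster release loop added above leans on EXT4_NUM_B2C(), the
round-up counterpart of EXT4_B2C(): a partial run of released blocks still
spans whole clusters, and each cluster's reservation may be dropped only if
ext4_find_delalloc_cluster() finds no remaining delalloc block in it. A small
sketch of the rounding, assuming 4-block clusters (the definition below
mirrors, but is not, the kernel macro):

#include <stdio.h>

#define CLUSTER_BITS 2u				/* assumed: 4-block clusters */
#define CLUSTER_MASK ((1u << CLUSTER_BITS) - 1)

/* round-up blocks -> clusters, in the spirit of EXT4_NUM_B2C() */
static unsigned int num_b2c(unsigned int blks)
{
	return (blks + CLUSTER_MASK) >> CLUSTER_BITS;
}

int main(void)
{
	/* 5 released blocks touch 2 clusters; so do 8 blocks. */
	printf("5 blocks span %u clusters\n", num_b2c(5));	/* 2 */
	printf("8 blocks span %u clusters\n", num_b2c(8));	/* 2 */
	return 0;
}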
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 63dd56703342..5e1215d38331 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4718,6 +4718,9 @@ do_more:
 
	freed += count;
 
+	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+
	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4736,8 +4739,6 @@ do_more:
	}
	ext4_mark_super_dirty(sb);
 error_return:
-	if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
-		dquot_free_block(inode, freed);
	brelse(bitmap_bh);
	ext4_std_error(sb, err);
	return;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6810957e0ac7..66b8cfa15636 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2473,7 +2473,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
					      char *buf)
 {
	return snprintf(buf, PAGE_SIZE, "%llu\n",
-			(s64) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+			(s64) EXT4_C2B(sbi,
+			percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 }
 
 static ssize_t session_write_kbytes_show(struct ext4_attr *a,