author		Theodore Ts'o <tytso@mit.edu>	2011-09-09 18:52:51 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2011-09-09 18:52:51 -0400
commit		4d33b1ef10995d7ba6191d67456202c697a92a32 (patch)
tree		3e47753f9ac48e9f4c80dac0b69bce9fb4ac6f52 /fs/ext4
parent		84130193e0e6568dfdfb823f0e1e19aec80aff6e (diff)
ext4: teach ext4_ext_map_blocks() about the bigalloc feature
If we need to allocate a new block in ext4_ext_map_blocks(), the
function needs to see if the cluster has already been allocated.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
 fs/ext4/extents.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 162 insertions(+), 19 deletions(-)
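
A note on the cluster arithmetic used throughout this patch: bigalloc
groups blocks into clusters of 2^s_log_cluster_ratio blocks, so
s_cluster_ratio is always a power of two and a block's cluster number
and intra-cluster offset fall out of a shift and a mask.  The
stand-alone sketch below models that arithmetic; the ratio of 16 and
the macro names are illustrative stand-ins for the kernel's
EXT4_B2C() and friends, not taken from this patch.

#include <stdio.h>

/* Illustrative only: a bigalloc cluster of 2^4 = 16 blocks. */
#define CLUSTER_RATIO	16
#define B2C(blk)	((blk) >> 4)			/* block -> cluster, like EXT4_B2C() */
#define C_OFFSET(blk)	((blk) & (CLUSTER_RATIO - 1))	/* offset of a block within its cluster */

int main(void)
{
	unsigned int lblk = 35;		/* some logical block in a file */

	/* 35 = 2 * 16 + 3, so cluster 2, offset 3 */
	printf("block %u -> cluster %u, offset %u\n",
	       lblk, B2C(lblk), C_OFFSET(lblk));
	return 0;
}

This is also why the patch can round a block number down to the start
of its cluster simply by clearing the low bits, as in
"b2 &= ~(sbi->s_cluster_ratio - 1)" below.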
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ba7bd5a176ce..bd42ab29efec 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1270,7 +1270,8 @@ static int ext4_ext_search_left(struct inode *inode,
  */
 static int ext4_ext_search_right(struct inode *inode,
 				 struct ext4_ext_path *path,
-				 ext4_lblk_t *logical, ext4_fsblk_t *phys)
+				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
+				 struct ext4_extent **ret_ex)
 {
 	struct buffer_head *bh = NULL;
 	struct ext4_extent_header *eh;
@@ -1312,9 +1313,7 @@ static int ext4_ext_search_right(struct inode *inode,
 				return -EIO;
 			}
 		}
-		*logical = le32_to_cpu(ex->ee_block);
-		*phys = ext4_ext_pblock(ex);
-		return 0;
+		goto found_extent;
 	}
 
 	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
@@ -1327,9 +1326,7 @@ static int ext4_ext_search_right(struct inode *inode,
 	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
 		/* next allocated block in this leaf */
 		ex++;
-		*logical = le32_to_cpu(ex->ee_block);
-		*phys = ext4_ext_pblock(ex);
-		return 0;
+		goto found_extent;
 	}
 
 	/* go up and search for index to the right */
@@ -1372,9 +1369,12 @@ got_index:
 		return -EIO;
 	}
 	ex = EXT_FIRST_EXTENT(eh);
+found_extent:
 	*logical = le32_to_cpu(ex->ee_block);
 	*phys = ext4_ext_pblock(ex);
-	put_bh(bh);
+	*ret_ex = ex;
+	if (bh)
+		put_bh(bh);
 	return 0;
 }
 
@@ -1627,7 +1627,8 @@ static int ext4_ext_try_to_merge(struct inode *inode,
  * such that there will be no overlap, and then returns 1.
  * If there is no overlap found, it returns 0.
  */
-static unsigned int ext4_ext_check_overlap(struct inode *inode,
+static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
+					   struct inode *inode,
 					   struct ext4_extent *newext,
 					   struct ext4_ext_path *path)
 {
@@ -1641,6 +1642,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
 	if (!path[depth].p_ext)
 		goto out;
 	b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+	b2 &= ~(sbi->s_cluster_ratio - 1);
 
 	/*
 	 * get the next allocated block if the extent in the path
@@ -1650,6 +1652,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
 		b2 = ext4_ext_next_allocated_block(path);
 		if (b2 == EXT_MAX_BLOCKS)
 			goto out;
+		b2 &= ~(sbi->s_cluster_ratio - 1);
 	}
 
 	/* check for wrap through zero on extent logical start block*/
@@ -3294,6 +3297,106 @@ out2:
 }
 
 /*
+ * get_implied_cluster_alloc - check to see if the requested
+ * allocation (in the map structure) overlaps with a cluster already
+ * allocated in an extent.
+ *	@sbi	The ext4-specific superblock structure
+ *	@map	The requested lblk->pblk mapping
+ *	@ex	The extent structure which might contain an implied
+ *		cluster allocation
+ *
+ * This function is called by ext4_ext_map_blocks() after we failed to
+ * find blocks that were already in the inode's extent tree.  Hence,
+ * we know that the beginning of the requested region cannot overlap
+ * the extent from the inode's extent tree.  There are three cases we
+ * want to catch.  The first is this case:
+ *
+ *                  |--- cluster # N--|
+ *    |--- extent ---|  |---- requested region ---|
+ *                      |==========|
+ *
+ * The second case that we need to test for is this one:
+ *
+ *   |--------- cluster # N ----------------|
+ *      |--- requested region --|   |------- extent ----|
+ *      |=======================|
+ *
+ * The third case is when the requested region lies between two extents
+ * within the same cluster:
+ *          |------------- cluster # N-------------|
+ * |----- ex -----|                  |---- ex_right ----|
+ *                  |------ requested region ------|
+ *                  |================|
+ *
+ * In each of the above cases, we need to set map->m_pblk and
+ * map->m_len so they correspond to the extent labelled "|====|" in
+ * cluster #N, since it is already in use for data in cluster
+ * EXT4_B2C(sbi, map->m_lblk).  We will then return 1 to signal to
+ * ext4_ext_map_blocks() that map->m_pblk should be treated as a new
+ * "allocated" block region.  Otherwise, we will return 0 and
+ * ext4_ext_map_blocks() will then allocate one or more new clusters
+ * by calling ext4_mb_new_blocks().
+ */
+static int get_implied_cluster_alloc(struct ext4_sb_info *sbi,
+				     struct ext4_map_blocks *map,
+				     struct ext4_extent *ex,
+				     struct ext4_ext_path *path)
+{
+	ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+	ext4_lblk_t ex_cluster_start, ex_cluster_end;
+	ext4_lblk_t rr_cluster_start, rr_cluster_end;
+	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+	ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
+	unsigned short ee_len = ext4_ext_get_actual_len(ex);
+
+	/* The extent passed in that we are trying to match */
+	ex_cluster_start = EXT4_B2C(sbi, ee_block);
+	ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
+
+	/* The requested region passed into ext4_map_blocks() */
+	rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
+	rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
+
+	if ((rr_cluster_start == ex_cluster_end) ||
+	    (rr_cluster_start == ex_cluster_start)) {
+		if (rr_cluster_start == ex_cluster_end)
+			ee_start += ee_len - 1;
+		map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
+			c_offset;
+		map->m_len = min(map->m_len,
+				 (unsigned) sbi->s_cluster_ratio - c_offset);
+		/*
+		 * Check for and handle this case:
+		 *
+		 *   |--------- cluster # N-------------|
+		 *                    |------- extent ----|
+		 *      |--- requested region ---|
+		 *      |===========|
+		 */
+
+		if (map->m_lblk < ee_block)
+			map->m_len = min(map->m_len, ee_block - map->m_lblk);
+
+		/*
+		 * Check for the case where there is already another allocated
+		 * block to the right of 'ex' but before the end of the cluster.
+		 *
+		 *          |------------- cluster # N-------------|
+		 * |----- ex -----|                  |---- ex_right ----|
+		 *                  |------ requested region ------|
+		 *                  |================|
+		 */
+		if (map->m_lblk > ee_block) {
+			ext4_lblk_t next = ext4_ext_next_allocated_block(path);
+			map->m_len = min(map->m_len, next - map->m_lblk);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
  * Block allocation/map/preallocation routine for extents based files
  *
  *
@@ -3315,14 +3418,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map, int flags)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *ex2;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_fsblk_t newblock = 0;
-	int err = 0, depth, ret;
-	unsigned int allocated = 0;
+	int free_on_err = 0, err = 0, depth, ret;
+	unsigned int allocated = 0, offset = 0;
 	unsigned int punched_out = 0;
 	unsigned int result = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_lblk_t cluster_offset;
 	struct ext4_map_blocks punch_map;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -3508,9 +3613,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
 		goto out2;
 	}
+
 	/*
 	 * Okay, we need to do block allocation.
 	 */
+	newex.ee_block = cpu_to_le32(map->m_lblk);
+	cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+
+	/*
+	 * If we are doing bigalloc, check to see if the extent returned
+	 * by ext4_ext_find_extent() implies a cluster we can use.
+	 */
+	if (cluster_offset && ex &&
+	    get_implied_cluster_alloc(sbi, map, ex, path)) {
+		ar.len = allocated = map->m_len;
+		newblock = map->m_pblk;
+		goto got_allocated_blocks;
+	}
 
 	/* find neighbour allocated blocks */
 	ar.lleft = map->m_lblk;
@@ -3518,10 +3637,20 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (err)
 		goto out2;
 	ar.lright = map->m_lblk;
-	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
+	ex2 = NULL;
+	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
 	if (err)
 		goto out2;
 
+	/* Check if the extent after searching to the right implies a
+	 * cluster we can use. */
+	if ((sbi->s_cluster_ratio > 1) && ex2 &&
+	    get_implied_cluster_alloc(sbi, map, ex2, path)) {
+		ar.len = allocated = map->m_len;
+		newblock = map->m_pblk;
+		goto got_allocated_blocks;
+	}
+
 	/*
 	 * See if request is beyond maximum number of blocks we can have in
	 * a single extent. For an initialized extent this limit is
@@ -3536,9 +3665,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		map->m_len = EXT_UNINIT_MAX_LEN;
 
 	/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
-	newex.ee_block = cpu_to_le32(map->m_lblk);
 	newex.ee_len = cpu_to_le16(map->m_len);
-	err = ext4_ext_check_overlap(inode, &newex, path);
+	err = ext4_ext_check_overlap(sbi, inode, &newex, path);
 	if (err)
 		allocated = ext4_ext_get_actual_len(&newex);
 	else
@@ -3548,7 +3676,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ar.inode = inode;
 	ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
 	ar.logical = map->m_lblk;
-	ar.len = allocated;
+	/*
+	 * We calculate the offset from the beginning of the cluster
+	 * for the logical block number, since when we allocate a
+	 * physical cluster, the physical block should start at the
+	 * same offset from the beginning of the cluster.  This is
+	 * needed so that future calls to get_implied_cluster_alloc()
+	 * work correctly.
+	 */
+	offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+	ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
+	ar.goal -= offset;
+	ar.logical -= offset;
 	if (S_ISREG(inode->i_mode))
 		ar.flags = EXT4_MB_HINT_DATA;
 	else
@@ -3561,9 +3700,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		goto out2;
 	ext_debug("allocate new block: goal %llu, found %llu/%u\n",
 		  ar.goal, newblock, allocated);
+	free_on_err = 1;
+	ar.len = EXT4_C2B(sbi, ar.len) - offset;
+	if (ar.len > allocated)
+		ar.len = allocated;
 
+got_allocated_blocks:
 	/* try to insert new extent into found leaf and return */
-	ext4_ext_store_pblock(&newex, newblock);
+	ext4_ext_store_pblock(&newex, newblock + offset);
 	newex.ee_len = cpu_to_le16(ar.len);
 	/* Mark uninitialized */
 	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
@@ -3591,7 +3735,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (!err)
 		err = ext4_ext_insert_extent(handle, inode, path,
 					     &newex, flags);
-	if (err) {
+	if (err && free_on_err) {
 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
 		/* free data blocks we just allocated */
@@ -4115,7 +4259,6 @@ found_delayed_extent:
 		return EXT_BREAK;
 	return EXT_CONTINUE;
 }
-
 /* fiemap flags we can handle specified here */
 #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
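
To make the three cases described in get_implied_cluster_alloc()'s
comment concrete, here is a small user-space model of its core test
(a sketch under the same illustrative 16-block cluster assumption,
not kernel code): a request starting at lblk hits an already
allocated cluster exactly when lblk's cluster equals the first or
last cluster touched by the neighboring extent.

#include <stdio.h>

#define RATIO	16			/* blocks per cluster, illustrative */
#define B2C(b)	((b) / RATIO)		/* block number -> cluster number */

/* Model of the test at the top of get_implied_cluster_alloc(). */
static int implied_cluster(unsigned ee_block, unsigned ee_len, unsigned lblk)
{
	unsigned ex_cluster_start = B2C(ee_block);
	unsigned ex_cluster_end   = B2C(ee_block + ee_len - 1);
	unsigned rr_cluster_start = B2C(lblk);

	return rr_cluster_start == ex_cluster_start ||
	       rr_cluster_start == ex_cluster_end;
}

int main(void)
{
	/* Extent covers blocks 8..11, all in cluster 0; a request at
	 * block 13 is also in cluster 0, so that cluster is already
	 * allocated and the test reports 1. */
	printf("%d\n", implied_cluster(8, 4, 13));

	/* A request at block 40 is in cluster 2, which the extent
	 * never touches, so the test reports 0 and a fresh cluster
	 * must be allocated. */
	printf("%d\n", implied_cluster(8, 4, 40));
	return 0;
}

The same arithmetic explains the allocation-request adjustment in
ext4_ext_map_blocks() above: EXT4_NUM_B2C(sbi, offset + allocated)
is, in effect, a round-up division by the cluster ratio, converting a
block count that may straddle cluster boundaries into the number of
whole clusters to request from the allocator.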