author		Jan Kara <jack@suse.cz>	2014-11-25 11:41:49 -0500
committer	Theodore Ts'o <tytso@mit.edu>	2014-11-25 11:41:49 -0500
commit		cbd7584e6ead1b79fb0b81573f158b57fa1f0b49 (patch)
tree		55594320c91d408ab9aa63f8fc1ef79b3556cd14 /fs/ext4
parent		0756b908a364c217bc2d8063783992ffe338b143 (diff)
ext4: fix block reservation for bigalloc filesystems
For bigalloc filesystems, we have to check whether a newly requested inode
block isn't already part of a cluster for which we already have a delayed
allocation reservation. This check happens in ext4_ext_map_blocks(), which
sets EXT4_MAP_FROM_CLUSTER when that is the case. However, if
ext4_da_map_blocks() finds information about the block in the extent cache,
we never call into ext4_ext_map_blocks() and thus always end up taking a new
reservation, even if the space for the cluster is already reserved. This
results in overreservation and premature ENOSPC reports.

Fix the problem by checking for an existing cluster reservation directly in
ext4_da_map_blocks(). That simplifies the logic and actually allows us to get
rid of the EXT4_MAP_FROM_CLUSTER flag completely.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
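In outline, the reservation path in ext4_da_map_blocks() now performs the
cluster check itself; the following is a condensed sketch of that check,
drawn from the inode.c hunk further below, with the surrounding error
handling trimmed:

	/* Reserve data space for this block only if no delayed allocation
	 * reservation already covers its cluster (or the filesystem has no
	 * bigalloc clusters at all), instead of relying on the
	 * EXT4_MAP_FROM_CLUSTER flag set by ext4_ext_map_blocks(). */
	if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
	    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
		ret = ext4_da_reserve_space(inode, iblock);
		if (ret)
			goto out_unlock;	/* not enough space to reserve */
	}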
Diffstat (limited to 'fs/ext4')
-rw-r--r--	fs/ext4/ext4.h		21
-rw-r--r--	fs/ext4/extents.c	12
-rw-r--r--	fs/ext4/inode.c		27
3 files changed, 9 insertions, 51 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 21a3b38395ff..7b3f3b1decff 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -158,17 +158,8 @@ struct ext4_allocation_request {
 #define EXT4_MAP_MAPPED		(1 << BH_Mapped)
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
-/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
- * ext4_map_blocks wants to know whether or not the underlying cluster has
- * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
- * the requested mapping was from previously mapped (or delayed allocated)
- * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
- * should never appear on buffer_head's state flags.
- */
-#define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
-				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_FROM_CLUSTER)
+				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -2790,16 +2781,6 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
 
 /*
- * Note that these flags will never ever appear in a buffer_head's state flag.
- * See EXT4_MAP_... to see where this is used.
- */
-enum ext4_state_bits {
-	BH_AllocFromCluster	/* allocated blocks were part of already
-				 * allocated cluster. */
-	= BH_JBDPrivateStart
-};
-
-/*
  * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
  * to mark the bitmap uptodate. We need to also zero-out the bitmap
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9eae2f4916ce..7ef2f11aca56 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4282,6 +4282,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
 	int set_unwritten = 0;
+	bool map_from_cluster = false;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
@@ -4358,10 +4359,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		}
 	}
 
-	if ((sbi->s_cluster_ratio > 1) &&
-	    ext4_find_delalloc_cluster(inode, map->m_lblk))
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
-
 	/*
 	 * requested block isn't allocated yet;
 	 * we couldn't try to create block if create flag is zero
@@ -4379,7 +4376,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * Okay, we need to do block allocation.
 	 */
-	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
 	newex.ee_block = cpu_to_le32(map->m_lblk);
 	cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
 
@@ -4391,7 +4387,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		map_from_cluster = true;
 		goto got_allocated_blocks;
 	}
 
@@ -4412,7 +4408,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	    get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
 		ar.len = allocated = map->m_len;
 		newblock = map->m_pblk;
-		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+		map_from_cluster = true;
 		goto got_allocated_blocks;
 	}
 
@@ -4538,7 +4534,7 @@ got_allocated_blocks:
 		 */
 		reserved_clusters = get_reserved_cluster_alloc(inode,
 						map->m_lblk, allocated);
-		if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
+		if (map_from_cluster) {
 			if (reserved_clusters) {
 				/*
 				 * We have clusters reserved for this range.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3356ab5395f4..2315e45161ee 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -416,11 +416,6 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
 	}
 	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
 		up_read((&EXT4_I(inode)->i_data_sem));
-	/*
-	 * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
-	 * because it shouldn't be marked in es_map->m_flags.
-	 */
-	map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
 
 	/*
 	 * We don't check m_len because extent will be collpased in status
@@ -1434,19 +1429,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 	 * file system block.
 	 */
 	down_read(&EXT4_I(inode)->i_data_sem);
-	if (ext4_has_inline_data(inode)) {
-		/*
-		 * We will soon create blocks for this page, and let
-		 * us pretend as if the blocks aren't allocated yet.
-		 * In case of clusters, we have to handle the work
-		 * of mapping from cluster so that the reserved space
-		 * is calculated properly.
-		 */
-		if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
-		    ext4_find_delalloc_cluster(inode, map->m_lblk))
-			map->m_flags |= EXT4_MAP_FROM_CLUSTER;
+	if (ext4_has_inline_data(inode))
 		retval = 0;
-	} else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+	else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		retval = ext4_ext_map_blocks(NULL, inode, map,
 					     EXT4_GET_BLOCKS_NO_PUT_HOLE);
 	else
@@ -1465,7 +1450,8 @@ add_delayed:
 	 * then we don't need to reserve it again. However we still need
 	 * to reserve metadata for every block we're going to write.
 	 */
-	if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
+	if (EXT4_SB(inode->i_sb)->s_cluster_ratio <= 1 ||
+	    !ext4_find_delalloc_cluster(inode, map->m_lblk)) {
 		ret = ext4_da_reserve_space(inode, iblock);
 		if (ret) {
 			/* not enough space to reserve */
@@ -1481,11 +1467,6 @@ add_delayed:
 		goto out_unlock;
 	}
 
-	/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
-	 * and it should not appear on the bh->b_state.
-	 */
-	map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
-
 	map_bh(bh, inode->i_sb, invalid_block);
 	set_buffer_new(bh);
 	set_buffer_delay(bh);