aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2009-09-18 13:34:02 -0400
committerTheodore Ts'o <tytso@mit.edu>2009-09-18 13:34:02 -0400
commit50797481a7bdee548589506d7d7b48b08bc14dcd (patch)
tree19989d27e3a69c6c2c507f798f55a2d9a47a5d27 /fs
parent4ba74d00a20256e22f159cb288ff34b587608917 (diff)
ext4: Avoid group preallocation for closed files
Currently the group preallocation code tries to find a large (512-block) free chunk from which to do per-cpu group allocation for small files. The problem with this scheme is that it leaves the filesystem horribly fragmented. In the worst case, if the filesystem is unmounted and remounted (after a system shutdown, for example) we forget the fact that we were using a particular (now partially filled) 512-block extent. So the next time we try to allocate space for a small file, we will find *another* completely free 512-block chunk from which to allocate small files. Given that there are 32,768 blocks in a block group, after 64 iterations of "mount, write one 4k file in a directory, unmount", the block group will have 64 files, each separated by 511 blocks, and the block group will no longer have any completely free 512-block chunks available for group preallocation. So if we try to allocate blocks for a file that has been closed, such that we know the final size of the file, and the filesystem is not busy, avoid using group preallocation. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h30
-rw-r--r--fs/ext4/mballoc.c10
2 files changed, 38 insertions, 2 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 70aa951ecb3c..2e9a2036c114 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -952,6 +952,7 @@ struct ext4_sb_info {
952 atomic_t s_mb_lost_chunks; 952 atomic_t s_mb_lost_chunks;
953 atomic_t s_mb_preallocated; 953 atomic_t s_mb_preallocated;
954 atomic_t s_mb_discarded; 954 atomic_t s_mb_discarded;
955 atomic_t s_lock_busy;
955 956
956 /* locality groups */ 957 /* locality groups */
957 struct ext4_locality_group *s_locality_groups; 958 struct ext4_locality_group *s_locality_groups;
@@ -1593,15 +1594,42 @@ struct ext4_group_info {
1593#define EXT4_MB_GRP_NEED_INIT(grp) \ 1594#define EXT4_MB_GRP_NEED_INIT(grp) \
1594 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) 1595 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
1595 1596
1597#define EXT4_MAX_CONTENTION 8
1598#define EXT4_CONTENTION_THRESHOLD 2
1599
1596static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, 1600static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
1597 ext4_group_t group) 1601 ext4_group_t group)
1598{ 1602{
1599 return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); 1603 return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
1600} 1604}
1601 1605
1606/*
1607 * Returns true if the filesystem is busy enough that attempts to
1608 * access the block group locks have run into contention.
1609 */
1610static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
1611{
1612 return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
1613}
1614
1602static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) 1615static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
1603{ 1616{
1604 spin_lock(ext4_group_lock_ptr(sb, group)); 1617 spinlock_t *lock = ext4_group_lock_ptr(sb, group);
1618 if (spin_trylock(lock))
1619 /*
1620 * We're able to grab the lock right away, so drop the
1621 * lock contention counter.
1622 */
1623 atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
1624 else {
1625 /*
1626 * The lock is busy, so bump the contention counter,
1627 * and then wait on the spin lock.
1628 */
1629 atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
1630 EXT4_MAX_CONTENTION);
1631 spin_lock(lock);
1632 }
1605} 1633}
1606 1634
1607static inline void ext4_unlock_group(struct super_block *sb, 1635static inline void ext4_unlock_group(struct super_block *sb,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 90a30ce822fc..faf5bd056a93 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4191,9 +4191,17 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4191 return; 4191 return;
4192 4192
4193 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 4193 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
4194 isize = i_size_read(ac->ac_inode) >> bsbits; 4194 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4195 >> bsbits;
4195 size = max(size, isize); 4196 size = max(size, isize);
4196 4197
4198 if ((size == isize) &&
4199 !ext4_fs_is_busy(sbi) &&
4200 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4201 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4202 return;
4203 }
4204
4197 /* don't use group allocation for large files */ 4205 /* don't use group allocation for large files */
4198 if (size >= sbi->s_mb_stream_request) { 4206 if (size >= sbi->s_mb_stream_request) {
4199 ac->ac_flags |= EXT4_MB_STREAM_ALLOC; 4207 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;