diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-08-18 18:00:57 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-08-18 18:00:57 -0400 |
commit | 5e745b041f2ccad63077118b40468521306f3962 (patch) | |
tree | 2a4d53c884f92899ee8e4f541c32861a4a577e1d | |
parent | 91246c009094142f95ecc7573b7caed2bcef52c7 (diff) |
ext4: Fix small file fragmentation
For small file block allocations, mballoc uses per-CPU prealloc
space. Use the goal block when searching for the right prealloc
space. Also make sure ext4_da_writepages tries to write
all the pages for small files in a single attempt.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/inode.c | 21 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 53 |
2 files changed, 61 insertions, 13 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d1906d9a22de..7e91913e325b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2282 | static int ext4_da_writepages(struct address_space *mapping, | 2282 | static int ext4_da_writepages(struct address_space *mapping, |
2283 | struct writeback_control *wbc) | 2283 | struct writeback_control *wbc) |
2284 | { | 2284 | { |
2285 | struct inode *inode = mapping->host; | ||
2286 | handle_t *handle = NULL; | 2285 | handle_t *handle = NULL; |
2287 | int needed_blocks; | ||
2288 | int ret = 0; | ||
2289 | long to_write; | ||
2290 | loff_t range_start = 0; | 2286 | loff_t range_start = 0; |
2291 | long pages_skipped = 0; | 2287 | struct inode *inode = mapping->host; |
2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | ||
2289 | long to_write, pages_skipped = 0; | ||
2290 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | ||
2292 | 2291 | ||
2293 | /* | 2292 | /* |
2294 | * No pages to write? This is mainly a kludge to avoid starting | 2293 | * No pages to write? This is mainly a kludge to avoid starting |
@@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2297 | */ | 2296 | */ |
2298 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 2297 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2299 | return 0; | 2298 | return 0; |
2299 | /* | ||
2300 | * Make sure nr_to_write is >= sbi->s_mb_stream_request. | ||
2301 | * This makes sure a small file's blocks are allocated in a | ||
2302 | * single attempt, so that small files | ||
2303 | * get less fragmented. | ||
2304 | */ | ||
2305 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { | ||
2306 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
2307 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
2308 | } | ||
2300 | 2309 | ||
2301 | if (!wbc->range_cyclic) | 2310 | if (!wbc->range_cyclic) |
2302 | /* | 2311 | /* |
@@ -2377,7 +2386,7 @@ restart_loop: | |||
2377 | } | 2386 | } |
2378 | 2387 | ||
2379 | out_writepages: | 2388 | out_writepages: |
2380 | wbc->nr_to_write = to_write; | 2389 | wbc->nr_to_write = to_write - nr_to_writebump; |
2381 | wbc->range_start = range_start; | 2390 | wbc->range_start = range_start; |
2382 | return ret; | 2391 | return ret; |
2383 | } | 2392 | } |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..e0e3a5eb1ddb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3282 | } | 3282 | } |
3283 | 3283 | ||
3284 | /* | 3284 | /* |
3285 | * Return the prealloc space that have minimal distance | ||
3286 | * from the goal block. @cpa is the prealloc | ||
3287 | * space that is having currently known minimal distance | ||
3288 | * from the goal block. | ||
3289 | */ | ||
3290 | static struct ext4_prealloc_space * | ||
3291 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
3292 | struct ext4_prealloc_space *pa, | ||
3293 | struct ext4_prealloc_space *cpa) | ||
3294 | { | ||
3295 | ext4_fsblk_t cur_distance, new_distance; | ||
3296 | |||
3297 | if (cpa == NULL) { | ||
3298 | atomic_inc(&pa->pa_count); | ||
3299 | return pa; | ||
3300 | } | ||
3301 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
3302 | new_distance = abs(goal_block - pa->pa_pstart); | ||
3303 | |||
3304 | if (cur_distance < new_distance) | ||
3305 | return cpa; | ||
3306 | |||
3307 | /* drop the previous reference */ | ||
3308 | atomic_dec(&cpa->pa_count); | ||
3309 | atomic_inc(&pa->pa_count); | ||
3310 | return pa; | ||
3311 | } | ||
3312 | |||
3313 | /* | ||
3285 | * search goal blocks in preallocated space | 3314 | * search goal blocks in preallocated space |
3286 | */ | 3315 | */ |
3287 | static noinline_for_stack int | 3316 | static noinline_for_stack int |
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3290 | int order, i; | 3319 | int order, i; |
3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3320 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3292 | struct ext4_locality_group *lg; | 3321 | struct ext4_locality_group *lg; |
3293 | struct ext4_prealloc_space *pa; | 3322 | struct ext4_prealloc_space *pa, *cpa = NULL; |
3323 | ext4_fsblk_t goal_block; | ||
3294 | 3324 | ||
3295 | /* only data can be preallocated */ | 3325 | /* only data can be preallocated */ |
3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3326 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3363 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3334 | order = PREALLOC_TB_SIZE - 1; | 3364 | order = PREALLOC_TB_SIZE - 1; |
3335 | 3365 | ||
3366 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
3367 | ac->ac_g_ex.fe_start + | ||
3368 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3369 | /* | ||
3370 | * search for the prealloc space that is having | ||
3371 | * minimal distance from the goal block. | ||
3372 | */ | ||
3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3373 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
3337 | rcu_read_lock(); | 3374 | rcu_read_lock(); |
3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3375 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3340 | spin_lock(&pa->pa_lock); | 3377 | spin_lock(&pa->pa_lock); |
3341 | if (pa->pa_deleted == 0 && | 3378 | if (pa->pa_deleted == 0 && |
3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3379 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
3343 | atomic_inc(&pa->pa_count); | 3380 | |
3344 | ext4_mb_use_group_pa(ac, pa); | 3381 | cpa = ext4_mb_check_group_pa(goal_block, |
3345 | spin_unlock(&pa->pa_lock); | 3382 | pa, cpa); |
3346 | ac->ac_criteria = 20; | ||
3347 | rcu_read_unlock(); | ||
3348 | return 1; | ||
3349 | } | 3383 | } |
3350 | spin_unlock(&pa->pa_lock); | 3384 | spin_unlock(&pa->pa_lock); |
3351 | } | 3385 | } |
3352 | rcu_read_unlock(); | 3386 | rcu_read_unlock(); |
3353 | } | 3387 | } |
3388 | if (cpa) { | ||
3389 | ext4_mb_use_group_pa(ac, cpa); | ||
3390 | ac->ac_criteria = 20; | ||
3391 | return 1; | ||
3392 | } | ||
3354 | return 0; | 3393 | return 0; |
3355 | } | 3394 | } |
3356 | 3395 | ||