aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2008-08-18 18:00:57 -0400
committer Theodore Ts'o <tytso@mit.edu> 2008-08-18 18:00:57 -0400
commit 5e745b041f2ccad63077118b40468521306f3962 (patch)
tree 2a4d53c884f92899ee8e4f541c32861a4a577e1d
parent 91246c009094142f95ecc7573b7caed2bcef52c7 (diff)
ext4: Fix small file fragmentation
For small file block allocations, mballoc uses per-CPU prealloc space. Use the goal block when searching for the right prealloc space. Also make sure ext4_da_writepages tries to write all the pages for small files in a single attempt.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/inode.c 21
-rw-r--r-- fs/ext4/mballoc.c 53
2 files changed, 61 insertions, 13 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d1906d9a22de..7e91913e325b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2282static int ext4_da_writepages(struct address_space *mapping, 2282static int ext4_da_writepages(struct address_space *mapping,
2283 struct writeback_control *wbc) 2283 struct writeback_control *wbc)
2284{ 2284{
2285 struct inode *inode = mapping->host;
2286 handle_t *handle = NULL; 2285 handle_t *handle = NULL;
2287 int needed_blocks;
2288 int ret = 0;
2289 long to_write;
2290 loff_t range_start = 0; 2286 loff_t range_start = 0;
2291 long pages_skipped = 0; 2287 struct inode *inode = mapping->host;
2288 int needed_blocks, ret = 0, nr_to_writebump = 0;
2289 long to_write, pages_skipped = 0;
2290 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2292 2291
2293 /* 2292 /*
2294 * No pages to write? This is mainly a kludge to avoid starting 2293 * No pages to write? This is mainly a kludge to avoid starting
@@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping,
2297 */ 2296 */
2298 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 2297 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2299 return 0; 2298 return 0;
2299 /*
2300 * Make sure nr_to_write is >= sbi->s_mb_stream_request.
2301 * This makes sure that the blocks of small files are
2302 * allocated in a single attempt, which ensures that
2303 * small files get less fragmented.
2304 */
2305 if (wbc->nr_to_write < sbi->s_mb_stream_request) {
2306 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2307 wbc->nr_to_write = sbi->s_mb_stream_request;
2308 }
2300 2309
2301 if (!wbc->range_cyclic) 2310 if (!wbc->range_cyclic)
2302 /* 2311 /*
@@ -2377,7 +2386,7 @@ restart_loop:
2377 } 2386 }
2378 2387
2379out_writepages: 2388out_writepages:
2380 wbc->nr_to_write = to_write; 2389 wbc->nr_to_write = to_write - nr_to_writebump;
2381 wbc->range_start = range_start; 2390 wbc->range_start = range_start;
2382 return ret; 2391 return ret;
2383} 2392}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 865e9ddb44d4..e0e3a5eb1ddb 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3282} 3282}
3283 3283
3284/* 3284/*
3285 * Return the prealloc space that has the minimal distance
3286 * from the goal block. @cpa is the prealloc
3287 * space that currently has the known minimal distance
3288 * from the goal block.
3289 */
3290static struct ext4_prealloc_space *
3291ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3292 struct ext4_prealloc_space *pa,
3293 struct ext4_prealloc_space *cpa)
3294{
3295 ext4_fsblk_t cur_distance, new_distance;
3296
3297 if (cpa == NULL) {
3298 atomic_inc(&pa->pa_count);
3299 return pa;
3300 }
3301 cur_distance = abs(goal_block - cpa->pa_pstart);
3302 new_distance = abs(goal_block - pa->pa_pstart);
3303
3304 if (cur_distance < new_distance)
3305 return cpa;
3306
3307 /* drop the previous reference */
3308 atomic_dec(&cpa->pa_count);
3309 atomic_inc(&pa->pa_count);
3310 return pa;
3311}
3312
3313/*
3285 * search goal blocks in preallocated space 3314 * search goal blocks in preallocated space
3286 */ 3315 */
3287static noinline_for_stack int 3316static noinline_for_stack int
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3290 int order, i; 3319 int order, i;
3291 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3320 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3292 struct ext4_locality_group *lg; 3321 struct ext4_locality_group *lg;
3293 struct ext4_prealloc_space *pa; 3322 struct ext4_prealloc_space *pa, *cpa = NULL;
3323 ext4_fsblk_t goal_block;
3294 3324
3295 /* only data can be preallocated */ 3325 /* only data can be preallocated */
3296 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3326 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3333 /* The max size of hash table is PREALLOC_TB_SIZE */ 3363 /* The max size of hash table is PREALLOC_TB_SIZE */
3334 order = PREALLOC_TB_SIZE - 1; 3364 order = PREALLOC_TB_SIZE - 1;
3335 3365
3366 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
3367 ac->ac_g_ex.fe_start +
3368 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3369 /*
3370 * search for the prealloc space that has the
3371 * minimal distance from the goal block.
3372 */
3336 for (i = order; i < PREALLOC_TB_SIZE; i++) { 3373 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3337 rcu_read_lock(); 3374 rcu_read_lock();
3338 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], 3375 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3340 spin_lock(&pa->pa_lock); 3377 spin_lock(&pa->pa_lock);
3341 if (pa->pa_deleted == 0 && 3378 if (pa->pa_deleted == 0 &&
3342 pa->pa_free >= ac->ac_o_ex.fe_len) { 3379 pa->pa_free >= ac->ac_o_ex.fe_len) {
3343 atomic_inc(&pa->pa_count); 3380
3344 ext4_mb_use_group_pa(ac, pa); 3381 cpa = ext4_mb_check_group_pa(goal_block,
3345 spin_unlock(&pa->pa_lock); 3382 pa, cpa);
3346 ac->ac_criteria = 20;
3347 rcu_read_unlock();
3348 return 1;
3349 } 3383 }
3350 spin_unlock(&pa->pa_lock); 3384 spin_unlock(&pa->pa_lock);
3351 } 3385 }
3352 rcu_read_unlock(); 3386 rcu_read_unlock();
3353 } 3387 }
3388 if (cpa) {
3389 ext4_mb_use_group_pa(ac, cpa);
3390 ac->ac_criteria = 20;
3391 return 1;
3392 }
3354 return 0; 3393 return 0;
3355} 3394}
3356 3395