aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
authorJose R. Santos <jrs@us.ibm.com>2008-07-11 19:27:31 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-07-11 19:27:31 -0400
commit772cb7c83ba256a11c7bf99a11bef3858d23767c (patch)
treea42b97e5cbd870a76b2646c2dcb658a92c53f637 /fs/ext4/ialloc.c
parent736603ab297506f4396cb5af592004499950fcfd (diff)
ext4: New inode allocation for FLEX_BG meta-data groups.
This patch mostly controls the way inode are allocated in order to make ialloc aware of flex_bg block group grouping. It achieves this by bypassing the Orlov allocator when block group meta-data are packed toghether through mke2fs. Since the impact on the block allocator is minimal, this patch should have little or no effect on other block allocation algorithms. By controlling the inode allocation, it can basically control where the initial search for new block begins and thus indirectly manipulate the block allocator. This allocator favors data and meta-data locality so the disk will gradually be filled from block group zero upward. This helps improve performance by reducing seek time. Since the group of inode tables within one flex_bg are treated as one giant inode table, uninitialized block groups would not need to partially initialize as many inode table as with Orlov which would help fsck time as the filesystem usage goes up. Signed-off-by: Jose R. Santos <jrs@us.ibm.com> Signed-off-by: Valerie Clement <valerie.clement@bull.net> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--fs/ext4/ialloc.c96
1 files changed, 96 insertions, 0 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b30cc79b9fcb..8b0a10acd708 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -157,6 +157,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
157 struct ext4_super_block * es; 157 struct ext4_super_block * es;
158 struct ext4_sb_info *sbi; 158 struct ext4_sb_info *sbi;
159 int fatal = 0, err; 159 int fatal = 0, err;
160 ext4_group_t flex_group;
160 161
161 if (atomic_read(&inode->i_count) > 1) { 162 if (atomic_read(&inode->i_count) > 1) {
162 printk ("ext4_free_inode: inode has count=%d\n", 163 printk ("ext4_free_inode: inode has count=%d\n",
@@ -232,6 +233,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
232 if (is_directory) 233 if (is_directory)
233 percpu_counter_dec(&sbi->s_dirs_counter); 234 percpu_counter_dec(&sbi->s_dirs_counter);
234 235
236 if (sbi->s_log_groups_per_flex) {
237 flex_group = ext4_flex_group(sbi, block_group);
238 spin_lock(sb_bgl_lock(sbi, flex_group));
239 sbi->s_flex_groups[flex_group].free_inodes++;
240 spin_unlock(sb_bgl_lock(sbi, flex_group));
241 }
235 } 242 }
236 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); 243 BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
237 err = ext4_journal_dirty_metadata(handle, bh2); 244 err = ext4_journal_dirty_metadata(handle, bh2);
@@ -286,6 +293,80 @@ static int find_group_dir(struct super_block *sb, struct inode *parent,
286 return ret; 293 return ret;
287} 294}
288 295
296#define free_block_ratio 10
297
298static int find_group_flex(struct super_block *sb, struct inode *parent,
299 ext4_group_t *best_group)
300{
301 struct ext4_sb_info *sbi = EXT4_SB(sb);
302 struct ext4_group_desc *desc;
303 struct buffer_head *bh;
304 struct flex_groups *flex_group = sbi->s_flex_groups;
305 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
306 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
307 ext4_group_t ngroups = sbi->s_groups_count;
308 int flex_size = ext4_flex_bg_size(sbi);
309 ext4_group_t best_flex = parent_fbg_group;
310 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
311 int flexbg_free_blocks;
312 int flex_freeb_ratio;
313 ext4_group_t n_fbg_groups;
314 ext4_group_t i;
315
316 n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
317 sbi->s_log_groups_per_flex;
318
319find_close_to_parent:
320 flexbg_free_blocks = flex_group[best_flex].free_blocks;
321 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
322 if (flex_group[best_flex].free_inodes &&
323 flex_freeb_ratio > free_block_ratio)
324 goto found_flexbg;
325
326 if (best_flex && best_flex == parent_fbg_group) {
327 best_flex--;
328 goto find_close_to_parent;
329 }
330
331 for (i = 0; i < n_fbg_groups; i++) {
332 if (i == parent_fbg_group || i == parent_fbg_group - 1)
333 continue;
334
335 flexbg_free_blocks = flex_group[i].free_blocks;
336 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
337
338 if (flex_freeb_ratio > free_block_ratio &&
339 flex_group[i].free_inodes) {
340 best_flex = i;
341 goto found_flexbg;
342 }
343
344 if (best_flex < 0 ||
345 (flex_group[i].free_blocks >
346 flex_group[best_flex].free_blocks &&
347 flex_group[i].free_inodes))
348 best_flex = i;
349 }
350
351 if (!flex_group[best_flex].free_inodes ||
352 !flex_group[best_flex].free_blocks)
353 return -1;
354
355found_flexbg:
356 for (i = best_flex * flex_size; i < ngroups &&
357 i < (best_flex + 1) * flex_size; i++) {
358 desc = ext4_get_group_desc(sb, i, &bh);
359 if (le16_to_cpu(desc->bg_free_inodes_count)) {
360 *best_group = i;
361 goto out;
362 }
363 }
364
365 return -1;
366out:
367 return 0;
368}
369
289/* 370/*
290 * Orlov's allocator for directories. 371 * Orlov's allocator for directories.
291 * 372 *
@@ -501,6 +582,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
501 struct inode *ret; 582 struct inode *ret;
502 ext4_group_t i; 583 ext4_group_t i;
503 int free = 0; 584 int free = 0;
585 ext4_group_t flex_group;
504 586
505 /* Cannot create files in a deleted directory */ 587 /* Cannot create files in a deleted directory */
506 if (!dir || !dir->i_nlink) 588 if (!dir || !dir->i_nlink)
@@ -514,6 +596,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
514 596
515 sbi = EXT4_SB(sb); 597 sbi = EXT4_SB(sb);
516 es = sbi->s_es; 598 es = sbi->s_es;
599
600 if (sbi->s_log_groups_per_flex) {
601 ret2 = find_group_flex(sb, dir, &group);
602 goto got_group;
603 }
604
517 if (S_ISDIR(mode)) { 605 if (S_ISDIR(mode)) {
518 if (test_opt (sb, OLDALLOC)) 606 if (test_opt (sb, OLDALLOC))
519 ret2 = find_group_dir(sb, dir, &group); 607 ret2 = find_group_dir(sb, dir, &group);
@@ -522,6 +610,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
522 } else 610 } else
523 ret2 = find_group_other(sb, dir, &group); 611 ret2 = find_group_other(sb, dir, &group);
524 612
613got_group:
525 err = -ENOSPC; 614 err = -ENOSPC;
526 if (ret2 == -1) 615 if (ret2 == -1)
527 goto out; 616 goto out;
@@ -676,6 +765,13 @@ got:
676 percpu_counter_inc(&sbi->s_dirs_counter); 765 percpu_counter_inc(&sbi->s_dirs_counter);
677 sb->s_dirt = 1; 766 sb->s_dirt = 1;
678 767
768 if (sbi->s_log_groups_per_flex) {
769 flex_group = ext4_flex_group(sbi, group);
770 spin_lock(sb_bgl_lock(sbi, flex_group));
771 sbi->s_flex_groups[flex_group].free_inodes--;
772 spin_unlock(sb_bgl_lock(sbi, flex_group));
773 }
774
679 inode->i_uid = current->fsuid; 775 inode->i_uid = current->fsuid;
680 if (test_opt (sb, GRPID)) 776 if (test_opt (sb, GRPID))
681 inode->i_gid = dir->i_gid; 777 inode->i_gid = dir->i_gid;