aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andreas Dilger <adilger@sun.com> 2009-06-13 11:45:35 -0400
committer Theodore Ts'o <tytso@mit.edu> 2009-06-13 11:45:35 -0400
commit 11013911daea4820147ae6d7094dd7c6894e8651 (patch)
tree f8e9f9d2058e0dfc826d4df3d88a7eb18a56b3f3
parent f157a4aa98a18bd3817a72bea90d48494e2586e7 (diff)
ext4: teach the inode allocator to use a goal inode number
Enhance the inode allocator to take a goal inode number as a parameter; if it is specified, it takes precedence over Orlov or parent directory inode allocation algorithms. The extents migration function uses the goal inode number so that the extent trees allocated by the migration function use the correct flex_bg. In the future, the goal inode functionality will also be used to allocate an adjacent inode for the extended attributes. Also, for testing purposes the goal inode number can be specified via /sys/fs/ext4/{dev}/inode_goal. This can be useful for testing inode allocation beyond 2^32 blocks on very large filesystems. Signed-off-by: Andreas Dilger <adilger@sun.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- Documentation/ABI/testing/sysfs-fs-ext4 10
-rw-r--r-- fs/ext4/ext4.h 3
-rw-r--r-- fs/ext4/ialloc.c 16
-rw-r--r-- fs/ext4/migrate.c 5
-rw-r--r-- fs/ext4/namei.c 10
-rw-r--r-- fs/ext4/super.c 2
6 files changed, 36 insertions, 10 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index 4e79074de282..5fb709997d96 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -79,3 +79,13 @@ Description:
79 This file is read-only and shows the number of 79 This file is read-only and shows the number of
80 kilobytes of data that have been written to this 80 kilobytes of data that have been written to this
81 filesystem since it was mounted. 81 filesystem since it was mounted.
82
83What: /sys/fs/ext4/<disk>/inode_goal
84Date: June 2008
85Contact: "Theodore Ts'o" <tytso@mit.edu>
86Description:
87 Tuning parameter which (if non-zero) controls the goal
88 inode used by the inode allocator in preference to
89 all other allocation heuristics. This is intended for
90 debugging use only, and should be 0 on production
91 systems.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d035cf149e0e..746cdcba969d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -863,6 +863,7 @@ struct ext4_sb_info {
863 int s_inode_size; 863 int s_inode_size;
864 int s_first_ino; 864 int s_first_ino;
865 unsigned int s_inode_readahead_blks; 865 unsigned int s_inode_readahead_blks;
866 unsigned int s_inode_goal;
866 spinlock_t s_next_gen_lock; 867 spinlock_t s_next_gen_lock;
867 u32 s_next_generation; 868 u32 s_next_generation;
868 u32 s_hash_seed[4]; 869 u32 s_hash_seed[4];
@@ -1316,7 +1317,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1316 1317
1317/* ialloc.c */ 1318/* ialloc.c */
1318extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, 1319extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
1319 const struct qstr *qstr); 1320 const struct qstr *qstr, __u32 goal);
1320extern void ext4_free_inode(handle_t *, struct inode *); 1321extern void ext4_free_inode(handle_t *, struct inode *);
1321extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 1322extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1322extern unsigned long ext4_count_free_inodes(struct super_block *); 1323extern unsigned long ext4_count_free_inodes(struct super_block *);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3f98ee712ff4..2f645732e3b7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -799,7 +799,7 @@ err_ret:
799 * group to find a free inode. 799 * group to find a free inode.
800 */ 800 */
801struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, 801struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
802 const struct qstr *qstr) 802 const struct qstr *qstr, __u32 goal)
803{ 803{
804 struct super_block *sb; 804 struct super_block *sb;
805 struct buffer_head *inode_bitmap_bh = NULL; 805 struct buffer_head *inode_bitmap_bh = NULL;
@@ -830,6 +830,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
830 ei = EXT4_I(inode); 830 ei = EXT4_I(inode);
831 sbi = EXT4_SB(sb); 831 sbi = EXT4_SB(sb);
832 832
833 if (!goal)
834 goal = sbi->s_inode_goal;
835
836 if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
837 group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
838 ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
839 ret2 = 0;
840 goto got_group;
841 }
842
833 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 843 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
834 ret2 = find_group_flex(sb, dir, &group); 844 ret2 = find_group_flex(sb, dir, &group);
835 if (ret2 == -1) { 845 if (ret2 == -1) {
@@ -858,7 +868,7 @@ got_group:
858 if (ret2 == -1) 868 if (ret2 == -1)
859 goto out; 869 goto out;
860 870
861 for (i = 0; i < ngroups; i++) { 871 for (i = 0; i < ngroups; i++, ino = 0) {
862 err = -EIO; 872 err = -EIO;
863 873
864 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 874 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -870,8 +880,6 @@ got_group:
870 if (!inode_bitmap_bh) 880 if (!inode_bitmap_bh)
871 goto fail; 881 goto fail;
872 882
873 ino = 0;
874
875repeat_in_this_group: 883repeat_in_this_group:
876 ino = ext4_find_next_zero_bit((unsigned long *) 884 ino = ext4_find_next_zero_bit((unsigned long *)
877 inode_bitmap_bh->b_data, 885 inode_bitmap_bh->b_data,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 80d075b8aeaf..313a50b39741 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -458,6 +458,7 @@ int ext4_ext_migrate(struct inode *inode)
458 struct inode *tmp_inode = NULL; 458 struct inode *tmp_inode = NULL;
459 struct list_blocks_struct lb; 459 struct list_blocks_struct lb;
460 unsigned long max_entries; 460 unsigned long max_entries;
461 __u32 goal;
461 462
462 /* 463 /*
463 * If the filesystem does not support extents, or the inode 464 * If the filesystem does not support extents, or the inode
@@ -483,8 +484,10 @@ int ext4_ext_migrate(struct inode *inode)
483 retval = PTR_ERR(handle); 484 retval = PTR_ERR(handle);
484 return retval; 485 return retval;
485 } 486 }
487 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
488 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
486 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 489 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
487 S_IFREG, 0); 490 S_IFREG, 0, goal);
488 if (IS_ERR(tmp_inode)) { 491 if (IS_ERR(tmp_inode)) {
489 retval = -ENOMEM; 492 retval = -ENOMEM;
490 ext4_journal_stop(handle); 493 ext4_journal_stop(handle);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5f00d2418a83..de04013d16ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1782,7 +1782,7 @@ retry:
1782 if (IS_DIRSYNC(dir)) 1782 if (IS_DIRSYNC(dir))
1783 ext4_handle_sync(handle); 1783 ext4_handle_sync(handle);
1784 1784
1785 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name); 1785 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1786 err = PTR_ERR(inode); 1786 err = PTR_ERR(inode);
1787 if (!IS_ERR(inode)) { 1787 if (!IS_ERR(inode)) {
1788 inode->i_op = &ext4_file_inode_operations; 1788 inode->i_op = &ext4_file_inode_operations;
@@ -1816,7 +1816,7 @@ retry:
1816 if (IS_DIRSYNC(dir)) 1816 if (IS_DIRSYNC(dir))
1817 ext4_handle_sync(handle); 1817 ext4_handle_sync(handle);
1818 1818
1819 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name); 1819 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
1820 err = PTR_ERR(inode); 1820 err = PTR_ERR(inode);
1821 if (!IS_ERR(inode)) { 1821 if (!IS_ERR(inode)) {
1822 init_special_inode(inode, inode->i_mode, rdev); 1822 init_special_inode(inode, inode->i_mode, rdev);
@@ -1853,7 +1853,8 @@ retry:
1853 if (IS_DIRSYNC(dir)) 1853 if (IS_DIRSYNC(dir))
1854 ext4_handle_sync(handle); 1854 ext4_handle_sync(handle);
1855 1855
1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode, &dentry->d_name); 1856 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1857 &dentry->d_name, 0);
1857 err = PTR_ERR(inode); 1858 err = PTR_ERR(inode);
1858 if (IS_ERR(inode)) 1859 if (IS_ERR(inode))
1859 goto out_stop; 1860 goto out_stop;
@@ -2264,7 +2265,8 @@ retry:
2264 if (IS_DIRSYNC(dir)) 2265 if (IS_DIRSYNC(dir))
2265 ext4_handle_sync(handle); 2266 ext4_handle_sync(handle);
2266 2267
2267 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name); 2268 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2269 &dentry->d_name, 0);
2268 err = PTR_ERR(inode); 2270 err = PTR_ERR(inode);
2269 if (IS_ERR(inode)) 2271 if (IS_ERR(inode))
2270 goto out_stop; 2272 goto out_stop;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 04486a53469f..23013d303f81 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2206,6 +2206,7 @@ EXT4_RO_ATTR(session_write_kbytes);
2206EXT4_RO_ATTR(lifetime_write_kbytes); 2206EXT4_RO_ATTR(lifetime_write_kbytes);
2207EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 2207EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2208 inode_readahead_blks_store, s_inode_readahead_blks); 2208 inode_readahead_blks_store, s_inode_readahead_blks);
2209EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2209EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 2210EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2210EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 2211EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2211EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); 2212EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -2218,6 +2219,7 @@ static struct attribute *ext4_attrs[] = {
2218 ATTR_LIST(session_write_kbytes), 2219 ATTR_LIST(session_write_kbytes),
2219 ATTR_LIST(lifetime_write_kbytes), 2220 ATTR_LIST(lifetime_write_kbytes),
2220 ATTR_LIST(inode_readahead_blks), 2221 ATTR_LIST(inode_readahead_blks),
2222 ATTR_LIST(inode_goal),
2221 ATTR_LIST(mb_stats), 2223 ATTR_LIST(mb_stats),
2222 ATTR_LIST(mb_max_to_scan), 2224 ATTR_LIST(mb_max_to_scan),
2223 ATTR_LIST(mb_min_to_scan), 2225 ATTR_LIST(mb_min_to_scan),