aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/proc.txt21
-rw-r--r--fs/ext4/ext4.h16
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/mballoc.c117
-rw-r--r--fs/ext4/super.c46
5 files changed, 30 insertions, 177 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 830bad7cce0f..efc4fd9f40ce 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -940,27 +940,6 @@ Table 1-10: Files in /proc/fs/ext4/<devname>
940 File Content 940 File Content
941 mb_groups details of multiblock allocator buddy cache of free blocks 941 mb_groups details of multiblock allocator buddy cache of free blocks
942 mb_history multiblock allocation history 942 mb_history multiblock allocation history
943 stats controls whether the multiblock allocator should start
944 collecting statistics, which are shown during the unmount
945 group_prealloc the multiblock allocator will round up allocation
946 requests to a multiple of this tuning parameter if the
947 stripe size is not set in the ext4 superblock
948 max_to_scan The maximum number of extents the multiblock allocator
949 will search to find the best extent
950 min_to_scan The minimum number of extents the multiblock allocator
951 will search to find the best extent
952 order2_req Tuning parameter which controls the minimum size for
953 requests (as a power of 2) where the buddy cache is
954 used
955 stream_req Files which have fewer blocks than this tunable
956 parameter will have their blocks allocated out of a
957 block group specific preallocation pool, so that small
958 files are packed closely together. Each large file
959 will have its blocks allocated out of its own unique
960 preallocation pool.
961inode_readahead Tuning parameter which controls the maximum number of
962 inode table blocks that ext4's inode table readahead
963 algorithm will pre-read into the buffer cache
964.............................................................................. 943..............................................................................
965 944
966 945
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0bd39188531c..e5c273ff928b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -976,22 +976,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
976 976
977extern struct proc_dir_entry *ext4_proc_root; 977extern struct proc_dir_entry *ext4_proc_root;
978 978
979#ifdef CONFIG_PROC_FS
980extern const struct file_operations ext4_ui_proc_fops;
981
982#define EXT4_PROC_HANDLER(name, var) \
983do { \
984 proc = proc_create_data(name, mode, sbi->s_proc, \
985 &ext4_ui_proc_fops, &sbi->s_##var); \
986 if (proc == NULL) { \
987 printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
988 goto err_out; \
989 } \
990} while (0)
991#else
992#define EXT4_PROC_HANDLER(name, var)
993#endif
994
995/* 979/*
996 * Function prototypes 980 * Function prototypes
997 */ 981 */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7dcac9d7e491..d3118d1acc39 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4153,12 +4153,7 @@ make_io:
4153 unsigned num; 4153 unsigned num;
4154 4154
4155 table = ext4_inode_table(sb, gdp); 4155 table = ext4_inode_table(sb, gdp);
4156 /* Make sure s_inode_readahead_blks is a power of 2 */ 4156 /* s_inode_readahead_blks is always a power of 2 */
4157 while (EXT4_SB(sb)->s_inode_readahead_blks &
4158 (EXT4_SB(sb)->s_inode_readahead_blks-1))
4159 EXT4_SB(sb)->s_inode_readahead_blks =
4160 (EXT4_SB(sb)->s_inode_readahead_blks &
4161 (EXT4_SB(sb)->s_inode_readahead_blks-1));
4162 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); 4157 b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
4163 if (table > b) 4158 if (table > b)
4164 b = table; 4159 b = table;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b0d6022eaa67..c4c430977622 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -46,22 +46,23 @@
46 * The allocation request involve request for multiple number of blocks 46 * The allocation request involve request for multiple number of blocks
47 * near to the goal(block) value specified. 47 * near to the goal(block) value specified.
48 * 48 *
49 * During initialization phase of the allocator we decide to use the group 49 * During initialization phase of the allocator we decide to use the
50 * preallocation or inode preallocation depending on the size file. The 50 * group preallocation or inode preallocation depending on the size of
51 * size of the file could be the resulting file size we would have after 51 * the file. The size of the file could be the resulting file size we
52 * allocation or the current file size whichever is larger. If the size is 52 * would have after allocation, or the current file size, whichever
53 * less that sbi->s_mb_stream_request we select the group 53 * is larger. If the size is less than sbi->s_mb_stream_request we
54 * preallocation. The default value of s_mb_stream_request is 16 54 * select to use the group preallocation. The default value of
55 * blocks. This can also be tuned via 55 * s_mb_stream_request is 16 blocks. This can also be tuned via
56 * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms 56 * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
57 * of number of blocks. 57 * terms of number of blocks.
58 * 58 *
59 * The main motivation for having small files use group preallocation is to 59 * The main motivation for having small files use group preallocation is to
60 * ensure that we have small file closer in the disk. 60 * ensure that we have small files closer together on the disk.
61 * 61 *
62 * First stage the allocator looks at the inode prealloc list 62 * First stage the allocator looks at the inode prealloc list,
63 * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for 63 * ext4_inode_info->i_prealloc_list, which contains list of prealloc
64 * this particular inode. The inode prealloc space is represented as: 64 * spaces for this particular inode. The inode prealloc space is
65 * represented as:
65 * 66 *
66 * pa_lstart -> the logical start block for this prealloc space 67 * pa_lstart -> the logical start block for this prealloc space
67 * pa_pstart -> the physical start block for this prealloc space 68 * pa_pstart -> the physical start block for this prealloc space
@@ -121,29 +122,29 @@
121 * list. In case of inode preallocation we follow a list of heuristics 122 * list. In case of inode preallocation we follow a list of heuristics
122 * based on file size. This can be found in ext4_mb_normalize_request. If 123 * based on file size. This can be found in ext4_mb_normalize_request. If
123 * we are doing a group prealloc we try to normalize the request to 124 * we are doing a group prealloc we try to normalize the request to
124 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to 125 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
125 * 512 blocks. This can be tuned via 126 * 512 blocks. This can be tuned via
126 * /proc/fs/ext4/<partition>/group_prealloc. The value is represented in 127 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
127 * terms of number of blocks. If we have mounted the file system with -o 128 * terms of number of blocks. If we have mounted the file system with -o
128 * stripe=<value> option the group prealloc request is normalized to the 129 * stripe=<value> option the group prealloc request is normalized to the
129 * stripe value (sbi->s_stripe) 130 * stripe value (sbi->s_stripe)
130 * 131 *
131 * The regular allocator(using the buddy cache) support few tunables. 132 * The regular allocator(using the buddy cache) supports few tunables.
132 * 133 *
133 * /proc/fs/ext4/<partition>/min_to_scan 134 * /sys/fs/ext4/<partition>/mb_min_to_scan
134 * /proc/fs/ext4/<partition>/max_to_scan 135 * /sys/fs/ext4/<partition>/mb_max_to_scan
135 * /proc/fs/ext4/<partition>/order2_req 136 * /sys/fs/ext4/<partition>/mb_order2_req
136 * 137 *
137 * The regular allocator use buddy scan only if the request len is power of 138 * The regular allocator uses buddy scan only if the request len is power of
138 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 139 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
139 * value of s_mb_order2_reqs can be tuned via 140 * value of s_mb_order2_reqs can be tuned via
140 * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to 141 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
141 * stripe size (sbi->s_stripe), we try to search for contiguous block in 142 * stripe size (sbi->s_stripe), we try to search for contiguous block in
142 * stripe size. This should result in better allocation on RAID setup. If 143 * stripe size. This should result in better allocation on RAID setups. If
143 * not we search in the specific group using bitmap for best extents. The 144 * not, we search in the specific group using bitmap for best extents. The
144 * tunable min_to_scan and max_to_scan controll the behaviour here. 145 * tunable min_to_scan and max_to_scan control the behaviour here.
145 * min_to_scan indicates how long the mballoc __must__ look for a best 146 * min_to_scan indicates how long the mballoc __must__ look for a best
146 * extent and max_to_scanindicate how long the mballoc __can__ look for a 147 * extent and max_to_scan indicates how long the mballoc __can__ look for a
147 * best extent in the found extents. Searching for the blocks starts with 148 * best extent in the found extents. Searching for the blocks starts with
148 * the group specified as the goal value in allocation context via 149 * the group specified as the goal value in allocation context via
149 * ac_g_ex. Each group is first checked based on the criteria whether it 150 * ac_g_ex. Each group is first checked based on the criteria whether it
@@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
337 ext4_group_t group); 338 ext4_group_t group);
338static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 339static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
339 ext4_group_t group); 340 ext4_group_t group);
340static int ext4_mb_init_per_dev_proc(struct super_block *sb);
341static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
342static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 341static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
343 342
344 343
@@ -1978,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1978 /* 1977 /*
1979 * We search using buddy data only if the order of the request 1978 * We search using buddy data only if the order of the request
1980 * is greater than or equal to sbi->s_mb_order2_reqs 1979 * is greater than or equal to sbi->s_mb_order2_reqs
1981 * You can tune it via /proc/fs/ext4/<partition>/order2_req 1980 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
1982 */ 1981 */
1983 if (i >= sbi->s_mb_order2_reqs) { 1982 if (i >= sbi->s_mb_order2_reqs) {
1984 /* 1983 /*
@@ -2753,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2753 spin_lock_init(&lg->lg_prealloc_lock); 2752 spin_lock_init(&lg->lg_prealloc_lock);
2754 } 2753 }
2755 2754
2756 ext4_mb_init_per_dev_proc(sb);
2757 ext4_mb_history_init(sb); 2755 ext4_mb_history_init(sb);
2758 2756
2759 if (sbi->s_journal) 2757 if (sbi->s_journal)
@@ -2836,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb)
2836 2834
2837 free_percpu(sbi->s_locality_groups); 2835 free_percpu(sbi->s_locality_groups);
2838 ext4_mb_history_release(sb); 2836 ext4_mb_history_release(sb);
2839 ext4_mb_destroy_per_dev_proc(sb);
2840 2837
2841 return 0; 2838 return 0;
2842} 2839}
@@ -2897,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2897 mb_debug("freed %u blocks in %u structures\n", count, count2); 2894 mb_debug("freed %u blocks in %u structures\n", count, count2);
2898} 2895}
2899 2896
2900#define EXT4_MB_STATS_NAME "stats"
2901#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2902#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
2903#define EXT4_MB_ORDER2_REQ "order2_req"
2904#define EXT4_MB_STREAM_REQ "stream_req"
2905#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
2906
2907static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2908{
2909#ifdef CONFIG_PROC_FS
2910 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2911 struct ext4_sb_info *sbi = EXT4_SB(sb);
2912 struct proc_dir_entry *proc;
2913
2914 if (sbi->s_proc == NULL)
2915 return -EINVAL;
2916
2917 EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
2918 EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
2919 EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
2920 EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
2921 EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
2922 EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
2923 return 0;
2924
2925err_out:
2926 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2927 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2928 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2929 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2930 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2931 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2932 return -ENOMEM;
2933#else
2934 return 0;
2935#endif
2936}
2937
2938static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2939{
2940#ifdef CONFIG_PROC_FS
2941 struct ext4_sb_info *sbi = EXT4_SB(sb);
2942
2943 if (sbi->s_proc == NULL)
2944 return -EINVAL;
2945
2946 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2947 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2948 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2949 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2950 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2951 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2952#endif
2953 return 0;
2954}
2955
2956int __init init_ext4_mballoc(void) 2897int __init init_ext4_mballoc(void)
2957{ 2898{
2958 ext4_pspace_cachep = 2899 ext4_pspace_cachep =
@@ -3123,7 +3064,7 @@ out_err:
3123 * here we normalize request for locality group 3064 * here we normalize request for locality group
3124 * Group requests are normalized to s_stripe size if we set the same via mount 3065 * Group requests are normalized to s_stripe size if we set the same via mount
3125 * option. If not we set it to s_mb_group_prealloc which can be configured via 3066 * option. If not we set it to s_mb_group_prealloc which can be configured via
3126 * /proc/fs/ext4/<partition>/group_prealloc 3067 * /sys/fs/ext4/<partition>/mb_group_prealloc
3127 * 3068 *
3128 * XXX: should we try to preallocate more than the group has now? 3069 * XXX: should we try to preallocate more than the group has now?
3129 */ 3070 */
@@ -4239,7 +4180,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4239 * file is determined by the current size or the resulting size after 4180 * file is determined by the current size or the resulting size after
4240 * allocation whichever is larger 4181 * allocation whichever is larger
4241 * 4182 *
4242 * One can tune this size via /proc/fs/ext4/<partition>/stream_req 4183 * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
4243 */ 4184 */
4244static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) 4185static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4245{ 4186{
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2883d4318c22..1ec554cc107a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -579,7 +579,6 @@ static void ext4_put_super(struct super_block *sb)
579 ext4_commit_super(sb, es, 1); 579 ext4_commit_super(sb, es, 1);
580 } 580 }
581 if (sbi->s_proc) { 581 if (sbi->s_proc) {
582 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
583 remove_proc_entry(sb->s_id, ext4_proc_root); 582 remove_proc_entry(sb->s_id, ext4_proc_root);
584 } 583 }
585 kobject_del(&sbi->s_kobj); 584 kobject_del(&sbi->s_kobj);
@@ -2529,11 +2528,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2529#ifdef CONFIG_PROC_FS 2528#ifdef CONFIG_PROC_FS
2530 if (ext4_proc_root) 2529 if (ext4_proc_root)
2531 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2530 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2532
2533 if (sbi->s_proc)
2534 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2535 &ext4_ui_proc_fops,
2536 &sbi->s_inode_readahead_blks);
2537#endif 2531#endif
2538 2532
2539 bgl_lock_init(sbi->s_blockgroup_lock); 2533 bgl_lock_init(sbi->s_blockgroup_lock);
@@ -2832,7 +2826,6 @@ failed_mount2:
2832 kfree(sbi->s_group_desc); 2826 kfree(sbi->s_group_desc);
2833failed_mount: 2827failed_mount:
2834 if (sbi->s_proc) { 2828 if (sbi->s_proc) {
2835 remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2836 remove_proc_entry(sb->s_id, ext4_proc_root); 2829 remove_proc_entry(sb->s_id, ext4_proc_root);
2837 } 2830 }
2838#ifdef CONFIG_QUOTA 2831#ifdef CONFIG_QUOTA
@@ -3865,45 +3858,6 @@ static int ext4_get_sb(struct file_system_type *fs_type,
3865 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); 3858 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3866} 3859}
3867 3860
3868#ifdef CONFIG_PROC_FS
3869static int ext4_ui_proc_show(struct seq_file *m, void *v)
3870{
3871 unsigned int *p = m->private;
3872
3873 seq_printf(m, "%u\n", *p);
3874 return 0;
3875}
3876
3877static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3878{
3879 return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3880}
3881
3882static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3883 size_t cnt, loff_t *ppos)
3884{
3885 unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
3886 char str[32];
3887
3888 if (cnt >= sizeof(str))
3889 return -EINVAL;
3890 if (copy_from_user(str, buf, cnt))
3891 return -EFAULT;
3892
3893 *p = simple_strtoul(str, NULL, 0);
3894 return cnt;
3895}
3896
3897const struct file_operations ext4_ui_proc_fops = {
3898 .owner = THIS_MODULE,
3899 .open = ext4_ui_proc_open,
3900 .read = seq_read,
3901 .llseek = seq_lseek,
3902 .release = single_release,
3903 .write = ext4_ui_proc_write,
3904};
3905#endif
3906
3907static struct file_system_type ext4_fs_type = { 3861static struct file_system_type ext4_fs_type = {
3908 .owner = THIS_MODULE, 3862 .owner = THIS_MODULE,
3909 .name = "ext4", 3863 .name = "ext4",