diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 117 |
1 files changed, 29 insertions, 88 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b0d6022eaa67..c4c430977622 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -46,22 +46,23 @@ | |||
46 | * The allocation request involve request for multiple number of blocks | 46 | * The allocation request involve request for multiple number of blocks |
47 | * near to the goal(block) value specified. | 47 | * near to the goal(block) value specified. |
48 | * | 48 | * |
49 | * During initialization phase of the allocator we decide to use the group | 49 | * During initialization phase of the allocator we decide to use the |
50 | * preallocation or inode preallocation depending on the size file. The | 50 | * group preallocation or inode preallocation depending on the size of |
51 | * size of the file could be the resulting file size we would have after | 51 | * the file. The size of the file could be the resulting file size we |
52 | * allocation or the current file size which ever is larger. If the size is | 52 | * would have after allocation, or the current file size, whichever |
53 | * less that sbi->s_mb_stream_request we select the group | 53 | * is larger. If the size is less than sbi->s_mb_stream_request we |
54 | * preallocation. The default value of s_mb_stream_request is 16 | 54 | * select to use the group preallocation. The default value of |
55 | * blocks. This can also be tuned via | 55 | * s_mb_stream_request is 16 blocks. This can also be tuned via |
56 | * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms | 56 | * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in |
57 | * of number of blocks. | 57 | * terms of number of blocks. |
58 | * | 58 | * |
59 | * The main motivation for having small file use group preallocation is to | 59 | * The main motivation for having small file use group preallocation is to |
60 | * ensure that we have small file closer in the disk. | 60 | * ensure that we have small files closer together on the disk. |
61 | * | 61 | * |
62 | * First stage the allocator looks at the inode prealloc list | 62 | * First stage the allocator looks at the inode prealloc list, |
63 | * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for | 63 | * ext4_inode_info->i_prealloc_list, which contains list of prealloc |
64 | * this particular inode. The inode prealloc space is represented as: | 64 | * spaces for this particular inode. The inode prealloc space is |
65 | * represented as: | ||
65 | * | 66 | * |
66 | * pa_lstart -> the logical start block for this prealloc space | 67 | * pa_lstart -> the logical start block for this prealloc space |
67 | * pa_pstart -> the physical start block for this prealloc space | 68 | * pa_pstart -> the physical start block for this prealloc space |
@@ -121,29 +122,29 @@ | |||
121 | * list. In case of inode preallocation we follow a list of heuristics | 122 | * list. In case of inode preallocation we follow a list of heuristics |
122 | * based on file size. This can be found in ext4_mb_normalize_request. If | 123 | * based on file size. This can be found in ext4_mb_normalize_request. If |
123 | * we are doing a group prealloc we try to normalize the request to | 124 | * we are doing a group prealloc we try to normalize the request to |
124 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to | 125 | * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is |
125 | * 512 blocks. This can be tuned via | 126 | * 512 blocks. This can be tuned via |
126 | * /proc/fs/ext4/<partition/group_prealloc. The value is represented in | 127 | * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in |
127 | * terms of number of blocks. If we have mounted the file system with -O | 128 | * terms of number of blocks. If we have mounted the file system with -O |
128 | * stripe=<value> option the group prealloc request is normalized to the | 129 | * stripe=<value> option the group prealloc request is normalized to the |
129 | * stripe value (sbi->s_stripe) | 130 | * stripe value (sbi->s_stripe) |
130 | * | 131 | * |
131 | * The regular allocator(using the buddy cache) support few tunables. | 132 | * The regular allocator (using the buddy cache) supports a few tunables. |
132 | * | 133 | * |
133 | * /proc/fs/ext4/<partition>/min_to_scan | 134 | * /sys/fs/ext4/<partition>/mb_min_to_scan |
134 | * /proc/fs/ext4/<partition>/max_to_scan | 135 | * /sys/fs/ext4/<partition>/mb_max_to_scan |
135 | * /proc/fs/ext4/<partition>/order2_req | 136 | * /sys/fs/ext4/<partition>/mb_order2_req |
136 | * | 137 | * |
137 | * The regular allocator use buddy scan only if the request len is power of | 138 | * The regular allocator uses buddy scan only if the request len is power of |
138 | * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The | 139 | * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The |
139 | * value of s_mb_order2_reqs can be tuned via | 140 | * value of s_mb_order2_reqs can be tuned via |
140 | * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to | 141 | * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to |
141 | * stripe size (sbi->s_stripe), we try to search for contigous block in | 142 | * stripe size (sbi->s_stripe), we try to search for contigous block in |
142 | * stripe size. This should result in better allocation on RAID setup. If | 143 | * stripe size. This should result in better allocation on RAID setups. If |
143 | * not we search in the specific group using bitmap for best extents. The | 144 | * not, we search in the specific group using bitmap for best extents. The |
144 | * tunable min_to_scan and max_to_scan controll the behaviour here. | 145 | * tunable min_to_scan and max_to_scan control the behaviour here. |
145 | * min_to_scan indicate how long the mballoc __must__ look for a best | 146 | * min_to_scan indicate how long the mballoc __must__ look for a best |
146 | * extent and max_to_scanindicate how long the mballoc __can__ look for a | 147 | * extent and max_to_scan indicates how long the mballoc __can__ look for a |
147 | * best extent in the found extents. Searching for the blocks starts with | 148 | * best extent in the found extents. Searching for the blocks starts with |
148 | * the group specified as the goal value in allocation context via | 149 | * the group specified as the goal value in allocation context via |
149 | * ac_g_ex. Each group is first checked based on the criteria whether it | 150 | * ac_g_ex. Each group is first checked based on the criteria whether it |
@@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
337 | ext4_group_t group); | 338 | ext4_group_t group); |
338 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 339 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
339 | ext4_group_t group); | 340 | ext4_group_t group); |
340 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | ||
341 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | ||
342 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | 341 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); |
343 | 342 | ||
344 | 343 | ||
@@ -1978,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1978 | /* | 1977 | /* |
1979 | * We search using buddy data only if the order of the request | 1978 | * We search using buddy data only if the order of the request |
1980 | * is greater than equal to the sbi_s_mb_order2_reqs | 1979 | * is greater than equal to the sbi_s_mb_order2_reqs |
1981 | * You can tune it via /proc/fs/ext4/<partition>/order2_req | 1980 | * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req |
1982 | */ | 1981 | */ |
1983 | if (i >= sbi->s_mb_order2_reqs) { | 1982 | if (i >= sbi->s_mb_order2_reqs) { |
1984 | /* | 1983 | /* |
@@ -2753,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2753 | spin_lock_init(&lg->lg_prealloc_lock); | 2752 | spin_lock_init(&lg->lg_prealloc_lock); |
2754 | } | 2753 | } |
2755 | 2754 | ||
2756 | ext4_mb_init_per_dev_proc(sb); | ||
2757 | ext4_mb_history_init(sb); | 2755 | ext4_mb_history_init(sb); |
2758 | 2756 | ||
2759 | if (sbi->s_journal) | 2757 | if (sbi->s_journal) |
@@ -2836,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2836 | 2834 | ||
2837 | free_percpu(sbi->s_locality_groups); | 2835 | free_percpu(sbi->s_locality_groups); |
2838 | ext4_mb_history_release(sb); | 2836 | ext4_mb_history_release(sb); |
2839 | ext4_mb_destroy_per_dev_proc(sb); | ||
2840 | 2837 | ||
2841 | return 0; | 2838 | return 0; |
2842 | } | 2839 | } |
@@ -2897,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2897 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2894 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2898 | } | 2895 | } |
2899 | 2896 | ||
2900 | #define EXT4_MB_STATS_NAME "stats" | ||
2901 | #define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" | ||
2902 | #define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" | ||
2903 | #define EXT4_MB_ORDER2_REQ "order2_req" | ||
2904 | #define EXT4_MB_STREAM_REQ "stream_req" | ||
2905 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | ||
2906 | |||
2907 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | ||
2908 | { | ||
2909 | #ifdef CONFIG_PROC_FS | ||
2910 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | ||
2911 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2912 | struct proc_dir_entry *proc; | ||
2913 | |||
2914 | if (sbi->s_proc == NULL) | ||
2915 | return -EINVAL; | ||
2916 | |||
2917 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
2918 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
2919 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
2920 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
2921 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
2922 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
2923 | return 0; | ||
2924 | |||
2925 | err_out: | ||
2926 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); | ||
2927 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); | ||
2928 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); | ||
2929 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); | ||
2930 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); | ||
2931 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); | ||
2932 | return -ENOMEM; | ||
2933 | #else | ||
2934 | return 0; | ||
2935 | #endif | ||
2936 | } | ||
2937 | |||
2938 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | ||
2939 | { | ||
2940 | #ifdef CONFIG_PROC_FS | ||
2941 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2942 | |||
2943 | if (sbi->s_proc == NULL) | ||
2944 | return -EINVAL; | ||
2945 | |||
2946 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); | ||
2947 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); | ||
2948 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); | ||
2949 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); | ||
2950 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); | ||
2951 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); | ||
2952 | #endif | ||
2953 | return 0; | ||
2954 | } | ||
2955 | |||
2956 | int __init init_ext4_mballoc(void) | 2897 | int __init init_ext4_mballoc(void) |
2957 | { | 2898 | { |
2958 | ext4_pspace_cachep = | 2899 | ext4_pspace_cachep = |
@@ -3123,7 +3064,7 @@ out_err: | |||
3123 | * here we normalize request for locality group | 3064 | * here we normalize request for locality group |
3124 | * Group request are normalized to s_strip size if we set the same via mount | 3065 | * Group request are normalized to s_strip size if we set the same via mount |
3125 | * option. If not we set it to s_mb_group_prealloc which can be configured via | 3066 | * option. If not we set it to s_mb_group_prealloc which can be configured via |
3126 | * /proc/fs/ext4/<partition>/group_prealloc | 3067 | * /sys/fs/ext4/<partition>/mb_group_prealloc |
3127 | * | 3068 | * |
3128 | * XXX: should we try to preallocate more than the group has now? | 3069 | * XXX: should we try to preallocate more than the group has now? |
3129 | */ | 3070 | */ |
@@ -4239,7 +4180,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4239 | * file is determined by the current size or the resulting size after | 4180 | * file is determined by the current size or the resulting size after |
4240 | * allocation which ever is larger | 4181 | * allocation which ever is larger |
4241 | * | 4182 | * |
4242 | * One can tune this size via /proc/fs/ext4/<partition>/stream_req | 4183 | * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req |
4243 | */ | 4184 | */ |
4244 | static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | 4185 | static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) |
4245 | { | 4186 | { |