aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Valerie Clement <valerie.clement@bull.net> 2008-05-13 19:31:14 -0400
committer Theodore Ts'o <tytso@mit.edu> 2008-05-13 19:31:14 -0400
commit 1930479c4b6bbcb6f164a5b3498e0d98329967f4 (patch)
tree 2f0185f4d4b30fea72a1b90e24de28e440ebce18
parent 2c8be6b222f76c332d9faeb00c047996d340632c (diff)
ext4: mballoc fix mb_normalize_request algorithm for 1KB block size filesystems
In case of inode preallocation, the number of blocks to allocate depends on the file size and is calculated in ext4_mb_normalize_request(). Each group in the filesystem is then checked to find one that can be used for allocation; this is done in ext4_mb_good_group(). When a file bigger than 4MB is created, the requested number of blocks to preallocate, as calculated by ext4_mb_normalize_request(), is 4096. However, for a filesystem with a 1KB block size, the maximum size of the block buddies used by the multiblock allocator is 2048, so none of the groups in the filesystem satisfies the search criteria in ext4_mb_good_group(). Scanning all the filesystem groups impacts performance. This was demonstrated by using a freshly created, 70GB, 1k block filesystem, with caches dropped right before the test via /proc/sys/vm/drop_caches, and with the filesystem mounted first with nodelalloc and then with nodelalloc,nomballoc. The time to write an 8 megabyte file using "dd if=/dev/zero of=/mnt/test/fo bs=8k count=1k conv=fsync" was 35.5091 seconds (236kB/s) with nodelalloc, and 0.233754 seconds (35.9 MB/s) with the nodelalloc,nomballoc options. With a 1TB partition, it took several minutes to write 8MB! This patch modifies the algorithm in ext4_mb_normalize_request() to calculate the number of blocks to allocate by taking into account the maximum size of the free block chunks handled by the multiblock allocator. It has also been tested for filesystems with 2KB and 4KB block sizes to ensure that those cases don't regress. Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Valerie Clement <valerie.clement@bull.net> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/mballoc.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b128bdc0f55c..1d7fde994521 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2880,12 +2880,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2880 if (size < i_size_read(ac->ac_inode)) 2880 if (size < i_size_read(ac->ac_inode))
2881 size = i_size_read(ac->ac_inode); 2881 size = i_size_read(ac->ac_inode);
2882 2882
2883 /* max available blocks in a free group */ 2883 /* max size of free chunks */
2884 max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - 2884 max = 2 << bsbits;
2885 EXT4_SB(ac->ac_sb)->s_itb_per_group;
2886 2885
2887#define NRL_CHECK_SIZE(req, size, max,bits) \ 2886#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
2888 (req <= (size) || max <= ((size) >> bits)) 2887 (req <= (size) || max <= (chunk_size))
2889 2888
2890 /* first, try to predict filesize */ 2889 /* first, try to predict filesize */
2891 /* XXX: should this table be tunable? */ 2890 /* XXX: should this table be tunable? */
@@ -2904,16 +2903,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2904 size = 512 * 1024; 2903 size = 512 * 1024;
2905 } else if (size <= 1024 * 1024) { 2904 } else if (size <= 1024 * 1024) {
2906 size = 1024 * 1024; 2905 size = 1024 * 1024;
2907 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { 2906 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
2908 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2907 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2909 (20 - bsbits)) << 20; 2908 (21 - bsbits)) << 21;
2910 size = 1024 * 1024; 2909 size = 2 * 1024 * 1024;
2911 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { 2910 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
2912 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2911 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2913 (22 - bsbits)) << 22; 2912 (22 - bsbits)) << 22;
2914 size = 4 * 1024 * 1024; 2913 size = 4 * 1024 * 1024;
2915 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, 2914 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
2916 (8<<20)>>bsbits, max, bsbits)) { 2915 (8<<20)>>bsbits, max, 8 * 1024)) {
2917 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2916 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
2918 (23 - bsbits)) << 23; 2917 (23 - bsbits)) << 23;
2919 size = 8 * 1024 * 1024; 2918 size = 8 * 1024 * 1024;