aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Theodore Ts'o <tytso@mit.edu> 2017-11-08 22:23:20 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2017-11-08 22:23:20 -0500
commit: 232530680290ba94ca37852ab10d9556ea28badf (patch)
tree: cf23ba2b6c563adc685f3b9fee830ac7d202546c
parent: d77147ff443b255d82c907a632c825b2cc610b10 (diff)
ext4: improve smp scalability for inode generation
->s_next_generation is protected by s_next_gen_lock but its usage pattern is very primitive. We don't actually need sequentially increasing new generation numbers, so let's use prandom_u32() instead.

Reported-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/ialloc.c 4
-rw-r--r-- fs/ext4/ioctl.c 8
-rw-r--r-- fs/ext4/super.c 2
4 files changed, 4 insertions, 12 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 53ce95b52fd8..5e6d7b6f50c7 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1355,8 +1355,6 @@ struct ext4_sb_info {
1355 int s_first_ino; 1355 int s_first_ino;
1356 unsigned int s_inode_readahead_blks; 1356 unsigned int s_inode_readahead_blks;
1357 unsigned int s_inode_goal; 1357 unsigned int s_inode_goal;
1358 spinlock_t s_next_gen_lock;
1359 u32 s_next_generation;
1360 u32 s_hash_seed[4]; 1358 u32 s_hash_seed[4];
1361 int s_def_hash_version; 1359 int s_def_hash_version;
1362 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ 1360 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ee823022aa34..da79eb5dba40 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1138,9 +1138,7 @@ got:
1138 inode->i_ino); 1138 inode->i_ino);
1139 goto out; 1139 goto out;
1140 } 1140 }
1141 spin_lock(&sbi->s_next_gen_lock); 1141 inode->i_generation = prandom_u32();
1142 inode->i_generation = sbi->s_next_generation++;
1143 spin_unlock(&sbi->s_next_gen_lock);
1144 1142
1145 /* Precompute checksum seed for inode metadata */ 1143 /* Precompute checksum seed for inode metadata */
1146 if (ext4_has_metadata_csum(sb)) { 1144 if (ext4_has_metadata_csum(sb)) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 144bbda2b808..23a4766f6678 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/quotaops.h> 16#include <linux/quotaops.h>
17#include <linux/random.h>
17#include <linux/uuid.h> 18#include <linux/uuid.h>
18#include <linux/uaccess.h> 19#include <linux/uaccess.h>
19#include <linux/delay.h> 20#include <linux/delay.h>
@@ -98,7 +99,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
98 int err; 99 int err;
99 struct inode *inode_bl; 100 struct inode *inode_bl;
100 struct ext4_inode_info *ei_bl; 101 struct ext4_inode_info *ei_bl;
101 struct ext4_sb_info *sbi = EXT4_SB(sb);
102 102
103 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) 103 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
104 return -EINVAL; 104 return -EINVAL;
@@ -157,10 +157,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
157 157
158 inode->i_ctime = inode_bl->i_ctime = current_time(inode); 158 inode->i_ctime = inode_bl->i_ctime = current_time(inode);
159 159
160 spin_lock(&sbi->s_next_gen_lock); 160 inode->i_generation = prandom_u32();
161 inode->i_generation = sbi->s_next_generation++; 161 inode_bl->i_generation = prandom_u32();
162 inode_bl->i_generation = sbi->s_next_generation++;
163 spin_unlock(&sbi->s_next_gen_lock);
164 162
165 ext4_discard_preallocations(inode); 163 ext4_discard_preallocations(inode);
166 164
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3a278faf5868..9f2e3eb5131f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3982,8 +3982,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3982 } 3982 }
3983 3983
3984 sbi->s_gdb_count = db_count; 3984 sbi->s_gdb_count = db_count;
3985 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3986 spin_lock_init(&sbi->s_next_gen_lock);
3987 3985
3988 timer_setup(&sbi->s_err_report, print_daily_error_info, 0); 3986 timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
3989 3987