aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2009-05-01 08:50:38 -0400
committerTheodore Ts'o <tytso@mit.edu>2009-05-01 08:50:38 -0400
commit8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5 (patch)
tree38fd56a82049f50b4d774af47b9d39f116071755 /fs/ext4/ialloc.c
parent9ca92389c5312a51e819c15c762f0abdc7f3129b (diff)
ext4: Avoid races caused by on-line resizing and SMP memory reordering
Ext4's on-line resizing adds a new block group and then, only at the last step, adjusts s_groups_count. However, it's possible on SMP systems that another CPU could see the updated s_groups_count and not see the newly initialized data structures for the just-added block group. For this reason, it's important to insert an SMP read barrier after reading s_groups_count and before reading any data (for example, the new block group descriptors) whose access is allowed by the increased value of s_groups_count. Unfortunately, we rather blatantly violate this locking protocol documented in fs/ext4/resize.c. Fortunately, (1) on-line resizes happen relatively rarely, and (2) it seems rare that the filesystem code will immediately try to use a just-added block group before any memory ordering issues resolve themselves. So apparently problems here are relatively hard to hit, since ext3 has been vulnerable to the same issue for years with no one apparently complaining. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--fs/ext4/ialloc.c40
1 files changed, 19 insertions, 21 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18e0a08a6b5..55ba419ca00b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -316,7 +316,7 @@ error_return:
316static int find_group_dir(struct super_block *sb, struct inode *parent, 316static int find_group_dir(struct super_block *sb, struct inode *parent,
317 ext4_group_t *best_group) 317 ext4_group_t *best_group)
318{ 318{
319 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 319 ext4_group_t ngroups = ext4_get_groups_count(sb);
320 unsigned int freei, avefreei; 320 unsigned int freei, avefreei;
321 struct ext4_group_desc *desc, *best_desc = NULL; 321 struct ext4_group_desc *desc, *best_desc = NULL;
322 ext4_group_t group; 322 ext4_group_t group;
@@ -353,7 +353,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
353 struct flex_groups *flex_group = sbi->s_flex_groups; 353 struct flex_groups *flex_group = sbi->s_flex_groups;
354 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 354 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
355 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); 355 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
356 ext4_group_t ngroups = sbi->s_groups_count; 356 ext4_group_t ngroups = ext4_get_groups_count(sb);
357 int flex_size = ext4_flex_bg_size(sbi); 357 int flex_size = ext4_flex_bg_size(sbi);
358 ext4_group_t best_flex = parent_fbg_group; 358 ext4_group_t best_flex = parent_fbg_group;
359 int blocks_per_flex = sbi->s_blocks_per_group * flex_size; 359 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
@@ -362,7 +362,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
362 ext4_group_t n_fbg_groups; 362 ext4_group_t n_fbg_groups;
363 ext4_group_t i; 363 ext4_group_t i;
364 364
365 n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> 365 n_fbg_groups = (ngroups + flex_size - 1) >>
366 sbi->s_log_groups_per_flex; 366 sbi->s_log_groups_per_flex;
367 367
368find_close_to_parent: 368find_close_to_parent:
@@ -478,20 +478,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
478{ 478{
479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 479 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
480 struct ext4_sb_info *sbi = EXT4_SB(sb); 480 struct ext4_sb_info *sbi = EXT4_SB(sb);
481 ext4_group_t ngroups = sbi->s_groups_count; 481 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 482 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
483 unsigned int freei, avefreei; 483 unsigned int freei, avefreei;
484 ext4_fsblk_t freeb, avefreeb; 484 ext4_fsblk_t freeb, avefreeb;
485 unsigned int ndirs; 485 unsigned int ndirs;
486 int max_dirs, min_inodes; 486 int max_dirs, min_inodes;
487 ext4_grpblk_t min_blocks; 487 ext4_grpblk_t min_blocks;
488 ext4_group_t i, grp, g; 488 ext4_group_t i, grp, g, ngroups;
489 struct ext4_group_desc *desc; 489 struct ext4_group_desc *desc;
490 struct orlov_stats stats; 490 struct orlov_stats stats;
491 int flex_size = ext4_flex_bg_size(sbi); 491 int flex_size = ext4_flex_bg_size(sbi);
492 492
493 ngroups = real_ngroups;
493 if (flex_size > 1) { 494 if (flex_size > 1) {
494 ngroups = (ngroups + flex_size - 1) >> 495 ngroups = (real_ngroups + flex_size - 1) >>
495 sbi->s_log_groups_per_flex; 496 sbi->s_log_groups_per_flex;
496 parent_group >>= sbi->s_log_groups_per_flex; 497 parent_group >>= sbi->s_log_groups_per_flex;
497 } 498 }
@@ -543,7 +544,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
543 */ 544 */
544 grp *= flex_size; 545 grp *= flex_size;
545 for (i = 0; i < flex_size; i++) { 546 for (i = 0; i < flex_size; i++) {
546 if (grp+i >= sbi->s_groups_count) 547 if (grp+i >= real_ngroups)
547 break; 548 break;
548 desc = ext4_get_group_desc(sb, grp+i, NULL); 549 desc = ext4_get_group_desc(sb, grp+i, NULL);
549 if (desc && ext4_free_inodes_count(sb, desc)) { 550 if (desc && ext4_free_inodes_count(sb, desc)) {
@@ -583,7 +584,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
583 } 584 }
584 585
585fallback: 586fallback:
586 ngroups = sbi->s_groups_count; 587 ngroups = real_ngroups;
587 avefreei = freei / ngroups; 588 avefreei = freei / ngroups;
588fallback_retry: 589fallback_retry:
589 parent_group = EXT4_I(parent)->i_block_group; 590 parent_group = EXT4_I(parent)->i_block_group;
@@ -613,9 +614,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
613 ext4_group_t *group, int mode) 614 ext4_group_t *group, int mode)
614{ 615{
615 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 616 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
616 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 617 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
617 struct ext4_group_desc *desc; 618 struct ext4_group_desc *desc;
618 ext4_group_t i, last;
619 int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); 619 int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
620 620
621 /* 621 /*
@@ -799,11 +799,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
799 struct super_block *sb; 799 struct super_block *sb;
800 struct buffer_head *inode_bitmap_bh = NULL; 800 struct buffer_head *inode_bitmap_bh = NULL;
801 struct buffer_head *group_desc_bh; 801 struct buffer_head *group_desc_bh;
802 ext4_group_t group = 0; 802 ext4_group_t ngroups, group = 0;
803 unsigned long ino = 0; 803 unsigned long ino = 0;
804 struct inode *inode; 804 struct inode *inode;
805 struct ext4_group_desc *gdp = NULL; 805 struct ext4_group_desc *gdp = NULL;
806 struct ext4_super_block *es;
807 struct ext4_inode_info *ei; 806 struct ext4_inode_info *ei;
808 struct ext4_sb_info *sbi; 807 struct ext4_sb_info *sbi;
809 int ret2, err = 0; 808 int ret2, err = 0;
@@ -818,15 +817,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
818 return ERR_PTR(-EPERM); 817 return ERR_PTR(-EPERM);
819 818
820 sb = dir->i_sb; 819 sb = dir->i_sb;
820 ngroups = ext4_get_groups_count(sb);
821 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, 821 trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
822 dir->i_ino, mode); 822 dir->i_ino, mode);
823 inode = new_inode(sb); 823 inode = new_inode(sb);
824 if (!inode) 824 if (!inode)
825 return ERR_PTR(-ENOMEM); 825 return ERR_PTR(-ENOMEM);
826 ei = EXT4_I(inode); 826 ei = EXT4_I(inode);
827
828 sbi = EXT4_SB(sb); 827 sbi = EXT4_SB(sb);
829 es = sbi->s_es;
830 828
831 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 829 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
832 ret2 = find_group_flex(sb, dir, &group); 830 ret2 = find_group_flex(sb, dir, &group);
@@ -856,7 +854,7 @@ got_group:
856 if (ret2 == -1) 854 if (ret2 == -1)
857 goto out; 855 goto out;
858 856
859 for (i = 0; i < sbi->s_groups_count; i++) { 857 for (i = 0; i < ngroups; i++) {
860 err = -EIO; 858 err = -EIO;
861 859
862 gdp = ext4_get_group_desc(sb, group, &group_desc_bh); 860 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
@@ -917,7 +915,7 @@ repeat_in_this_group:
917 * group descriptor metadata has not yet been updated. 915 * group descriptor metadata has not yet been updated.
918 * So we just go onto the next blockgroup. 916 * So we just go onto the next blockgroup.
919 */ 917 */
920 if (++group == sbi->s_groups_count) 918 if (++group == ngroups)
921 group = 0; 919 group = 0;
922 } 920 }
923 err = -ENOSPC; 921 err = -ENOSPC;
@@ -1158,7 +1156,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1158{ 1156{
1159 unsigned long desc_count; 1157 unsigned long desc_count;
1160 struct ext4_group_desc *gdp; 1158 struct ext4_group_desc *gdp;
1161 ext4_group_t i; 1159 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1162#ifdef EXT4FS_DEBUG 1160#ifdef EXT4FS_DEBUG
1163 struct ext4_super_block *es; 1161 struct ext4_super_block *es;
1164 unsigned long bitmap_count, x; 1162 unsigned long bitmap_count, x;
@@ -1168,7 +1166,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1168 desc_count = 0; 1166 desc_count = 0;
1169 bitmap_count = 0; 1167 bitmap_count = 0;
1170 gdp = NULL; 1168 gdp = NULL;
1171 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1169 for (i = 0; i < ngroups; i++) {
1172 gdp = ext4_get_group_desc(sb, i, NULL); 1170 gdp = ext4_get_group_desc(sb, i, NULL);
1173 if (!gdp) 1171 if (!gdp)
1174 continue; 1172 continue;
@@ -1190,7 +1188,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1190 return desc_count; 1188 return desc_count;
1191#else 1189#else
1192 desc_count = 0; 1190 desc_count = 0;
1193 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1191 for (i = 0; i < ngroups; i++) {
1194 gdp = ext4_get_group_desc(sb, i, NULL); 1192 gdp = ext4_get_group_desc(sb, i, NULL);
1195 if (!gdp) 1193 if (!gdp)
1196 continue; 1194 continue;
@@ -1205,9 +1203,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
1205unsigned long ext4_count_dirs(struct super_block * sb) 1203unsigned long ext4_count_dirs(struct super_block * sb)
1206{ 1204{
1207 unsigned long count = 0; 1205 unsigned long count = 0;
1208 ext4_group_t i; 1206 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1209 1207
1210 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 1208 for (i = 0; i < ngroups; i++) {
1211 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); 1209 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1212 if (!gdp) 1210 if (!gdp)
1213 continue; 1211 continue;