diff options
author | Theodore Ts'o <tytso@mit.edu> | 2009-05-01 08:50:38 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-05-01 08:50:38 -0400 |
commit | 8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5 (patch) | |
tree | 38fd56a82049f50b4d774af47b9d39f116071755 /fs/ext4/mballoc.c | |
parent | 9ca92389c5312a51e819c15c762f0abdc7f3129b (diff) |
ext4: Avoid races caused by on-line resizing and SMP memory reordering
Ext4's on-line resizing adds a new block group and then, only at the
last step, adjusts s_groups_count. However, it's possible on SMP
systems that another CPU could see the updated s_groups_count and
not see the newly initialized data structures for the just-added block
group. For this reason, it's important to insert an SMP read barrier
after reading s_groups_count and before reading any data (for example,
the new block group descriptors) made accessible by the increased value
of s_groups_count.
Unfortunately, we rather blatantly violate the locking protocol
documented in fs/ext4/resize.c. Fortunately, (1) on-line resizes
happen relatively rarely, and (2) it seems rare that the filesystem
code will immediately try to use the just-added block group before any
memory ordering issues resolve themselves. So apparently problems
here are relatively hard to hit, since ext3 has been vulnerable to the
same issue for years with no one apparently complaining.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 45 |
1 files changed, 24 insertions, 21 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f871677a7984..c3af9e6b6668 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -739,6 +739,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, | |||
739 | 739 | ||
740 | static int ext4_mb_init_cache(struct page *page, char *incore) | 740 | static int ext4_mb_init_cache(struct page *page, char *incore) |
741 | { | 741 | { |
742 | ext4_group_t ngroups; | ||
742 | int blocksize; | 743 | int blocksize; |
743 | int blocks_per_page; | 744 | int blocks_per_page; |
744 | int groups_per_page; | 745 | int groups_per_page; |
@@ -757,6 +758,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
757 | 758 | ||
758 | inode = page->mapping->host; | 759 | inode = page->mapping->host; |
759 | sb = inode->i_sb; | 760 | sb = inode->i_sb; |
761 | ngroups = ext4_get_groups_count(sb); | ||
760 | blocksize = 1 << inode->i_blkbits; | 762 | blocksize = 1 << inode->i_blkbits; |
761 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; | 763 | blocks_per_page = PAGE_CACHE_SIZE / blocksize; |
762 | 764 | ||
@@ -780,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
780 | for (i = 0; i < groups_per_page; i++) { | 782 | for (i = 0; i < groups_per_page; i++) { |
781 | struct ext4_group_desc *desc; | 783 | struct ext4_group_desc *desc; |
782 | 784 | ||
783 | if (first_group + i >= EXT4_SB(sb)->s_groups_count) | 785 | if (first_group + i >= ngroups) |
784 | break; | 786 | break; |
785 | 787 | ||
786 | err = -EIO; | 788 | err = -EIO; |
@@ -852,7 +854,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
852 | struct ext4_group_info *grinfo; | 854 | struct ext4_group_info *grinfo; |
853 | 855 | ||
854 | group = (first_block + i) >> 1; | 856 | group = (first_block + i) >> 1; |
855 | if (group >= EXT4_SB(sb)->s_groups_count) | 857 | if (group >= ngroups) |
856 | break; | 858 | break; |
857 | 859 | ||
858 | /* | 860 | /* |
@@ -1788,6 +1790,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |||
1788 | int block, pnum; | 1790 | int block, pnum; |
1789 | int blocks_per_page; | 1791 | int blocks_per_page; |
1790 | int groups_per_page; | 1792 | int groups_per_page; |
1793 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
1791 | ext4_group_t first_group; | 1794 | ext4_group_t first_group; |
1792 | struct ext4_group_info *grp; | 1795 | struct ext4_group_info *grp; |
1793 | 1796 | ||
@@ -1807,7 +1810,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |||
1807 | /* read all groups the page covers into the cache */ | 1810 | /* read all groups the page covers into the cache */ |
1808 | for (i = 0; i < groups_per_page; i++) { | 1811 | for (i = 0; i < groups_per_page; i++) { |
1809 | 1812 | ||
1810 | if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) | 1813 | if ((first_group + i) >= ngroups) |
1811 | break; | 1814 | break; |
1812 | grp = ext4_get_group_info(sb, first_group + i); | 1815 | grp = ext4_get_group_info(sb, first_group + i); |
1813 | /* take all groups write allocation | 1816 | /* take all groups write allocation |
@@ -1945,8 +1948,7 @@ err: | |||
1945 | static noinline_for_stack int | 1948 | static noinline_for_stack int |
1946 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1949 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1947 | { | 1950 | { |
1948 | ext4_group_t group; | 1951 | ext4_group_t ngroups, group, i; |
1949 | ext4_group_t i; | ||
1950 | int cr; | 1952 | int cr; |
1951 | int err = 0; | 1953 | int err = 0; |
1952 | int bsbits; | 1954 | int bsbits; |
@@ -1957,6 +1959,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1957 | 1959 | ||
1958 | sb = ac->ac_sb; | 1960 | sb = ac->ac_sb; |
1959 | sbi = EXT4_SB(sb); | 1961 | sbi = EXT4_SB(sb); |
1962 | ngroups = ext4_get_groups_count(sb); | ||
1960 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 1963 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
1961 | 1964 | ||
1962 | /* first, try the goal */ | 1965 | /* first, try the goal */ |
@@ -2017,11 +2020,11 @@ repeat: | |||
2017 | */ | 2020 | */ |
2018 | group = ac->ac_g_ex.fe_group; | 2021 | group = ac->ac_g_ex.fe_group; |
2019 | 2022 | ||
2020 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { | 2023 | for (i = 0; i < ngroups; group++, i++) { |
2021 | struct ext4_group_info *grp; | 2024 | struct ext4_group_info *grp; |
2022 | struct ext4_group_desc *desc; | 2025 | struct ext4_group_desc *desc; |
2023 | 2026 | ||
2024 | if (group == EXT4_SB(sb)->s_groups_count) | 2027 | if (group == ngroups) |
2025 | group = 0; | 2028 | group = 0; |
2026 | 2029 | ||
2027 | /* quick check to skip empty groups */ | 2030 | /* quick check to skip empty groups */ |
@@ -2315,12 +2318,10 @@ static struct file_operations ext4_mb_seq_history_fops = { | |||
2315 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | 2318 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) |
2316 | { | 2319 | { |
2317 | struct super_block *sb = seq->private; | 2320 | struct super_block *sb = seq->private; |
2318 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2319 | ext4_group_t group; | 2321 | ext4_group_t group; |
2320 | 2322 | ||
2321 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2323 | if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) |
2322 | return NULL; | 2324 | return NULL; |
2323 | |||
2324 | group = *pos + 1; | 2325 | group = *pos + 1; |
2325 | return (void *) ((unsigned long) group); | 2326 | return (void *) ((unsigned long) group); |
2326 | } | 2327 | } |
@@ -2328,11 +2329,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | |||
2328 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) | 2329 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) |
2329 | { | 2330 | { |
2330 | struct super_block *sb = seq->private; | 2331 | struct super_block *sb = seq->private; |
2331 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2332 | ext4_group_t group; | 2332 | ext4_group_t group; |
2333 | 2333 | ||
2334 | ++*pos; | 2334 | ++*pos; |
2335 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2335 | if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) |
2336 | return NULL; | 2336 | return NULL; |
2337 | group = *pos + 1; | 2337 | group = *pos + 1; |
2338 | return (void *) ((unsigned long) group); | 2338 | return (void *) ((unsigned long) group); |
@@ -2587,6 +2587,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) | |||
2587 | 2587 | ||
2588 | static int ext4_mb_init_backend(struct super_block *sb) | 2588 | static int ext4_mb_init_backend(struct super_block *sb) |
2589 | { | 2589 | { |
2590 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
2590 | ext4_group_t i; | 2591 | ext4_group_t i; |
2591 | int metalen; | 2592 | int metalen; |
2592 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2593 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2598,7 +2599,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2598 | struct ext4_group_desc *desc; | 2599 | struct ext4_group_desc *desc; |
2599 | 2600 | ||
2600 | /* This is the number of blocks used by GDT */ | 2601 | /* This is the number of blocks used by GDT */ |
2601 | num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - | 2602 | num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - |
2602 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); | 2603 | 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); |
2603 | 2604 | ||
2604 | /* | 2605 | /* |
@@ -2644,7 +2645,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2644 | for (i = 0; i < num_meta_group_infos; i++) { | 2645 | for (i = 0; i < num_meta_group_infos; i++) { |
2645 | if ((i + 1) == num_meta_group_infos) | 2646 | if ((i + 1) == num_meta_group_infos) |
2646 | metalen = sizeof(*meta_group_info) * | 2647 | metalen = sizeof(*meta_group_info) * |
2647 | (sbi->s_groups_count - | 2648 | (ngroups - |
2648 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); | 2649 | (i << EXT4_DESC_PER_BLOCK_BITS(sb))); |
2649 | meta_group_info = kmalloc(metalen, GFP_KERNEL); | 2650 | meta_group_info = kmalloc(metalen, GFP_KERNEL); |
2650 | if (meta_group_info == NULL) { | 2651 | if (meta_group_info == NULL) { |
@@ -2655,7 +2656,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2655 | sbi->s_group_info[i] = meta_group_info; | 2656 | sbi->s_group_info[i] = meta_group_info; |
2656 | } | 2657 | } |
2657 | 2658 | ||
2658 | for (i = 0; i < sbi->s_groups_count; i++) { | 2659 | for (i = 0; i < ngroups; i++) { |
2659 | desc = ext4_get_group_desc(sb, i, NULL); | 2660 | desc = ext4_get_group_desc(sb, i, NULL); |
2660 | if (desc == NULL) { | 2661 | if (desc == NULL) { |
2661 | printk(KERN_ERR | 2662 | printk(KERN_ERR |
@@ -2781,13 +2782,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2781 | 2782 | ||
2782 | int ext4_mb_release(struct super_block *sb) | 2783 | int ext4_mb_release(struct super_block *sb) |
2783 | { | 2784 | { |
2785 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
2784 | ext4_group_t i; | 2786 | ext4_group_t i; |
2785 | int num_meta_group_infos; | 2787 | int num_meta_group_infos; |
2786 | struct ext4_group_info *grinfo; | 2788 | struct ext4_group_info *grinfo; |
2787 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2789 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2788 | 2790 | ||
2789 | if (sbi->s_group_info) { | 2791 | if (sbi->s_group_info) { |
2790 | for (i = 0; i < sbi->s_groups_count; i++) { | 2792 | for (i = 0; i < ngroups; i++) { |
2791 | grinfo = ext4_get_group_info(sb, i); | 2793 | grinfo = ext4_get_group_info(sb, i); |
2792 | #ifdef DOUBLE_CHECK | 2794 | #ifdef DOUBLE_CHECK |
2793 | kfree(grinfo->bb_bitmap); | 2795 | kfree(grinfo->bb_bitmap); |
@@ -2797,7 +2799,7 @@ int ext4_mb_release(struct super_block *sb) | |||
2797 | ext4_unlock_group(sb, i); | 2799 | ext4_unlock_group(sb, i); |
2798 | kfree(grinfo); | 2800 | kfree(grinfo); |
2799 | } | 2801 | } |
2800 | num_meta_group_infos = (sbi->s_groups_count + | 2802 | num_meta_group_infos = (ngroups + |
2801 | EXT4_DESC_PER_BLOCK(sb) - 1) >> | 2803 | EXT4_DESC_PER_BLOCK(sb) - 1) >> |
2802 | EXT4_DESC_PER_BLOCK_BITS(sb); | 2804 | EXT4_DESC_PER_BLOCK_BITS(sb); |
2803 | for (i = 0; i < num_meta_group_infos; i++) | 2805 | for (i = 0; i < num_meta_group_infos; i++) |
@@ -4121,7 +4123,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, | |||
4121 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | 4123 | static void ext4_mb_show_ac(struct ext4_allocation_context *ac) |
4122 | { | 4124 | { |
4123 | struct super_block *sb = ac->ac_sb; | 4125 | struct super_block *sb = ac->ac_sb; |
4124 | ext4_group_t i; | 4126 | ext4_group_t ngroups, i; |
4125 | 4127 | ||
4126 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 4128 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
4127 | " Allocation context details:\n"); | 4129 | " Allocation context details:\n"); |
@@ -4145,7 +4147,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4145 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, | 4147 | printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, |
4146 | ac->ac_found); | 4148 | ac->ac_found); |
4147 | printk(KERN_ERR "EXT4-fs: groups: \n"); | 4149 | printk(KERN_ERR "EXT4-fs: groups: \n"); |
4148 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 4150 | ngroups = ext4_get_groups_count(sb); |
4151 | for (i = 0; i < ngroups; i++) { | ||
4149 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); | 4152 | struct ext4_group_info *grp = ext4_get_group_info(sb, i); |
4150 | struct ext4_prealloc_space *pa; | 4153 | struct ext4_prealloc_space *pa; |
4151 | ext4_grpblk_t start; | 4154 | ext4_grpblk_t start; |
@@ -4469,13 +4472,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4469 | 4472 | ||
4470 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | 4473 | static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) |
4471 | { | 4474 | { |
4472 | ext4_group_t i; | 4475 | ext4_group_t i, ngroups = ext4_get_groups_count(sb); |
4473 | int ret; | 4476 | int ret; |
4474 | int freed = 0; | 4477 | int freed = 0; |
4475 | 4478 | ||
4476 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", | 4479 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", |
4477 | sb->s_id, needed); | 4480 | sb->s_id, needed); |
4478 | for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { | 4481 | for (i = 0; i < ngroups && needed > 0; i++) { |
4479 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); | 4482 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); |
4480 | freed += ret; | 4483 | freed += ret; |
4481 | needed -= ret; | 4484 | needed -= ret; |