aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2009-05-01 08:50:38 -0400
committerTheodore Ts'o <tytso@mit.edu>2009-05-01 08:50:38 -0400
commit8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5 (patch)
tree38fd56a82049f50b4d774af47b9d39f116071755 /fs/ext4/mballoc.c
parent9ca92389c5312a51e819c15c762f0abdc7f3129b (diff)
ext4: Avoid races caused by on-line resizing and SMP memory reordering
Ext4's on-line resizing adds a new block group and then, only at the last step, adjusts s_groups_count. However, it's possible on SMP systems that another CPU could see the updated s_groups_count and not see the newly initialized data structures for the just-added block group. For this reason, it's important to insert an SMP read barrier after reading s_groups_count and before reading any data (for example, the new block group descriptors) whose access is allowed by the increased value of s_groups_count. Unfortunately, we rather blatantly violate this locking protocol, which is documented in fs/ext4/resize.c. Fortunately, (1) on-line resizes happen relatively rarely, and (2) it seems rare that the filesystem code will immediately try to use a just-added block group before any memory ordering issues resolve themselves. So apparently problems here are relatively hard to hit, since ext3 has been vulnerable to the same issue for years with no one apparently complaining. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c45
1 files changed, 24 insertions, 21 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f871677a7984..c3af9e6b6668 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -739,6 +739,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
739 739
740static int ext4_mb_init_cache(struct page *page, char *incore) 740static int ext4_mb_init_cache(struct page *page, char *incore)
741{ 741{
742 ext4_group_t ngroups;
742 int blocksize; 743 int blocksize;
743 int blocks_per_page; 744 int blocks_per_page;
744 int groups_per_page; 745 int groups_per_page;
@@ -757,6 +758,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
757 758
758 inode = page->mapping->host; 759 inode = page->mapping->host;
759 sb = inode->i_sb; 760 sb = inode->i_sb;
761 ngroups = ext4_get_groups_count(sb);
760 blocksize = 1 << inode->i_blkbits; 762 blocksize = 1 << inode->i_blkbits;
761 blocks_per_page = PAGE_CACHE_SIZE / blocksize; 763 blocks_per_page = PAGE_CACHE_SIZE / blocksize;
762 764
@@ -780,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
780 for (i = 0; i < groups_per_page; i++) { 782 for (i = 0; i < groups_per_page; i++) {
781 struct ext4_group_desc *desc; 783 struct ext4_group_desc *desc;
782 784
783 if (first_group + i >= EXT4_SB(sb)->s_groups_count) 785 if (first_group + i >= ngroups)
784 break; 786 break;
785 787
786 err = -EIO; 788 err = -EIO;
@@ -852,7 +854,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
852 struct ext4_group_info *grinfo; 854 struct ext4_group_info *grinfo;
853 855
854 group = (first_block + i) >> 1; 856 group = (first_block + i) >> 1;
855 if (group >= EXT4_SB(sb)->s_groups_count) 857 if (group >= ngroups)
856 break; 858 break;
857 859
858 /* 860 /*
@@ -1788,6 +1790,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1788 int block, pnum; 1790 int block, pnum;
1789 int blocks_per_page; 1791 int blocks_per_page;
1790 int groups_per_page; 1792 int groups_per_page;
1793 ext4_group_t ngroups = ext4_get_groups_count(sb);
1791 ext4_group_t first_group; 1794 ext4_group_t first_group;
1792 struct ext4_group_info *grp; 1795 struct ext4_group_info *grp;
1793 1796
@@ -1807,7 +1810,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1807 /* read all groups the page covers into the cache */ 1810 /* read all groups the page covers into the cache */
1808 for (i = 0; i < groups_per_page; i++) { 1811 for (i = 0; i < groups_per_page; i++) {
1809 1812
1810 if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) 1813 if ((first_group + i) >= ngroups)
1811 break; 1814 break;
1812 grp = ext4_get_group_info(sb, first_group + i); 1815 grp = ext4_get_group_info(sb, first_group + i);
1813 /* take all groups write allocation 1816 /* take all groups write allocation
@@ -1945,8 +1948,7 @@ err:
1945static noinline_for_stack int 1948static noinline_for_stack int
1946ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1949ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1947{ 1950{
1948 ext4_group_t group; 1951 ext4_group_t ngroups, group, i;
1949 ext4_group_t i;
1950 int cr; 1952 int cr;
1951 int err = 0; 1953 int err = 0;
1952 int bsbits; 1954 int bsbits;
@@ -1957,6 +1959,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1957 1959
1958 sb = ac->ac_sb; 1960 sb = ac->ac_sb;
1959 sbi = EXT4_SB(sb); 1961 sbi = EXT4_SB(sb);
1962 ngroups = ext4_get_groups_count(sb);
1960 BUG_ON(ac->ac_status == AC_STATUS_FOUND); 1963 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1961 1964
1962 /* first, try the goal */ 1965 /* first, try the goal */
@@ -2017,11 +2020,11 @@ repeat:
2017 */ 2020 */
2018 group = ac->ac_g_ex.fe_group; 2021 group = ac->ac_g_ex.fe_group;
2019 2022
2020 for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { 2023 for (i = 0; i < ngroups; group++, i++) {
2021 struct ext4_group_info *grp; 2024 struct ext4_group_info *grp;
2022 struct ext4_group_desc *desc; 2025 struct ext4_group_desc *desc;
2023 2026
2024 if (group == EXT4_SB(sb)->s_groups_count) 2027 if (group == ngroups)
2025 group = 0; 2028 group = 0;
2026 2029
2027 /* quick check to skip empty groups */ 2030 /* quick check to skip empty groups */
@@ -2315,12 +2318,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
2315static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) 2318static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2316{ 2319{
2317 struct super_block *sb = seq->private; 2320 struct super_block *sb = seq->private;
2318 struct ext4_sb_info *sbi = EXT4_SB(sb);
2319 ext4_group_t group; 2321 ext4_group_t group;
2320 2322
2321 if (*pos < 0 || *pos >= sbi->s_groups_count) 2323 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2322 return NULL; 2324 return NULL;
2323
2324 group = *pos + 1; 2325 group = *pos + 1;
2325 return (void *) ((unsigned long) group); 2326 return (void *) ((unsigned long) group);
2326} 2327}
@@ -2328,11 +2329,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2328static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) 2329static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
2329{ 2330{
2330 struct super_block *sb = seq->private; 2331 struct super_block *sb = seq->private;
2331 struct ext4_sb_info *sbi = EXT4_SB(sb);
2332 ext4_group_t group; 2332 ext4_group_t group;
2333 2333
2334 ++*pos; 2334 ++*pos;
2335 if (*pos < 0 || *pos >= sbi->s_groups_count) 2335 if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
2336 return NULL; 2336 return NULL;
2337 group = *pos + 1; 2337 group = *pos + 1;
2338 return (void *) ((unsigned long) group); 2338 return (void *) ((unsigned long) group);
@@ -2587,6 +2587,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2587 2587
2588static int ext4_mb_init_backend(struct super_block *sb) 2588static int ext4_mb_init_backend(struct super_block *sb)
2589{ 2589{
2590 ext4_group_t ngroups = ext4_get_groups_count(sb);
2590 ext4_group_t i; 2591 ext4_group_t i;
2591 int metalen; 2592 int metalen;
2592 struct ext4_sb_info *sbi = EXT4_SB(sb); 2593 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2598,7 +2599,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2598 struct ext4_group_desc *desc; 2599 struct ext4_group_desc *desc;
2599 2600
2600 /* This is the number of blocks used by GDT */ 2601 /* This is the number of blocks used by GDT */
2601 num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 2602 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
2602 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2603 1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
2603 2604
2604 /* 2605 /*
@@ -2644,7 +2645,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2644 for (i = 0; i < num_meta_group_infos; i++) { 2645 for (i = 0; i < num_meta_group_infos; i++) {
2645 if ((i + 1) == num_meta_group_infos) 2646 if ((i + 1) == num_meta_group_infos)
2646 metalen = sizeof(*meta_group_info) * 2647 metalen = sizeof(*meta_group_info) *
2647 (sbi->s_groups_count - 2648 (ngroups -
2648 (i << EXT4_DESC_PER_BLOCK_BITS(sb))); 2649 (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
2649 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2650 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2650 if (meta_group_info == NULL) { 2651 if (meta_group_info == NULL) {
@@ -2655,7 +2656,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2655 sbi->s_group_info[i] = meta_group_info; 2656 sbi->s_group_info[i] = meta_group_info;
2656 } 2657 }
2657 2658
2658 for (i = 0; i < sbi->s_groups_count; i++) { 2659 for (i = 0; i < ngroups; i++) {
2659 desc = ext4_get_group_desc(sb, i, NULL); 2660 desc = ext4_get_group_desc(sb, i, NULL);
2660 if (desc == NULL) { 2661 if (desc == NULL) {
2661 printk(KERN_ERR 2662 printk(KERN_ERR
@@ -2781,13 +2782,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2781 2782
2782int ext4_mb_release(struct super_block *sb) 2783int ext4_mb_release(struct super_block *sb)
2783{ 2784{
2785 ext4_group_t ngroups = ext4_get_groups_count(sb);
2784 ext4_group_t i; 2786 ext4_group_t i;
2785 int num_meta_group_infos; 2787 int num_meta_group_infos;
2786 struct ext4_group_info *grinfo; 2788 struct ext4_group_info *grinfo;
2787 struct ext4_sb_info *sbi = EXT4_SB(sb); 2789 struct ext4_sb_info *sbi = EXT4_SB(sb);
2788 2790
2789 if (sbi->s_group_info) { 2791 if (sbi->s_group_info) {
2790 for (i = 0; i < sbi->s_groups_count; i++) { 2792 for (i = 0; i < ngroups; i++) {
2791 grinfo = ext4_get_group_info(sb, i); 2793 grinfo = ext4_get_group_info(sb, i);
2792#ifdef DOUBLE_CHECK 2794#ifdef DOUBLE_CHECK
2793 kfree(grinfo->bb_bitmap); 2795 kfree(grinfo->bb_bitmap);
@@ -2797,7 +2799,7 @@ int ext4_mb_release(struct super_block *sb)
2797 ext4_unlock_group(sb, i); 2799 ext4_unlock_group(sb, i);
2798 kfree(grinfo); 2800 kfree(grinfo);
2799 } 2801 }
2800 num_meta_group_infos = (sbi->s_groups_count + 2802 num_meta_group_infos = (ngroups +
2801 EXT4_DESC_PER_BLOCK(sb) - 1) >> 2803 EXT4_DESC_PER_BLOCK(sb) - 1) >>
2802 EXT4_DESC_PER_BLOCK_BITS(sb); 2804 EXT4_DESC_PER_BLOCK_BITS(sb);
2803 for (i = 0; i < num_meta_group_infos; i++) 2805 for (i = 0; i < num_meta_group_infos; i++)
@@ -4121,7 +4123,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
4121static void ext4_mb_show_ac(struct ext4_allocation_context *ac) 4123static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4122{ 4124{
4123 struct super_block *sb = ac->ac_sb; 4125 struct super_block *sb = ac->ac_sb;
4124 ext4_group_t i; 4126 ext4_group_t ngroups, i;
4125 4127
4126 printk(KERN_ERR "EXT4-fs: Can't allocate:" 4128 printk(KERN_ERR "EXT4-fs: Can't allocate:"
4127 " Allocation context details:\n"); 4129 " Allocation context details:\n");
@@ -4145,7 +4147,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4145 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, 4147 printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
4146 ac->ac_found); 4148 ac->ac_found);
4147 printk(KERN_ERR "EXT4-fs: groups: \n"); 4149 printk(KERN_ERR "EXT4-fs: groups: \n");
4148 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { 4150 ngroups = ext4_get_groups_count(sb);
4151 for (i = 0; i < ngroups; i++) {
4149 struct ext4_group_info *grp = ext4_get_group_info(sb, i); 4152 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
4150 struct ext4_prealloc_space *pa; 4153 struct ext4_prealloc_space *pa;
4151 ext4_grpblk_t start; 4154 ext4_grpblk_t start;
@@ -4469,13 +4472,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4469 4472
4470static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) 4473static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4471{ 4474{
4472 ext4_group_t i; 4475 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4473 int ret; 4476 int ret;
4474 int freed = 0; 4477 int freed = 0;
4475 4478
4476 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", 4479 trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
4477 sb->s_id, needed); 4480 sb->s_id, needed);
4478 for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { 4481 for (i = 0; i < ngroups && needed > 0; i++) {
4479 ret = ext4_mb_discard_group_preallocations(sb, i, needed); 4482 ret = ext4_mb_discard_group_preallocations(sb, i, needed);
4480 freed += ret; 4483 freed += ret;
4481 needed -= ret; 4484 needed -= ret;