aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2012-02-20 17:52:46 -0500
committerTheodore Ts'o <tytso@mit.edu>2012-02-20 17:52:46 -0500
commit813e57276fd909f7d5a816ef7ca706fca491ee61 (patch)
treecc2d262c0f6524f1a18d2a168d03e2ae3eb624ab
parent119c0d4460b001e44b41dcf73dc6ee794b98bd31 (diff)
ext4: fix race when setting bitmap_uptodate flag
In ext4_read_{inode,block}_bitmap() we were setting bitmap_uptodate() before submitting the buffer for read. This is bad, since we check bitmap_uptodate() without locking the buffer, and so if another process is racing with us, it's possible that they will think the bitmap is uptodate even though the read has not completed yet, resulting in inodes and blocks potentially getting allocated more than once if we get really unlucky. Addresses-Google-Bug: 2828254 Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/balloc.c63
-rw-r--r--fs/ext4/ext4.h11
-rw-r--r--fs/ext4/ialloc.c26
-rw-r--r--fs/ext4/mballoc.c79
4 files changed, 86 insertions, 93 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f9e2cd8cf711..a2cff2b9d5b5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -336,10 +336,10 @@ err_out:
336 * Return buffer_head on success or NULL in case of failure. 336 * Return buffer_head on success or NULL in case of failure.
337 */ 337 */
338struct buffer_head * 338struct buffer_head *
339ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 339ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
340{ 340{
341 struct ext4_group_desc *desc; 341 struct ext4_group_desc *desc;
342 struct buffer_head *bh = NULL; 342 struct buffer_head *bh;
343 ext4_fsblk_t bitmap_blk; 343 ext4_fsblk_t bitmap_blk;
344 344
345 desc = ext4_get_group_desc(sb, block_group, NULL); 345 desc = ext4_get_group_desc(sb, block_group, NULL);
@@ -348,9 +348,9 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
348 bitmap_blk = ext4_block_bitmap(sb, desc); 348 bitmap_blk = ext4_block_bitmap(sb, desc);
349 bh = sb_getblk(sb, bitmap_blk); 349 bh = sb_getblk(sb, bitmap_blk);
350 if (unlikely(!bh)) { 350 if (unlikely(!bh)) {
351 ext4_error(sb, "Cannot read block bitmap - " 351 ext4_error(sb, "Cannot get buffer for block bitmap - "
352 "block_group = %u, block_bitmap = %llu", 352 "block_group = %u, block_bitmap = %llu",
353 block_group, bitmap_blk); 353 block_group, bitmap_blk);
354 return NULL; 354 return NULL;
355 } 355 }
356 356
@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
382 return bh; 382 return bh;
383 } 383 }
384 /* 384 /*
385 * submit the buffer_head for read. We can 385 * submit the buffer_head for reading
386 * safely mark the bitmap as uptodate now.
387 * We do it here so the bitmap uptodate bit
388 * get set with buffer lock held.
389 */ 386 */
387 set_buffer_new(bh);
390 trace_ext4_read_block_bitmap_load(sb, block_group); 388 trace_ext4_read_block_bitmap_load(sb, block_group);
391 set_bitmap_uptodate(bh); 389 bh->b_end_io = ext4_end_bitmap_read;
392 if (bh_submit_read(bh) < 0) { 390 get_bh(bh);
393 put_bh(bh); 391 submit_bh(READ, bh);
392 return bh;
393}
394
395/* Returns 0 on success, 1 on error */
396int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
397 struct buffer_head *bh)
398{
399 struct ext4_group_desc *desc;
400
401 if (!buffer_new(bh))
402 return 0;
403 desc = ext4_get_group_desc(sb, block_group, NULL);
404 if (!desc)
405 return 1;
406 wait_on_buffer(bh);
407 if (!buffer_uptodate(bh)) {
394 ext4_error(sb, "Cannot read block bitmap - " 408 ext4_error(sb, "Cannot read block bitmap - "
395 "block_group = %u, block_bitmap = %llu", 409 "block_group = %u, block_bitmap = %llu",
396 block_group, bitmap_blk); 410 block_group, bh->b_blocknr);
397 return NULL; 411 return 1;
398 } 412 }
413 clear_buffer_new(bh);
414 /* Panic or remount fs read-only if block bitmap is invalid */
399 ext4_valid_block_bitmap(sb, desc, block_group, bh); 415 ext4_valid_block_bitmap(sb, desc, block_group, bh);
400 /* 416 return 0;
401 * file system mounted not to panic on error, 417}
402 * continue with corrupt bitmap 418
403 */ 419struct buffer_head *
420ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
421{
422 struct buffer_head *bh;
423
424 bh = ext4_read_block_bitmap_nowait(sb, block_group);
425 if (ext4_wait_block_bitmap(sb, block_group, bh)) {
426 put_bh(bh);
427 return NULL;
428 }
404 return bh; 429 return bh;
405} 430}
406 431
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 513004fc3d84..315f01be73e6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1794,8 +1794,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1794 ext4_group_t block_group, 1794 ext4_group_t block_group,
1795 struct buffer_head ** bh); 1795 struct buffer_head ** bh);
1796extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1796extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1797struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1797
1798 ext4_group_t block_group); 1798extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
1799 ext4_group_t block_group);
1800extern int ext4_wait_block_bitmap(struct super_block *sb,
1801 ext4_group_t block_group,
1802 struct buffer_head *bh);
1803extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1804 ext4_group_t block_group);
1799extern void ext4_init_block_bitmap(struct super_block *sb, 1805extern void ext4_init_block_bitmap(struct super_block *sb,
1800 struct buffer_head *bh, 1806 struct buffer_head *bh,
1801 ext4_group_t group, 1807 ext4_group_t group,
@@ -1841,6 +1847,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *);
1841extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap); 1847extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
1842extern int ext4_init_inode_table(struct super_block *sb, 1848extern int ext4_init_inode_table(struct super_block *sb,
1843 ext4_group_t group, int barrier); 1849 ext4_group_t group, int barrier);
1850extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
1844 1851
1845/* mballoc.c */ 1852/* mballoc.c */
1846extern long ext4_mb_stats; 1853extern long ext4_mb_stats;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 84e6e9a3986b..b2685992fb2d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
92 return EXT4_INODES_PER_GROUP(sb); 92 return EXT4_INODES_PER_GROUP(sb);
93} 93}
94 94
95void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
96{
97 if (uptodate) {
98 set_buffer_uptodate(bh);
99 set_bitmap_uptodate(bh);
100 }
101 unlock_buffer(bh);
102 put_bh(bh);
103}
104
95/* 105/*
96 * Read the inode allocation bitmap for a given block_group, reading 106 * Read the inode allocation bitmap for a given block_group, reading
97 * into the specified slot in the superblock's bitmap cache. 107 * into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
147 return bh; 157 return bh;
148 } 158 }
149 /* 159 /*
150 * submit the buffer_head for read. We can 160 * submit the buffer_head for reading
151 * safely mark the bitmap as uptodate now.
152 * We do it here so the bitmap uptodate bit
153 * get set with buffer lock held.
154 */ 161 */
155 trace_ext4_load_inode_bitmap(sb, block_group); 162 trace_ext4_load_inode_bitmap(sb, block_group);
156 set_bitmap_uptodate(bh); 163 bh->b_end_io = ext4_end_bitmap_read;
157 if (bh_submit_read(bh) < 0) { 164 get_bh(bh);
165 submit_bh(READ, bh);
166 wait_on_buffer(bh);
167 if (!buffer_uptodate(bh)) {
158 put_bh(bh); 168 put_bh(bh);
159 ext4_error(sb, "Cannot read inode bitmap - " 169 ext4_error(sb, "Cannot read inode bitmap - "
160 "block_group = %u, inode_bitmap = %llu", 170 "block_group = %u, inode_bitmap = %llu",
161 block_group, bitmap_blk); 171 block_group, bitmap_blk);
162 return NULL; 172 return NULL;
163 } 173 }
164 return bh; 174 return bh;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cb990b21c698..545fa0256606 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -782,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
782 int groups_per_page; 782 int groups_per_page;
783 int err = 0; 783 int err = 0;
784 int i; 784 int i;
785 ext4_group_t first_group; 785 ext4_group_t first_group, group;
786 int first_block; 786 int first_block;
787 struct super_block *sb; 787 struct super_block *sb;
788 struct buffer_head *bhs; 788 struct buffer_head *bhs;
@@ -806,24 +806,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
806 806
807 /* allocate buffer_heads to read bitmaps */ 807 /* allocate buffer_heads to read bitmaps */
808 if (groups_per_page > 1) { 808 if (groups_per_page > 1) {
809 err = -ENOMEM;
810 i = sizeof(struct buffer_head *) * groups_per_page; 809 i = sizeof(struct buffer_head *) * groups_per_page;
811 bh = kzalloc(i, GFP_NOFS); 810 bh = kzalloc(i, GFP_NOFS);
812 if (bh == NULL) 811 if (bh == NULL) {
812 err = -ENOMEM;
813 goto out; 813 goto out;
814 }
814 } else 815 } else
815 bh = &bhs; 816 bh = &bhs;
816 817
817 first_group = page->index * blocks_per_page / 2; 818 first_group = page->index * blocks_per_page / 2;
818 819
819 /* read all groups the page covers into the cache */ 820 /* read all groups the page covers into the cache */
820 for (i = 0; i < groups_per_page; i++) { 821 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
821 struct ext4_group_desc *desc; 822 if (group >= ngroups)
822
823 if (first_group + i >= ngroups)
824 break; 823 break;
825 824
826 grinfo = ext4_get_group_info(sb, first_group + i); 825 grinfo = ext4_get_group_info(sb, group);
827 /* 826 /*
828 * If page is uptodate then we came here after online resize 827 * If page is uptodate then we came here after online resize
829 * which added some new uninitialized group info structs, so 828 * which added some new uninitialized group info structs, so
@@ -834,69 +833,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
834 bh[i] = NULL; 833 bh[i] = NULL;
835 continue; 834 continue;
836 } 835 }
837 836 if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
838 err = -EIO; 837 err = -ENOMEM;
839 desc = ext4_get_group_desc(sb, first_group + i, NULL);
840 if (desc == NULL)
841 goto out;
842
843 err = -ENOMEM;
844 bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
845 if (bh[i] == NULL)
846 goto out; 838 goto out;
847
848 if (bitmap_uptodate(bh[i]))
849 continue;
850
851 lock_buffer(bh[i]);
852 if (bitmap_uptodate(bh[i])) {
853 unlock_buffer(bh[i]);
854 continue;
855 } 839 }
856 ext4_lock_group(sb, first_group + i); 840 mb_debug(1, "read bitmap for group %u\n", group);
857 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
858 ext4_init_block_bitmap(sb, bh[i],
859 first_group + i, desc);
860 set_bitmap_uptodate(bh[i]);
861 set_buffer_uptodate(bh[i]);
862 ext4_unlock_group(sb, first_group + i);
863 unlock_buffer(bh[i]);
864 continue;
865 }
866 ext4_unlock_group(sb, first_group + i);
867 if (buffer_uptodate(bh[i])) {
868 /*
869 * if not uninit if bh is uptodate,
870 * bitmap is also uptodate
871 */
872 set_bitmap_uptodate(bh[i]);
873 unlock_buffer(bh[i]);
874 continue;
875 }
876 get_bh(bh[i]);
877 /*
878 * submit the buffer_head for read. We can
879 * safely mark the bitmap as uptodate now.
880 * We do it here so the bitmap uptodate bit
881 * get set with buffer lock held.
882 */
883 set_bitmap_uptodate(bh[i]);
884 bh[i]->b_end_io = end_buffer_read_sync;
885 submit_bh(READ, bh[i]);
886 mb_debug(1, "read bitmap for group %u\n", first_group + i);
887 } 841 }
888 842
889 /* wait for I/O completion */ 843 /* wait for I/O completion */
890 for (i = 0; i < groups_per_page; i++) 844 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
891 if (bh[i]) 845 if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
892 wait_on_buffer(bh[i]); 846 err = -EIO;
893
894 err = -EIO;
895 for (i = 0; i < groups_per_page; i++)
896 if (bh[i] && !buffer_uptodate(bh[i]))
897 goto out; 847 goto out;
848 }
849 }
898 850
899 err = 0;
900 first_block = page->index * blocks_per_page; 851 first_block = page->index * blocks_per_page;
901 for (i = 0; i < blocks_per_page; i++) { 852 for (i = 0; i < blocks_per_page; i++) {
902 int group; 853 int group;