diff options
author | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 23:44:47 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 23:44:47 -0400 |
commit | a107e5a3a473a2ea62bd5af24e11b84adf1486ff (patch) | |
tree | d36c2cb38d8be88d4d75cdebc354aa140aa0e470 /fs/ext4/ialloc.c | |
parent | e3e1288e86a07cdeb0aee5860a2dff111c6eff79 (diff) | |
parent | a269029d0e2192046be4c07ed78a45022469ee4c (diff) |
Merge branch 'next' into upstream-merge
Conflicts:
fs/ext4/inode.c
fs/ext4/mballoc.c
include/trace/events/ext4.h
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 135 |
1 files changed, 128 insertions, 7 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 45853e0d1f21..1ce240a23ebb 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
52 | */ | 52 | */ |
53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | 56 | ||
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | ||
71 | { | 72 | { |
72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
73 | 74 | ||
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
85 | } | 86 | } |
86 | 87 | ||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
89 | bh->b_data); | 90 | bh->b_data); |
90 | 91 | ||
91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
108 | if (!desc) | 109 | if (!desc) |
109 | return NULL; | 110 | return NULL; |
111 | |||
110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
124 | return bh; | 126 | return bh; |
125 | } | 127 | } |
128 | |||
126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
133 | return bh; | 136 | return bh; |
134 | } | 137 | } |
135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
139 | |||
136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
137 | /* | 141 | /* |
138 | * if not uninit if bh is uptodate, | 142 | * if not uninit if bh is uptodate, |
@@ -411,8 +415,8 @@ struct orlov_stats { | |||
411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 415 | * for a particular block group or flex_bg. If flex_size is 1, then g |
412 | * is a block group number; otherwise it is flex_bg number. | 416 | * is a block group number; otherwise it is flex_bg number. |
413 | */ | 417 | */ |
414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 418 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
415 | int flex_size, struct orlov_stats *stats) | 419 | int flex_size, struct orlov_stats *stats) |
416 | { | 420 | { |
417 | struct ext4_group_desc *desc; | 421 | struct ext4_group_desc *desc; |
418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 422 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
712 | { | 716 | { |
713 | int free = 0, retval = 0, count; | 717 | int free = 0, retval = 0, count; |
714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 718 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
719 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
716 | 721 | ||
722 | /* | ||
723 | * We have to be sure that new inode allocation does not race with | ||
724 | * inode table initialization, because otherwise we may end up | ||
725 | * allocating and writing new inode right before sb_issue_zeroout | ||
726 | * takes place and overwriting our new inode with zeroes. So we | ||
727 | * take alloc_sem to prevent it. | ||
728 | */ | ||
729 | down_read(&grp->alloc_sem); | ||
717 | ext4_lock_group(sb, group); | 730 | ext4_lock_group(sb, group); |
718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 731 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
719 | /* not a free inode */ | 732 | /* not a free inode */ |
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 737 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 738 | ino > EXT4_INODES_PER_GROUP(sb)) { |
726 | ext4_unlock_group(sb, group); | 739 | ext4_unlock_group(sb, group); |
740 | up_read(&grp->alloc_sem); | ||
727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 741 | ext4_error(sb, "reserved inode or inode > inodes count - " |
728 | "block_group = %u, inode=%lu", group, | 742 | "block_group = %u, inode=%lu", group, |
729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 743 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
773 | err_ret: | 787 | err_ret: |
774 | ext4_unlock_group(sb, group); | 788 | ext4_unlock_group(sb, group); |
789 | up_read(&grp->alloc_sem); | ||
775 | return retval; | 790 | return retval; |
776 | } | 791 | } |
777 | 792 | ||
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1205 | } | 1220 | } |
1206 | return count; | 1221 | return count; |
1207 | } | 1222 | } |
1223 | |||
1224 | /* | ||
1225 | * Zeroes not yet zeroed inode table - just write zeroes through the whole | ||
1226 | * inode table. Must be called without any spinlock held. The only place | ||
1227 | * where it is called from on active part of filesystem is ext4lazyinit | ||
1228 | * thread, so we do not need any special locks, however we have to prevent | ||
1229 | * inode allocation from the current group, so we take alloc_sem lock, to | ||
1230 | * block ext4_claim_inode until we are finished. | ||
1231 | */ | ||
1232 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
1233 | int barrier) | ||
1234 | { | ||
1235 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
1236 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1237 | struct ext4_group_desc *gdp = NULL; | ||
1238 | struct buffer_head *group_desc_bh; | ||
1239 | handle_t *handle; | ||
1240 | ext4_fsblk_t blk; | ||
1241 | int num, ret = 0, used_blks = 0; | ||
1242 | |||
1243 | /* This should not happen, but just to be sure check this */ | ||
1244 | if (sb->s_flags & MS_RDONLY) { | ||
1245 | ret = 1; | ||
1246 | goto out; | ||
1247 | } | ||
1248 | |||
1249 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
1250 | if (!gdp) | ||
1251 | goto out; | ||
1252 | |||
1253 | /* | ||
1254 | * We do not need to lock this, because we are the only one | ||
1255 | * handling this flag. | ||
1256 | */ | ||
1257 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
1258 | goto out; | ||
1259 | |||
1260 | handle = ext4_journal_start_sb(sb, 1); | ||
1261 | if (IS_ERR(handle)) { | ||
1262 | ret = PTR_ERR(handle); | ||
1263 | goto out; | ||
1264 | } | ||
1265 | |||
1266 | down_write(&grp->alloc_sem); | ||
1267 | /* | ||
1268 | * If inode bitmap was already initialized there may be some | ||
1269 | * used inodes so we need to skip blocks with used inodes in | ||
1270 | * inode table. | ||
1271 | */ | ||
1272 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
1273 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
1274 | ext4_itable_unused_count(sb, gdp)), | ||
1275 | sbi->s_inodes_per_block); | ||
1276 | |||
1277 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
1278 | ext4_error(sb, "Something is wrong with group %u\n" | ||
1279 | "Used itable blocks: %d" | ||
1280 | "itable unused count: %u\n", | ||
1281 | group, used_blks, | ||
1282 | ext4_itable_unused_count(sb, gdp)); | ||
1283 | ret = 1; | ||
1284 | goto out; | ||
1285 | } | ||
1286 | |||
1287 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
1288 | num = sbi->s_itb_per_group - used_blks; | ||
1289 | |||
1290 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
1291 | ret = ext4_journal_get_write_access(handle, | ||
1292 | group_desc_bh); | ||
1293 | if (ret) | ||
1294 | goto err_out; | ||
1295 | |||
1296 | /* | ||
1297 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
1298 | * flag anyway, because obviously, when it is full it does not need | ||
1299 | * further zeroing. | ||
1300 | */ | ||
1301 | if (unlikely(num == 0)) | ||
1302 | goto skip_zeroout; | ||
1303 | |||
1304 | ext4_debug("going to zero out inode table in group %d\n", | ||
1305 | group); | ||
1306 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
1307 | if (ret < 0) | ||
1308 | goto err_out; | ||
1309 | if (barrier) | ||
1310 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
1311 | |||
1312 | skip_zeroout: | ||
1313 | ext4_lock_group(sb, group); | ||
1314 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1315 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1316 | ext4_unlock_group(sb, group); | ||
1317 | |||
1318 | BUFFER_TRACE(group_desc_bh, | ||
1319 | "call ext4_handle_dirty_metadata"); | ||
1320 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
1321 | group_desc_bh); | ||
1322 | |||
1323 | err_out: | ||
1324 | up_write(&grp->alloc_sem); | ||
1325 | ext4_journal_stop(handle); | ||
1326 | out: | ||
1327 | return ret; | ||
1328 | } | ||