diff options
Diffstat (limited to 'fs/ext4/ialloc.c')
| -rw-r--r-- | fs/ext4/ialloc.c | 135 |
1 files changed, 128 insertions, 7 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 45853e0d1f21..1ce240a23ebb 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -50,7 +50,7 @@ | |||
| 50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
| 51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
| 52 | */ | 52 | */ |
| 53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
| 54 | { | 54 | { |
| 55 | int i; | 55 | int i; |
| 56 | 56 | ||
| @@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
| 68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
| 69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
| 70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
| 71 | struct ext4_group_desc *gdp) | ||
| 71 | { | 72 | { |
| 72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 73 | 74 | ||
| @@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
| 85 | } | 86 | } |
| 86 | 87 | ||
| 87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
| 88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
| 89 | bh->b_data); | 90 | bh->b_data); |
| 90 | 91 | ||
| 91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
| @@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
| 108 | if (!desc) | 109 | if (!desc) |
| 109 | return NULL; | 110 | return NULL; |
| 111 | |||
| 110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
| 111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
| 112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
| @@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
| 124 | return bh; | 126 | return bh; |
| 125 | } | 127 | } |
| 128 | |||
| 126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
| 127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
| 128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
| @@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 133 | return bh; | 136 | return bh; |
| 134 | } | 137 | } |
| 135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
| 139 | |||
| 136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
| 137 | /* | 141 | /* |
| 138 | * if not uninit if bh is uptodate, | 142 | * if not uninit if bh is uptodate, |
| @@ -411,8 +415,8 @@ struct orlov_stats { | |||
| 411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 415 | * for a particular block group or flex_bg. If flex_size is 1, then g |
| 412 | * is a block group number; otherwise it is flex_bg number. | 416 | * is a block group number; otherwise it is flex_bg number. |
| 413 | */ | 417 | */ |
| 414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 418 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
| 415 | int flex_size, struct orlov_stats *stats) | 419 | int flex_size, struct orlov_stats *stats) |
| 416 | { | 420 | { |
| 417 | struct ext4_group_desc *desc; | 421 | struct ext4_group_desc *desc; |
| 418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 422 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
| @@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
| 712 | { | 716 | { |
| 713 | int free = 0, retval = 0, count; | 717 | int free = 0, retval = 0, count; |
| 714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 718 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 719 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
| 715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 720 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
| 716 | 721 | ||
| 722 | /* | ||
| 723 | * We have to be sure that new inode allocation does not race with | ||
| 724 | * inode table initialization, because otherwise we may end up | ||
| 725 | * allocating and writing new inode right before sb_issue_zeroout | ||
| 726 | * takes place and overwriting our new inode with zeroes. So we | ||
| 727 | * take alloc_sem to prevent it. | ||
| 728 | */ | ||
| 729 | down_read(&grp->alloc_sem); | ||
| 717 | ext4_lock_group(sb, group); | 730 | ext4_lock_group(sb, group); |
| 718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 731 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
| 719 | /* not a free inode */ | 732 | /* not a free inode */ |
| @@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
| 724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 737 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
| 725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 738 | ino > EXT4_INODES_PER_GROUP(sb)) { |
| 726 | ext4_unlock_group(sb, group); | 739 | ext4_unlock_group(sb, group); |
| 740 | up_read(&grp->alloc_sem); | ||
| 727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 741 | ext4_error(sb, "reserved inode or inode > inodes count - " |
| 728 | "block_group = %u, inode=%lu", group, | 742 | "block_group = %u, inode=%lu", group, |
| 729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 743 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
| @@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
| 772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
| 773 | err_ret: | 787 | err_ret: |
| 774 | ext4_unlock_group(sb, group); | 788 | ext4_unlock_group(sb, group); |
| 789 | up_read(&grp->alloc_sem); | ||
| 775 | return retval; | 790 | return retval; |
| 776 | } | 791 | } |
| 777 | 792 | ||
| @@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
| 1205 | } | 1220 | } |
| 1206 | return count; | 1221 | return count; |
| 1207 | } | 1222 | } |
| 1223 | |||
| 1224 | /* | ||
| 1225 | * Zeroes not yet zeroed inode table - just write zeroes through the whole | ||
| 1226 | * inode table. Must be called without any spinlock held. The only place | ||
| 1227 | * where it is called from on active part of filesystem is ext4lazyinit | ||
| 1228 | * thread, so we do not need any special locks, however we have to prevent | ||
| 1229 | * inode allocation from the current group, so we take alloc_sem lock, to | ||
| 1230 | * block ext4_claim_inode until we are finished. | ||
| 1231 | */ | ||
| 1232 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
| 1233 | int barrier) | ||
| 1234 | { | ||
| 1235 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
| 1236 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 1237 | struct ext4_group_desc *gdp = NULL; | ||
| 1238 | struct buffer_head *group_desc_bh; | ||
| 1239 | handle_t *handle; | ||
| 1240 | ext4_fsblk_t blk; | ||
| 1241 | int num, ret = 0, used_blks = 0; | ||
| 1242 | |||
| 1243 | /* This should not happen, but just to be sure check this */ | ||
| 1244 | if (sb->s_flags & MS_RDONLY) { | ||
| 1245 | ret = 1; | ||
| 1246 | goto out; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
| 1250 | if (!gdp) | ||
| 1251 | goto out; | ||
| 1252 | |||
| 1253 | /* | ||
| 1254 | * We do not need to lock this, because we are the only one | ||
| 1255 | * handling this flag. | ||
| 1256 | */ | ||
| 1257 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
| 1258 | goto out; | ||
| 1259 | |||
| 1260 | handle = ext4_journal_start_sb(sb, 1); | ||
| 1261 | if (IS_ERR(handle)) { | ||
| 1262 | ret = PTR_ERR(handle); | ||
| 1263 | goto out; | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | down_write(&grp->alloc_sem); | ||
| 1267 | /* | ||
| 1268 | * If inode bitmap was already initialized there may be some | ||
| 1269 | * used inodes so we need to skip blocks with used inodes in | ||
| 1270 | * inode table. | ||
| 1271 | */ | ||
| 1272 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
| 1273 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
| 1274 | ext4_itable_unused_count(sb, gdp)), | ||
| 1275 | sbi->s_inodes_per_block); | ||
| 1276 | |||
| 1277 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
| 1278 | ext4_error(sb, "Something is wrong with group %u\n" | ||
| 1279 | "Used itable blocks: %d" | ||
| 1280 | "itable unused count: %u\n", | ||
| 1281 | group, used_blks, | ||
| 1282 | ext4_itable_unused_count(sb, gdp)); | ||
| 1283 | ret = 1; | ||
| 1284 | goto out; | ||
| 1285 | } | ||
| 1286 | |||
| 1287 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
| 1288 | num = sbi->s_itb_per_group - used_blks; | ||
| 1289 | |||
| 1290 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
| 1291 | ret = ext4_journal_get_write_access(handle, | ||
| 1292 | group_desc_bh); | ||
| 1293 | if (ret) | ||
| 1294 | goto err_out; | ||
| 1295 | |||
| 1296 | /* | ||
| 1297 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
| 1298 | * flag anyway, because obviously, when it is full it does not need | ||
| 1299 | * further zeroing. | ||
| 1300 | */ | ||
| 1301 | if (unlikely(num == 0)) | ||
| 1302 | goto skip_zeroout; | ||
| 1303 | |||
| 1304 | ext4_debug("going to zero out inode table in group %d\n", | ||
| 1305 | group); | ||
| 1306 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
| 1307 | if (ret < 0) | ||
| 1308 | goto err_out; | ||
| 1309 | if (barrier) | ||
| 1310 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
| 1311 | |||
| 1312 | skip_zeroout: | ||
| 1313 | ext4_lock_group(sb, group); | ||
| 1314 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
| 1315 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
| 1316 | ext4_unlock_group(sb, group); | ||
| 1317 | |||
| 1318 | BUFFER_TRACE(group_desc_bh, | ||
| 1319 | "call ext4_handle_dirty_metadata"); | ||
| 1320 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
| 1321 | group_desc_bh); | ||
| 1322 | |||
| 1323 | err_out: | ||
| 1324 | up_write(&grp->alloc_sem); | ||
| 1325 | ext4_journal_stop(handle); | ||
| 1326 | out: | ||
| 1327 | return ret; | ||
| 1328 | } | ||
