diff options
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 147 |
1 files changed, 137 insertions, 10 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 45853e0d1f21..21bb2f61e502 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -50,7 +50,7 @@ | |||
50 | * need to use it within a single byte (to ensure we get endianness right). | 50 | * need to use it within a single byte (to ensure we get endianness right). |
51 | * We can use memset for the rest of the bitmap as there are no other users. | 51 | * We can use memset for the rest of the bitmap as there are no other users. |
52 | */ | 52 | */ |
53 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | 53 | void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) |
54 | { | 54 | { |
55 | int i; | 55 | int i; |
56 | 56 | ||
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | /* Initializes an uninitialized inode bitmap */ | 67 | /* Initializes an uninitialized inode bitmap */ |
68 | unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | 68 | static unsigned ext4_init_inode_bitmap(struct super_block *sb, |
69 | ext4_group_t block_group, | 69 | struct buffer_head *bh, |
70 | struct ext4_group_desc *gdp) | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | ||
71 | { | 72 | { |
72 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 73 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
73 | 74 | ||
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
85 | } | 86 | } |
86 | 87 | ||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
89 | bh->b_data); | 90 | bh->b_data); |
90 | 91 | ||
91 | return EXT4_INODES_PER_GROUP(sb); | 92 | return EXT4_INODES_PER_GROUP(sb); |
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
107 | desc = ext4_get_group_desc(sb, block_group, NULL); | 108 | desc = ext4_get_group_desc(sb, block_group, NULL); |
108 | if (!desc) | 109 | if (!desc) |
109 | return NULL; | 110 | return NULL; |
111 | |||
110 | bitmap_blk = ext4_inode_bitmap(sb, desc); | 112 | bitmap_blk = ext4_inode_bitmap(sb, desc); |
111 | bh = sb_getblk(sb, bitmap_blk); | 113 | bh = sb_getblk(sb, bitmap_blk); |
112 | if (unlikely(!bh)) { | 114 | if (unlikely(!bh)) { |
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
123 | unlock_buffer(bh); | 125 | unlock_buffer(bh); |
124 | return bh; | 126 | return bh; |
125 | } | 127 | } |
128 | |||
126 | ext4_lock_group(sb, block_group); | 129 | ext4_lock_group(sb, block_group); |
127 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 130 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
128 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 131 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
133 | return bh; | 136 | return bh; |
134 | } | 137 | } |
135 | ext4_unlock_group(sb, block_group); | 138 | ext4_unlock_group(sb, block_group); |
139 | |||
136 | if (buffer_uptodate(bh)) { | 140 | if (buffer_uptodate(bh)) { |
137 | /* | 141 | /* |
138 | * if not uninit if bh is uptodate, | 142 | * if not uninit if bh is uptodate, |
@@ -148,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
148 | * We do it here so the bitmap uptodate bit | 152 | * We do it here so the bitmap uptodate bit |
149 | * get set with buffer lock held. | 153 | * get set with buffer lock held. |
150 | */ | 154 | */ |
155 | trace_ext4_load_inode_bitmap(sb, block_group); | ||
151 | set_bitmap_uptodate(bh); | 156 | set_bitmap_uptodate(bh); |
152 | if (bh_submit_read(bh) < 0) { | 157 | if (bh_submit_read(bh) < 0) { |
153 | put_bh(bh); | 158 | put_bh(bh); |
@@ -411,8 +416,8 @@ struct orlov_stats { | |||
411 | * for a particular block group or flex_bg. If flex_size is 1, then g | 416 | * for a particular block group or flex_bg. If flex_size is 1, then g |
412 | * is a block group number; otherwise it is flex_bg number. | 417 | * is a block group number; otherwise it is flex_bg number. |
413 | */ | 418 | */ |
414 | void get_orlov_stats(struct super_block *sb, ext4_group_t g, | 419 | static void get_orlov_stats(struct super_block *sb, ext4_group_t g, |
415 | int flex_size, struct orlov_stats *stats) | 420 | int flex_size, struct orlov_stats *stats) |
416 | { | 421 | { |
417 | struct ext4_group_desc *desc; | 422 | struct ext4_group_desc *desc; |
418 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; | 423 | struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; |
@@ -645,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
645 | *group = parent_group + flex_size; | 650 | *group = parent_group + flex_size; |
646 | if (*group > ngroups) | 651 | if (*group > ngroups) |
647 | *group = 0; | 652 | *group = 0; |
648 | return find_group_orlov(sb, parent, group, mode, 0); | 653 | return find_group_orlov(sb, parent, group, mode, NULL); |
649 | } | 654 | } |
650 | 655 | ||
651 | /* | 656 | /* |
@@ -712,8 +717,17 @@ static int ext4_claim_inode(struct super_block *sb, | |||
712 | { | 717 | { |
713 | int free = 0, retval = 0, count; | 718 | int free = 0, retval = 0, count; |
714 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 719 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
720 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
715 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); | 721 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); |
716 | 722 | ||
723 | /* | ||
724 | * We have to be sure that new inode allocation does not race with | ||
725 | * inode table initialization, because otherwise we may end up | ||
726 | * allocating and writing new inode right before sb_issue_zeroout | ||
727 | * takes place and overwriting our new inode with zeroes. So we | ||
728 | * take alloc_sem to prevent it. | ||
729 | */ | ||
730 | down_read(&grp->alloc_sem); | ||
717 | ext4_lock_group(sb, group); | 731 | ext4_lock_group(sb, group); |
718 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 732 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { |
719 | /* not a free inode */ | 733 | /* not a free inode */ |
@@ -724,6 +738,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
724 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || | 738 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
725 | ino > EXT4_INODES_PER_GROUP(sb)) { | 739 | ino > EXT4_INODES_PER_GROUP(sb)) { |
726 | ext4_unlock_group(sb, group); | 740 | ext4_unlock_group(sb, group); |
741 | up_read(&grp->alloc_sem); | ||
727 | ext4_error(sb, "reserved inode or inode > inodes count - " | 742 | ext4_error(sb, "reserved inode or inode > inodes count - " |
728 | "block_group = %u, inode=%lu", group, | 743 | "block_group = %u, inode=%lu", group, |
729 | ino + group * EXT4_INODES_PER_GROUP(sb)); | 744 | ino + group * EXT4_INODES_PER_GROUP(sb)); |
@@ -772,6 +787,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
772 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 787 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); |
773 | err_ret: | 788 | err_ret: |
774 | ext4_unlock_group(sb, group); | 789 | ext4_unlock_group(sb, group); |
790 | up_read(&grp->alloc_sem); | ||
775 | return retval; | 791 | return retval; |
776 | } | 792 | } |
777 | 793 | ||
@@ -1012,7 +1028,7 @@ got: | |||
1012 | inode->i_generation = sbi->s_next_generation++; | 1028 | inode->i_generation = sbi->s_next_generation++; |
1013 | spin_unlock(&sbi->s_next_gen_lock); | 1029 | spin_unlock(&sbi->s_next_gen_lock); |
1014 | 1030 | ||
1015 | ei->i_state_flags = 0; | 1031 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
1016 | ext4_set_inode_state(inode, EXT4_STATE_NEW); | 1032 | ext4_set_inode_state(inode, EXT4_STATE_NEW); |
1017 | 1033 | ||
1018 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 1034 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
@@ -1027,7 +1043,7 @@ got: | |||
1027 | if (err) | 1043 | if (err) |
1028 | goto fail_free_drop; | 1044 | goto fail_free_drop; |
1029 | 1045 | ||
1030 | err = ext4_init_security(handle, inode, dir); | 1046 | err = ext4_init_security(handle, inode, dir, qstr); |
1031 | if (err) | 1047 | if (err) |
1032 | goto fail_free_drop; | 1048 | goto fail_free_drop; |
1033 | 1049 | ||
@@ -1039,6 +1055,11 @@ got: | |||
1039 | } | 1055 | } |
1040 | } | 1056 | } |
1041 | 1057 | ||
1058 | if (ext4_handle_valid(handle)) { | ||
1059 | ei->i_sync_tid = handle->h_transaction->t_tid; | ||
1060 | ei->i_datasync_tid = handle->h_transaction->t_tid; | ||
1061 | } | ||
1062 | |||
1042 | err = ext4_mark_inode_dirty(handle, inode); | 1063 | err = ext4_mark_inode_dirty(handle, inode); |
1043 | if (err) { | 1064 | if (err) { |
1044 | ext4_std_error(sb, err); | 1065 | ext4_std_error(sb, err); |
@@ -1205,3 +1226,109 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1205 | } | 1226 | } |
1206 | return count; | 1227 | return count; |
1207 | } | 1228 | } |
1229 | |||
1230 | /* | ||
1231 | * Zeroes not yet zeroed inode table - just write zeroes through the whole | ||
1232 | * inode table. Must be called without any spinlock held. The only place | ||
1233 | * where it is called from on active part of filesystem is ext4lazyinit | ||
1234 | * thread, so we do not need any special locks, however we have to prevent | ||
1235 | * inode allocation from the current group, so we take alloc_sem lock, to | ||
1236 | * block ext4_claim_inode until we are finished. | ||
1237 | */ | ||
1238 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | ||
1239 | int barrier) | ||
1240 | { | ||
1241 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | ||
1242 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1243 | struct ext4_group_desc *gdp = NULL; | ||
1244 | struct buffer_head *group_desc_bh; | ||
1245 | handle_t *handle; | ||
1246 | ext4_fsblk_t blk; | ||
1247 | int num, ret = 0, used_blks = 0; | ||
1248 | |||
1249 | /* This should not happen, but just to be sure check this */ | ||
1250 | if (sb->s_flags & MS_RDONLY) { | ||
1251 | ret = 1; | ||
1252 | goto out; | ||
1253 | } | ||
1254 | |||
1255 | gdp = ext4_get_group_desc(sb, group, &group_desc_bh); | ||
1256 | if (!gdp) | ||
1257 | goto out; | ||
1258 | |||
1259 | /* | ||
1260 | * We do not need to lock this, because we are the only one | ||
1261 | * handling this flag. | ||
1262 | */ | ||
1263 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) | ||
1264 | goto out; | ||
1265 | |||
1266 | handle = ext4_journal_start_sb(sb, 1); | ||
1267 | if (IS_ERR(handle)) { | ||
1268 | ret = PTR_ERR(handle); | ||
1269 | goto out; | ||
1270 | } | ||
1271 | |||
1272 | down_write(&grp->alloc_sem); | ||
1273 | /* | ||
1274 | * If inode bitmap was already initialized there may be some | ||
1275 | * used inodes so we need to skip blocks with used inodes in | ||
1276 | * inode table. | ||
1277 | */ | ||
1278 | if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
1279 | used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - | ||
1280 | ext4_itable_unused_count(sb, gdp)), | ||
1281 | sbi->s_inodes_per_block); | ||
1282 | |||
1283 | if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { | ||
1284 | ext4_error(sb, "Something is wrong with group %u\n" | ||
1285 | "Used itable blocks: %d" | ||
1286 | "itable unused count: %u\n", | ||
1287 | group, used_blks, | ||
1288 | ext4_itable_unused_count(sb, gdp)); | ||
1289 | ret = 1; | ||
1290 | goto out; | ||
1291 | } | ||
1292 | |||
1293 | blk = ext4_inode_table(sb, gdp) + used_blks; | ||
1294 | num = sbi->s_itb_per_group - used_blks; | ||
1295 | |||
1296 | BUFFER_TRACE(group_desc_bh, "get_write_access"); | ||
1297 | ret = ext4_journal_get_write_access(handle, | ||
1298 | group_desc_bh); | ||
1299 | if (ret) | ||
1300 | goto err_out; | ||
1301 | |||
1302 | /* | ||
1303 | * Skip zeroout if the inode table is full. But we set the ZEROED | ||
1304 | * flag anyway, because obviously, when it is full it does not need | ||
1305 | * further zeroing. | ||
1306 | */ | ||
1307 | if (unlikely(num == 0)) | ||
1308 | goto skip_zeroout; | ||
1309 | |||
1310 | ext4_debug("going to zero out inode table in group %d\n", | ||
1311 | group); | ||
1312 | ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); | ||
1313 | if (ret < 0) | ||
1314 | goto err_out; | ||
1315 | if (barrier) | ||
1316 | blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); | ||
1317 | |||
1318 | skip_zeroout: | ||
1319 | ext4_lock_group(sb, group); | ||
1320 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1321 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1322 | ext4_unlock_group(sb, group); | ||
1323 | |||
1324 | BUFFER_TRACE(group_desc_bh, | ||
1325 | "call ext4_handle_dirty_metadata"); | ||
1326 | ret = ext4_handle_dirty_metadata(handle, NULL, | ||
1327 | group_desc_bh); | ||
1328 | |||
1329 | err_out: | ||
1330 | up_write(&grp->alloc_sem); | ||
1331 | ext4_journal_stop(handle); | ||
1332 | out: | ||
1333 | return ret; | ||
1334 | } | ||