Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--  fs/ext4/ialloc.c  147
1 files changed, 137 insertions, 10 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f21..21bb2f61e502 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
  * need to use it within a single byte (to ensure we get endianness right).
  * We can use memset for the rest of the bitmap as there are no other users.
  */
-void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 {
 	int i;
 
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 }
 
 /* Initializes an uninitialized inode bitmap */
-unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
-				ext4_group_t block_group,
-				struct ext4_group_desc *gdp)
+static unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t block_group,
+				       struct ext4_group_desc *gdp)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
 	}
 
 	memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
-	mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+	ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
 			bh->b_data);
 
 	return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
 		return NULL;
+
 	bitmap_blk = ext4_inode_bitmap(sb, desc);
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		return bh;
 	}
+
 	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
+
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -148,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	 * We do it here so the bitmap uptodate bit
 	 * get set with buffer lock held.
 	 */
+	trace_ext4_load_inode_bitmap(sb, block_group);
 	set_bitmap_uptodate(bh);
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);
@@ -411,8 +416,8 @@ struct orlov_stats {
  * for a particular block group or flex_bg.  If flex_size is 1, then g
  * is a block group number; otherwise it is flex_bg number.
  */
-void get_orlov_stats(struct super_block *sb, ext4_group_t g,
-		       int flex_size, struct orlov_stats *stats)
+static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
+			    int flex_size, struct orlov_stats *stats)
 {
 	struct ext4_group_desc *desc;
 	struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -645,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode, 0);
+		return find_group_orlov(sb, parent, group, mode, NULL);
 	}
 
 	/*
@@ -712,8 +717,17 @@ static int ext4_claim_inode(struct super_block *sb,
 {
 	int free = 0, retval = 0, count;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 
+	/*
+	 * We have to be sure that new inode allocation does not race with
+	 * inode table initialization, because otherwise we may end up
+	 * allocating and writing new inode right before sb_issue_zeroout
+	 * takes place and overwriting our new inode with zeroes. So we
+	 * take alloc_sem to prevent it.
+	 */
+	down_read(&grp->alloc_sem);
 	ext4_lock_group(sb, group);
 	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
 		/* not a free inode */
@@ -724,6 +738,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
 		ext4_unlock_group(sb, group);
+		up_read(&grp->alloc_sem);
 		ext4_error(sb, "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
 			   ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +787,7 @@ static int ext4_claim_inode(struct super_block *sb,
 		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
 	ext4_unlock_group(sb, group);
+	up_read(&grp->alloc_sem);
 	return retval;
 }
 
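Reviewer note: the three hunks above pair with ext4_init_inode_table() added at the bottom of this diff. Allocation takes grp->alloc_sem as a reader while lazy zeroing takes it as a writer, so a freshly claimed inode can never be overwritten mid-flight by sb_issue_zeroout (the real code additionally skips itable blocks that may hold live inodes). A minimal userspace sketch of just this exclusion pattern, using a pthread rwlock in place of the kernel rw_semaphore; all names below are illustrative, not ext4 API:

/* Hypothetical model: the rwlock plays the role of grp->alloc_sem. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define INODES_PER_GROUP 16

struct group {
	pthread_rwlock_t alloc_sem;		/* models grp->alloc_sem */
	unsigned char itable[INODES_PER_GROUP];	/* models the inode table */
};

/* Allocation path: many may run concurrently (cf. down_read above). */
static void claim_inode(struct group *g, int ino)
{
	pthread_rwlock_rdlock(&g->alloc_sem);
	g->itable[ino] = 0xff;			/* "write" the new inode */
	pthread_rwlock_unlock(&g->alloc_sem);
}

/* Zeroing path: exclusive (cf. down_write in ext4_init_inode_table), so it
 * can never run in the middle of claim_inode(). */
static void init_inode_table(struct group *g)
{
	pthread_rwlock_wrlock(&g->alloc_sem);
	memset(g->itable, 0, sizeof(g->itable));
	pthread_rwlock_unlock(&g->alloc_sem);
}

int main(void)
{
	struct group g;

	pthread_rwlock_init(&g.alloc_sem, NULL);
	memset(g.itable, 0, sizeof(g.itable));
	claim_inode(&g, 3);
	init_inode_table(&g);		/* waits for all readers to drop out */
	printf("itable[3] = %d\n", g.itable[3]);
	pthread_rwlock_destroy(&g.alloc_sem);
	return 0;
}

Readers do not exclude one another, so concurrent allocations stay cheap; only the rare zeroout pass pays for exclusivity.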
@@ -1012,7 +1028,7 @@ got:
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state_flags = 0;
+	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
 	ext4_set_inode_state(inode, EXT4_STATE_NEW);
 
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
@@ -1027,7 +1043,7 @@ got:
 	if (err)
 		goto fail_free_drop;
 
-	err = ext4_init_security(handle, inode, dir);
+	err = ext4_init_security(handle, inode, dir, qstr);
 	if (err)
 		goto fail_free_drop;
 
@@ -1039,6 +1055,11 @@ got:
 		}
 	}
 
+	if (ext4_handle_valid(handle)) {
+		ei->i_sync_tid = handle->h_transaction->t_tid;
+		ei->i_datasync_tid = handle->h_transaction->t_tid;
+	}
+
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext4_std_error(sb, err);
@@ -1205,3 +1226,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
 	}
 	return count;
 }
+
+/*
+ * Zeroes not yet zeroed inode table - just write zeroes through the whole
+ * inode table. Must be called without any spinlock held. The only place
+ * where it is called from on active part of filesystem is ext4lazyinit
+ * thread, so we do not need any special locks, however we have to prevent
+ * inode allocation from the current group, so we take alloc_sem lock, to
+ * block ext4_claim_inode until we are finished.
+ */
+extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+				 int barrier)
+{
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp = NULL;
+	struct buffer_head *group_desc_bh;
+	handle_t *handle;
+	ext4_fsblk_t blk;
+	int num, ret = 0, used_blks = 0;
+
+	/* This should not happen, but just to be sure check this */
+	if (sb->s_flags & MS_RDONLY) {
+		ret = 1;
+		goto out;
+	}
+
+	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+	if (!gdp)
+		goto out;
+
+	/*
+	 * We do not need to lock this, because we are the only one
+	 * handling this flag.
+	 */
+	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+		goto out;
+
+	handle = ext4_journal_start_sb(sb, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+
+	down_write(&grp->alloc_sem);
+	/*
+	 * If inode bitmap was already initialized there may be some
+	 * used inodes so we need to skip blocks with used inodes in
+	 * inode table.
+	 */
+	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+			    ext4_itable_unused_count(sb, gdp)),
+			    sbi->s_inodes_per_block);
+
+	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+		ext4_error(sb, "Something is wrong with group %u\n"
+			   "Used itable blocks: %d"
+			   "itable unused count: %u\n",
+			   group, used_blks,
+			   ext4_itable_unused_count(sb, gdp));
+		ret = 1;
+		goto err_out;
+	}
+
+	blk = ext4_inode_table(sb, gdp) + used_blks;
+	num = sbi->s_itb_per_group - used_blks;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	ret = ext4_journal_get_write_access(handle,
+					    group_desc_bh);
+	if (ret)
+		goto err_out;
+
+	/*
+	 * Skip zeroout if the inode table is full. But we set the ZEROED
+	 * flag anyway, because obviously, when it is full it does not need
+	 * further zeroing.
+	 */
+	if (unlikely(num == 0))
+		goto skip_zeroout;
+
+	ext4_debug("going to zero out inode table in group %d\n",
+		   group);
+	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+	if (ret < 0)
+		goto err_out;
+	if (barrier)
+		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+
+skip_zeroout:
+	ext4_lock_group(sb, group);
+	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+	ext4_unlock_group(sb, group);
+
+	BUFFER_TRACE(group_desc_bh,
+		     "call ext4_handle_dirty_metadata");
+	ret = ext4_handle_dirty_metadata(handle, NULL,
+					 group_desc_bh);
+
+err_out:
+	up_write(&grp->alloc_sem);
+	ext4_journal_stop(handle);
+out:
+	return ret;
+}
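Reviewer note: the used_blks computation in this hunk decides how much of the inode table still needs zeroing — blocks that may contain live inodes are skipped, and only the tail starting at ext4_inode_table(sb, gdp) + used_blks is handed to sb_issue_zeroout(). A standalone check of that arithmetic with hypothetical geometry (8192 inodes per group, 16 inodes per block; none of these numbers come from the patch itself):

#include <stdio.h>

/* Same rounding the kernel macro of this name performs. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int inodes_per_group = 8192;	/* stands in for EXT4_INODES_PER_GROUP(sb) */
	int inodes_per_block = 16;	/* stands in for sbi->s_inodes_per_block */
	int itb_per_group = inodes_per_group / inodes_per_block;	/* 512 */
	int itable_unused = 8000;	/* stands in for ext4_itable_unused_count() */

	/* Blocks that may contain live inodes and must not be zeroed. */
	int used_blks = DIV_ROUND_UP(inodes_per_group - itable_unused,
				     inodes_per_block);

	printf("skip %d of %d itable blocks, zero the remaining %d\n",
	       used_blks, itb_per_group, itb_per_group - used_blks);
	/* prints: skip 12 of 512 itable blocks, zero the remaining 500 */
	return 0;
}

Rounding up is the safe direction: a partially used block counts as used, so zeroing can only ever start past the last inode that might be live.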