aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2010-10-27 23:44:47 -0400
committerTheodore Ts'o <tytso@mit.edu>2010-10-27 23:44:47 -0400
commita107e5a3a473a2ea62bd5af24e11b84adf1486ff (patch)
treed36c2cb38d8be88d4d75cdebc354aa140aa0e470 /fs/ext4/ialloc.c
parente3e1288e86a07cdeb0aee5860a2dff111c6eff79 (diff)
parenta269029d0e2192046be4c07ed78a45022469ee4c (diff)
Merge branch 'next' into upstream-merge
Conflicts: fs/ext4/inode.c fs/ext4/mballoc.c include/trace/events/ext4.h
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r--fs/ext4/ialloc.c135
1 files changed, 128 insertions, 7 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f21..1ce240a23ebb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
50 * need to use it within a single byte (to ensure we get endianness right). 50 * need to use it within a single byte (to ensure we get endianness right).
51 * We can use memset for the rest of the bitmap as there are no other users. 51 * We can use memset for the rest of the bitmap as there are no other users.
52 */ 52 */
53void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 53void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
54{ 54{
55 int i; 55 int i;
56 56
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
65} 65}
66 66
67/* Initializes an uninitialized inode bitmap */ 67/* Initializes an uninitialized inode bitmap */
68unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, 68static unsigned ext4_init_inode_bitmap(struct super_block *sb,
69 ext4_group_t block_group, 69 struct buffer_head *bh,
70 struct ext4_group_desc *gdp) 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp)
71{ 72{
72 struct ext4_sb_info *sbi = EXT4_SB(sb); 73 struct ext4_sb_info *sbi = EXT4_SB(sb);
73 74
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
85 } 86 }
86 87
87 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); 88 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
88 mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 89 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
89 bh->b_data); 90 bh->b_data);
90 91
91 return EXT4_INODES_PER_GROUP(sb); 92 return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
107 desc = ext4_get_group_desc(sb, block_group, NULL); 108 desc = ext4_get_group_desc(sb, block_group, NULL);
108 if (!desc) 109 if (!desc)
109 return NULL; 110 return NULL;
111
110 bitmap_blk = ext4_inode_bitmap(sb, desc); 112 bitmap_blk = ext4_inode_bitmap(sb, desc);
111 bh = sb_getblk(sb, bitmap_blk); 113 bh = sb_getblk(sb, bitmap_blk);
112 if (unlikely(!bh)) { 114 if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
123 unlock_buffer(bh); 125 unlock_buffer(bh);
124 return bh; 126 return bh;
125 } 127 }
128
126 ext4_lock_group(sb, block_group); 129 ext4_lock_group(sb, block_group);
127 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 130 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
128 ext4_init_inode_bitmap(sb, bh, block_group, desc); 131 ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
133 return bh; 136 return bh;
134 } 137 }
135 ext4_unlock_group(sb, block_group); 138 ext4_unlock_group(sb, block_group);
139
136 if (buffer_uptodate(bh)) { 140 if (buffer_uptodate(bh)) {
137 /* 141 /*
138 * if not uninit if bh is uptodate, 142 * if not uninit if bh is uptodate,
@@ -411,8 +415,8 @@ struct orlov_stats {
411 * for a particular block group or flex_bg. If flex_size is 1, then g 415 * for a particular block group or flex_bg. If flex_size is 1, then g
412 * is a block group number; otherwise it is flex_bg number. 416 * is a block group number; otherwise it is flex_bg number.
413 */ 417 */
414void get_orlov_stats(struct super_block *sb, ext4_group_t g, 418static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
415 int flex_size, struct orlov_stats *stats) 419 int flex_size, struct orlov_stats *stats)
416{ 420{
417 struct ext4_group_desc *desc; 421 struct ext4_group_desc *desc;
418 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; 422 struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
712{ 716{
713 int free = 0, retval = 0, count; 717 int free = 0, retval = 0, count;
714 struct ext4_sb_info *sbi = EXT4_SB(sb); 718 struct ext4_sb_info *sbi = EXT4_SB(sb);
719 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
715 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); 720 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
716 721
722 /*
723 * We have to be sure that new inode allocation does not race with
724 * inode table initialization, because otherwise we may end up
725 * allocating and writing new inode right before sb_issue_zeroout
726 * takes place and overwriting our new inode with zeroes. So we
727 * take alloc_sem to prevent it.
728 */
729 down_read(&grp->alloc_sem);
717 ext4_lock_group(sb, group); 730 ext4_lock_group(sb, group);
718 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 731 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
719 /* not a free inode */ 732 /* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
724 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 737 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
725 ino > EXT4_INODES_PER_GROUP(sb)) { 738 ino > EXT4_INODES_PER_GROUP(sb)) {
726 ext4_unlock_group(sb, group); 739 ext4_unlock_group(sb, group);
740 up_read(&grp->alloc_sem);
727 ext4_error(sb, "reserved inode or inode > inodes count - " 741 ext4_error(sb, "reserved inode or inode > inodes count - "
728 "block_group = %u, inode=%lu", group, 742 "block_group = %u, inode=%lu", group,
729 ino + group * EXT4_INODES_PER_GROUP(sb)); 743 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
772 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 786 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
773err_ret: 787err_ret:
774 ext4_unlock_group(sb, group); 788 ext4_unlock_group(sb, group);
789 up_read(&grp->alloc_sem);
775 return retval; 790 return retval;
776} 791}
777 792
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1205 } 1220 }
1206 return count; 1221 return count;
1207} 1222}
1223
1224/*
1225 * Zeroes not yet zeroed inode table - just write zeroes through the whole
1226 * inode table. Must be called without any spinlock held. The only place
1227 * where it is called from on active part of filesystem is ext4lazyinit
1228 * thread, so we do not need any special locks, however we have to prevent
1229 * inode allocation from the current group, so we take alloc_sem lock, to
1230 * block ext4_claim_inode until we are finished.
1231 */
1232extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1233 int barrier)
1234{
1235 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1236 struct ext4_sb_info *sbi = EXT4_SB(sb);
1237 struct ext4_group_desc *gdp = NULL;
1238 struct buffer_head *group_desc_bh;
1239 handle_t *handle;
1240 ext4_fsblk_t blk;
1241 int num, ret = 0, used_blks = 0;
1242
1243 /* This should not happen, but just to be sure check this */
1244 if (sb->s_flags & MS_RDONLY) {
1245 ret = 1;
1246 goto out;
1247 }
1248
1249 gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
1250 if (!gdp)
1251 goto out;
1252
1253 /*
1254 * We do not need to lock this, because we are the only one
1255 * handling this flag.
1256 */
1257 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1258 goto out;
1259
1260 handle = ext4_journal_start_sb(sb, 1);
1261 if (IS_ERR(handle)) {
1262 ret = PTR_ERR(handle);
1263 goto out;
1264 }
1265
1266 down_write(&grp->alloc_sem);
1267 /*
1268 * If inode bitmap was already initialized there may be some
1269 * used inodes so we need to skip blocks with used inodes in
1270 * inode table.
1271 */
1272 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
1273 used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
1274 ext4_itable_unused_count(sb, gdp)),
1275 sbi->s_inodes_per_block);
1276
1277 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
1278 ext4_error(sb, "Something is wrong with group %u\n"
1279 "Used itable blocks: %d"
1280 "itable unused count: %u\n",
1281 group, used_blks,
1282 ext4_itable_unused_count(sb, gdp));
1283 ret = 1;
1284 goto out;
1285 }
1286
1287 blk = ext4_inode_table(sb, gdp) + used_blks;
1288 num = sbi->s_itb_per_group - used_blks;
1289
1290 BUFFER_TRACE(group_desc_bh, "get_write_access");
1291 ret = ext4_journal_get_write_access(handle,
1292 group_desc_bh);
1293 if (ret)
1294 goto err_out;
1295
1296 /*
1297 * Skip zeroout if the inode table is full. But we set the ZEROED
1298 * flag anyway, because obviously, when it is full it does not need
1299 * further zeroing.
1300 */
1301 if (unlikely(num == 0))
1302 goto skip_zeroout;
1303
1304 ext4_debug("going to zero out inode table in group %d\n",
1305 group);
1306 ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
1307 if (ret < 0)
1308 goto err_out;
1309 if (barrier)
1310 blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
1311
1312skip_zeroout:
1313 ext4_lock_group(sb, group);
1314 gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
1315 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1316 ext4_unlock_group(sb, group);
1317
1318 BUFFER_TRACE(group_desc_bh,
1319 "call ext4_handle_dirty_metadata");
1320 ret = ext4_handle_dirty_metadata(handle, NULL,
1321 group_desc_bh);
1322
1323err_out:
1324 up_write(&grp->alloc_sem);
1325 ext4_journal_stop(handle);
1326out:
1327 return ret;
1328}