aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 20:19:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 20:19:27 -0400
commit: ae67d9a888a000a9df43de10eb9950075e93508c (patch)
tree: 4251a8b5fb19fb294b917bd36b4d0b92d2e5c51b /fs/ext4/ialloc.c
parent: 71c7356f864dc41e4bd6b884596a422f8954afe0 (diff)
parent: ad4eec613536dc7e5ea0c6e59849e6edca634d8b (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "New features for 3.12: - Added aggressive extent caching using the extent status tree. This can actually decrease memory usage in read-mostly workloads since the information is much more compactly stored in the extent status tree than if we had to keep the extent tree metadata blocks in the buffer cache. This also improves Asynchronous I/O since it makes it much less likely that we need to do metadata I/O to look up the extent tree information. - Improve the recovery after corrupted allocation bitmaps are found when running in errors=ignore mode. Also fixed some writeback vs truncate races when using a blocksize less than the page size" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits) ext4: allow specifying external journal by pathname mount option ext4: mark group corrupt on group descriptor checksum ext4: mark block group as corrupt on inode bitmap error ext4: mark block group as corrupt on block bitmap error ext4: fix type declaration of ext4_validate_block_bitmap ext4: error out if verifying the block bitmap fails jbd2: Fix endian mixing problems in the checksumming code ext4: isolate ext4_extents.h file ext4: Fix misspellings using 'codespell' tool ext4: convert write_begin methods to stable_page_writes semantics ext4: fix use of potentially uninitialized variables in debugging code ext4: fix lost truncate due to race with writeback ext4: simplify truncation code in ext4_setattr() ext4: fix ext4_writepages() in presence of truncate ext4: move test whether extent to map can be extended to one place ext4: fix warning in ext4_da_update_reserve_space() quota: provide interface for readding allocated space into reserved space ext4: avoid reusing recently deleted inodes in no journal mode ext4: allocate delayed allocation blocks before rename ext4: start handle at least possible moment when renaming files ...
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- fs/ext4/ialloc.c 90
1 files changed, 80 insertions, 10 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 8bf5999875ee..137193ff389b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -70,18 +70,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
70 ext4_group_t block_group, 70 ext4_group_t block_group,
71 struct ext4_group_desc *gdp) 71 struct ext4_group_desc *gdp)
72{ 72{
73 struct ext4_group_info *grp;
73 J_ASSERT_BH(bh, buffer_locked(bh)); 74 J_ASSERT_BH(bh, buffer_locked(bh));
74 75
75 /* If checksum is bad mark all blocks and inodes use to prevent 76 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
78 ext4_error(sb, "Checksum bad for group %u", block_group); 79 ext4_error(sb, "Checksum bad for group %u", block_group);
79 ext4_free_group_clusters_set(sb, gdp, 0); 80 grp = ext4_get_group_info(sb, block_group);
80 ext4_free_inodes_set(sb, gdp, 0); 81 set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
81 ext4_itable_unused_set(sb, gdp, 0); 82 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
82 memset(bh->b_data, 0xff, sb->s_blocksize);
83 ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
84 EXT4_INODES_PER_GROUP(sb) / 8);
85 return 0; 83 return 0;
86 } 84 }
87 85
@@ -117,6 +115,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
117 struct ext4_group_desc *desc; 115 struct ext4_group_desc *desc;
118 struct buffer_head *bh = NULL; 116 struct buffer_head *bh = NULL;
119 ext4_fsblk_t bitmap_blk; 117 ext4_fsblk_t bitmap_blk;
118 struct ext4_group_info *grp;
120 119
121 desc = ext4_get_group_desc(sb, block_group, NULL); 120 desc = ext4_get_group_desc(sb, block_group, NULL);
122 if (!desc) 121 if (!desc)
@@ -185,6 +184,8 @@ verify:
185 put_bh(bh); 184 put_bh(bh);
186 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " 185 ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
187 "inode_bitmap = %llu", block_group, bitmap_blk); 186 "inode_bitmap = %llu", block_group, bitmap_blk);
187 grp = ext4_get_group_info(sb, block_group);
188 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
188 return NULL; 189 return NULL;
189 } 190 }
190 ext4_unlock_group(sb, block_group); 191 ext4_unlock_group(sb, block_group);
@@ -221,6 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
221 struct ext4_super_block *es; 222 struct ext4_super_block *es;
222 struct ext4_sb_info *sbi; 223 struct ext4_sb_info *sbi;
223 int fatal = 0, err, count, cleared; 224 int fatal = 0, err, count, cleared;
225 struct ext4_group_info *grp;
224 226
225 if (!sb) { 227 if (!sb) {
226 printk(KERN_ERR "EXT4-fs: %s:%d: inode on " 228 printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
@@ -266,7 +268,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
266 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 268 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
267 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 269 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
268 bitmap_bh = ext4_read_inode_bitmap(sb, block_group); 270 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
269 if (!bitmap_bh) 271 /* Don't bother if the inode bitmap is corrupt. */
272 grp = ext4_get_group_info(sb, block_group);
273 if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
270 goto error_return; 274 goto error_return;
271 275
272 BUFFER_TRACE(bitmap_bh, "get_write_access"); 276 BUFFER_TRACE(bitmap_bh, "get_write_access");
@@ -315,8 +319,10 @@ out:
315 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 319 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
316 if (!fatal) 320 if (!fatal)
317 fatal = err; 321 fatal = err;
318 } else 322 } else {
319 ext4_error(sb, "bit already cleared for inode %lu", ino); 323 ext4_error(sb, "bit already cleared for inode %lu", ino);
324 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
325 }
320 326
321error_return: 327error_return:
322 brelse(bitmap_bh); 328 brelse(bitmap_bh);
@@ -625,6 +631,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
625} 631}
626 632
627/* 633/*
634 * In no journal mode, if an inode has recently been deleted, we want
635 * to avoid reusing it until we're reasonably sure the inode table
636 * block has been written back to disk. (Yes, these values are
637 * somewhat arbitrary...)
638 */
639#define RECENTCY_MIN 5
640#define RECENTCY_DIRTY 30
641
642static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
643{
644 struct ext4_group_desc *gdp;
645 struct ext4_inode *raw_inode;
646 struct buffer_head *bh;
647 unsigned long dtime, now;
648 int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
649 int offset, ret = 0, recentcy = RECENTCY_MIN;
650
651 gdp = ext4_get_group_desc(sb, group, NULL);
652 if (unlikely(!gdp))
653 return 0;
654
655 bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
656 (ino / inodes_per_block));
657 if (unlikely(!bh) || !buffer_uptodate(bh))
658 /*
659 * If the block is not in the buffer cache, then it
660 * must have been written out.
661 */
662 goto out;
663
664 offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
665 raw_inode = (struct ext4_inode *) (bh->b_data + offset);
666 dtime = le32_to_cpu(raw_inode->i_dtime);
667 now = get_seconds();
668 if (buffer_dirty(bh))
669 recentcy += RECENTCY_DIRTY;
670
671 if (dtime && (dtime < now) && (now < dtime + recentcy))
672 ret = 1;
673out:
674 brelse(bh);
675 return ret;
676}
677
678/*
628 * There are two policies for allocating an inode. If the new inode is 679 * There are two policies for allocating an inode. If the new inode is
629 * a directory, then a forward search is made for a block group with both 680 * a directory, then a forward search is made for a block group with both
630 * free space and a low directory-to-inode ratio; if that fails, then of 681 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -652,6 +703,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
652 struct inode *ret; 703 struct inode *ret;
653 ext4_group_t i; 704 ext4_group_t i;
654 ext4_group_t flex_group; 705 ext4_group_t flex_group;
706 struct ext4_group_info *grp;
655 707
656 /* Cannot create files in a deleted directory */ 708 /* Cannot create files in a deleted directory */
657 if (!dir || !dir->i_nlink) 709 if (!dir || !dir->i_nlink)
@@ -725,10 +777,22 @@ got_group:
725 continue; 777 continue;
726 } 778 }
727 779
780 grp = ext4_get_group_info(sb, group);
781 /* Skip groups with already-known suspicious inode tables */
782 if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
783 if (++group == ngroups)
784 group = 0;
785 continue;
786 }
787
728 brelse(inode_bitmap_bh); 788 brelse(inode_bitmap_bh);
729 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); 789 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
730 if (!inode_bitmap_bh) 790 /* Skip groups with suspicious inode tables */
731 goto out; 791 if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
792 if (++group == ngroups)
793 group = 0;
794 continue;
795 }
732 796
733repeat_in_this_group: 797repeat_in_this_group:
734 ino = ext4_find_next_zero_bit((unsigned long *) 798 ino = ext4_find_next_zero_bit((unsigned long *)
@@ -741,6 +805,11 @@ repeat_in_this_group:
741 "inode=%lu", ino + 1); 805 "inode=%lu", ino + 1);
742 continue; 806 continue;
743 } 807 }
808 if ((EXT4_SB(sb)->s_journal == NULL) &&
809 recently_deleted(sb, group, ino)) {
810 ino++;
811 goto next_inode;
812 }
744 if (!handle) { 813 if (!handle) {
745 BUG_ON(nblocks <= 0); 814 BUG_ON(nblocks <= 0);
746 handle = __ext4_journal_start_sb(dir->i_sb, line_no, 815 handle = __ext4_journal_start_sb(dir->i_sb, line_no,
@@ -764,6 +833,7 @@ repeat_in_this_group:
764 ino++; /* the inode bitmap is zero-based */ 833 ino++; /* the inode bitmap is zero-based */
765 if (!ret2) 834 if (!ret2)
766 goto got; /* we grabbed the inode! */ 835 goto got; /* we grabbed the inode! */
836next_inode:
767 if (ino < EXT4_INODES_PER_GROUP(sb)) 837 if (ino < EXT4_INODES_PER_GROUP(sb))
768 goto repeat_in_this_group; 838 goto repeat_in_this_group;
769next_group: 839next_group: