about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4/ialloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- fs/ext4/ialloc.c 260
1 files changed, 107 insertions, 153 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad9..409c2ee7750a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
92 return EXT4_INODES_PER_GROUP(sb); 92 return EXT4_INODES_PER_GROUP(sb);
93} 93}
94 94
95void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
96{
97 if (uptodate) {
98 set_buffer_uptodate(bh);
99 set_bitmap_uptodate(bh);
100 }
101 unlock_buffer(bh);
102 put_bh(bh);
103}
104
95/* 105/*
96 * Read the inode allocation bitmap for a given block_group, reading 106 * Read the inode allocation bitmap for a given block_group, reading
97 * into the specified slot in the superblock's bitmap cache. 107 * into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
147 return bh; 157 return bh;
148 } 158 }
149 /* 159 /*
150 * submit the buffer_head for read. We can 160 * submit the buffer_head for reading
151 * safely mark the bitmap as uptodate now.
152 * We do it here so the bitmap uptodate bit
153 * get set with buffer lock held.
154 */ 161 */
155 trace_ext4_load_inode_bitmap(sb, block_group); 162 trace_ext4_load_inode_bitmap(sb, block_group);
156 set_bitmap_uptodate(bh); 163 bh->b_end_io = ext4_end_bitmap_read;
157 if (bh_submit_read(bh) < 0) { 164 get_bh(bh);
165 submit_bh(READ, bh);
166 wait_on_buffer(bh);
167 if (!buffer_uptodate(bh)) {
158 put_bh(bh); 168 put_bh(bh);
159 ext4_error(sb, "Cannot read inode bitmap - " 169 ext4_error(sb, "Cannot read inode bitmap - "
160 "block_group = %u, inode_bitmap = %llu", 170 "block_group = %u, inode_bitmap = %llu",
161 block_group, bitmap_blk); 171 block_group, bitmap_blk);
162 return NULL; 172 return NULL;
163 } 173 }
164 return bh; 174 return bh;
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
194 struct ext4_sb_info *sbi; 204 struct ext4_sb_info *sbi;
195 int fatal = 0, err, count, cleared; 205 int fatal = 0, err, count, cleared;
196 206
197 if (atomic_read(&inode->i_count) > 1) { 207 if (!sb) {
198 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", 208 printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
199 atomic_read(&inode->i_count)); 209 "nonexistent device\n", __func__, __LINE__);
200 return; 210 return;
201 } 211 }
202 if (inode->i_nlink) { 212 if (atomic_read(&inode->i_count) > 1) {
203 printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", 213 ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
204 inode->i_nlink); 214 __func__, __LINE__, inode->i_ino,
215 atomic_read(&inode->i_count));
205 return; 216 return;
206 } 217 }
207 if (!sb) { 218 if (inode->i_nlink) {
208 printk(KERN_ERR "ext4_free_inode: inode on " 219 ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
209 "nonexistent device\n"); 220 __func__, __LINE__, inode->i_ino, inode->i_nlink);
210 return; 221 return;
211 } 222 }
212 sbi = EXT4_SB(sb); 223 sbi = EXT4_SB(sb);
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
593} 604}
594 605
595/* 606/*
596 * claim the inode from the inode bitmap. If the group
597 * is uninit we need to take the groups's ext4_group_lock
598 * and clear the uninit flag. The inode bitmap update
599 * and group desc uninit flag clear should be done
600 * after holding ext4_group_lock so that ext4_read_inode_bitmap
601 * doesn't race with the ext4_claim_inode
602 */
603static int ext4_claim_inode(struct super_block *sb,
604 struct buffer_head *inode_bitmap_bh,
605 unsigned long ino, ext4_group_t group, umode_t mode)
606{
607 int free = 0, retval = 0, count;
608 struct ext4_sb_info *sbi = EXT4_SB(sb);
609 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
610 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
611
612 /*
613 * We have to be sure that new inode allocation does not race with
614 * inode table initialization, because otherwise we may end up
615 * allocating and writing new inode right before sb_issue_zeroout
616 * takes place and overwriting our new inode with zeroes. So we
617 * take alloc_sem to prevent it.
618 */
619 down_read(&grp->alloc_sem);
620 ext4_lock_group(sb, group);
621 if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
622 /* not a free inode */
623 retval = 1;
624 goto err_ret;
625 }
626 ino++;
627 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
628 ino > EXT4_INODES_PER_GROUP(sb)) {
629 ext4_unlock_group(sb, group);
630 up_read(&grp->alloc_sem);
631 ext4_error(sb, "reserved inode or inode > inodes count - "
632 "block_group = %u, inode=%lu", group,
633 ino + group * EXT4_INODES_PER_GROUP(sb));
634 return 1;
635 }
636 /* If we didn't allocate from within the initialized part of the inode
637 * table then we need to initialize up to this inode. */
638 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
639
640 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
641 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
642 /* When marking the block group with
643 * ~EXT4_BG_INODE_UNINIT we don't want to depend
644 * on the value of bg_itable_unused even though
645 * mke2fs could have initialized the same for us.
646 * Instead we calculated the value below
647 */
648
649 free = 0;
650 } else {
651 free = EXT4_INODES_PER_GROUP(sb) -
652 ext4_itable_unused_count(sb, gdp);
653 }
654
655 /*
656 * Check the relative inode number against the last used
657 * relative inode number in this group. if it is greater
658 * we need to update the bg_itable_unused count
659 *
660 */
661 if (ino > free)
662 ext4_itable_unused_set(sb, gdp,
663 (EXT4_INODES_PER_GROUP(sb) - ino));
664 }
665 count = ext4_free_inodes_count(sb, gdp) - 1;
666 ext4_free_inodes_set(sb, gdp, count);
667 if (S_ISDIR(mode)) {
668 count = ext4_used_dirs_count(sb, gdp) + 1;
669 ext4_used_dirs_set(sb, gdp, count);
670 if (sbi->s_log_groups_per_flex) {
671 ext4_group_t f = ext4_flex_group(sbi, group);
672
673 atomic_inc(&sbi->s_flex_groups[f].used_dirs);
674 }
675 }
676 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
677err_ret:
678 ext4_unlock_group(sb, group);
679 up_read(&grp->alloc_sem);
680 return retval;
681}
682
683/*
684 * There are two policies for allocating an inode. If the new inode is 607 * There are two policies for allocating an inode. If the new inode is
685 * a directory, then a forward search is made for a block group with both 608 * a directory, then a forward search is made for a block group with both
686 * free space and a low directory-to-inode ratio; if that fails, then of 609 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -741,6 +664,11 @@ got_group:
741 if (ret2 == -1) 664 if (ret2 == -1)
742 goto out; 665 goto out;
743 666
667 /*
668 * Normally we will only go through one pass of this loop,
669 * unless we get unlucky and it turns out the group we selected
670 * had its last inode grabbed by someone else.
671 */
744 for (i = 0; i < ngroups; i++, ino = 0) { 672 for (i = 0; i < ngroups; i++, ino = 0) {
745 err = -EIO; 673 err = -EIO;
746 674
@@ -757,51 +685,24 @@ repeat_in_this_group:
757 ino = ext4_find_next_zero_bit((unsigned long *) 685 ino = ext4_find_next_zero_bit((unsigned long *)
758 inode_bitmap_bh->b_data, 686 inode_bitmap_bh->b_data,
759 EXT4_INODES_PER_GROUP(sb), ino); 687 EXT4_INODES_PER_GROUP(sb), ino);
760 688 if (ino >= EXT4_INODES_PER_GROUP(sb)) {
761 if (ino < EXT4_INODES_PER_GROUP(sb)) { 689 if (++group == ngroups)
762 690 group = 0;
763 BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 691 continue;
764 err = ext4_journal_get_write_access(handle,
765 inode_bitmap_bh);
766 if (err)
767 goto fail;
768
769 BUFFER_TRACE(group_desc_bh, "get_write_access");
770 err = ext4_journal_get_write_access(handle,
771 group_desc_bh);
772 if (err)
773 goto fail;
774 if (!ext4_claim_inode(sb, inode_bitmap_bh,
775 ino, group, mode)) {
776 /* we won it */
777 BUFFER_TRACE(inode_bitmap_bh,
778 "call ext4_handle_dirty_metadata");
779 err = ext4_handle_dirty_metadata(handle,
780 NULL,
781 inode_bitmap_bh);
782 if (err)
783 goto fail;
784 /* zero bit is inode number 1*/
785 ino++;
786 goto got;
787 }
788 /* we lost it */
789 ext4_handle_release_buffer(handle, inode_bitmap_bh);
790 ext4_handle_release_buffer(handle, group_desc_bh);
791
792 if (++ino < EXT4_INODES_PER_GROUP(sb))
793 goto repeat_in_this_group;
794 } 692 }
795 693 if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
796 /* 694 ext4_error(sb, "reserved inode found cleared - "
797 * This case is possible in concurrent environment. It is very 695 "inode=%lu", ino + 1);
798 * rare. We cannot repeat the find_group_xxx() call because 696 continue;
799 * that will simply return the same blockgroup, because the 697 }
800 * group descriptor metadata has not yet been updated. 698 ext4_lock_group(sb, group);
801 * So we just go onto the next blockgroup. 699 ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
802 */ 700 ext4_unlock_group(sb, group);
803 if (++group == ngroups) 701 ino++; /* the inode bitmap is zero-based */
804 group = 0; 702 if (!ret2)
703 goto got; /* we grabbed the inode! */
704 if (ino < EXT4_INODES_PER_GROUP(sb))
705 goto repeat_in_this_group;
805 } 706 }
806 err = -ENOSPC; 707 err = -ENOSPC;
807 goto out; 708 goto out;
@@ -838,6 +739,59 @@ got:
838 if (err) 739 if (err)
839 goto fail; 740 goto fail;
840 } 741 }
742
743 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
744 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
745 if (err)
746 goto fail;
747
748 BUFFER_TRACE(group_desc_bh, "get_write_access");
749 err = ext4_journal_get_write_access(handle, group_desc_bh);
750 if (err)
751 goto fail;
752
753 /* Update the relevant bg descriptor fields */
754 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
755 int free;
756 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
757
758 down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
759 ext4_lock_group(sb, group); /* while we modify the bg desc */
760 free = EXT4_INODES_PER_GROUP(sb) -
761 ext4_itable_unused_count(sb, gdp);
762 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
763 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
764 free = 0;
765 }
766 /*
767 * Check the relative inode number against the last used
768 * relative inode number in this group. if it is greater
769 * we need to update the bg_itable_unused count
770 */
771 if (ino > free)
772 ext4_itable_unused_set(sb, gdp,
773 (EXT4_INODES_PER_GROUP(sb) - ino));
774 up_read(&grp->alloc_sem);
775 }
776 ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
777 if (S_ISDIR(mode)) {
778 ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
779 if (sbi->s_log_groups_per_flex) {
780 ext4_group_t f = ext4_flex_group(sbi, group);
781
782 atomic_inc(&sbi->s_flex_groups[f].used_dirs);
783 }
784 }
785 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
786 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
787 ext4_unlock_group(sb, group);
788 }
789
790 BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
791 err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
792 if (err)
793 goto fail;
794
841 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); 795 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
842 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); 796 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
843 if (err) 797 if (err)
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1101 * where it is called from on active part of filesystem is ext4lazyinit 1055 * where it is called from on active part of filesystem is ext4lazyinit
1102 * thread, so we do not need any special locks, however we have to prevent 1056 * thread, so we do not need any special locks, however we have to prevent
1103 * inode allocation from the current group, so we take alloc_sem lock, to 1057 * inode allocation from the current group, so we take alloc_sem lock, to
1104 * block ext4_claim_inode until we are finished. 1058 * block ext4_new_inode() until we are finished.
1105 */ 1059 */
1106int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1060int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1107 int barrier) 1061 int barrier)
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1149 sbi->s_inodes_per_block); 1103 sbi->s_inodes_per_block);
1150 1104
1151 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { 1105 if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
1152 ext4_error(sb, "Something is wrong with group %u\n" 1106 ext4_error(sb, "Something is wrong with group %u: "
1153 "Used itable blocks: %d" 1107 "used itable blocks: %d; "
1154 "itable unused count: %u\n", 1108 "itable unused count: %u",
1155 group, used_blks, 1109 group, used_blks,
1156 ext4_itable_unused_count(sb, gdp)); 1110 ext4_itable_unused_count(sb, gdp));
1157 ret = 1; 1111 ret = 1;