Diffstat (limited to 'fs/ext4/ialloc.c')
 -rw-r--r--  fs/ext4/ialloc.c  260
 1 file changed, 107 insertions(+), 153 deletions(-)
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad9..409c2ee7750a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
 	return EXT4_INODES_PER_GROUP(sb);
 }
 
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+	if (uptodate) {
+		set_buffer_uptodate(bh);
+		set_bitmap_uptodate(bh);
+	}
+	unlock_buffer(bh);
+	put_bh(bh);
+}
+
 /*
  * Read the inode allocation bitmap for a given block_group, reading
  * into the specified slot in the superblock's bitmap cache.
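
Note: ext4_end_bitmap_read() is a b_end_io completion handler, so it
runs in the I/O completion path with the buffer still locked: on
success it sets both uptodate flags under that lock, and in all cases
it unlocks the buffer and drops the reference its submitter took with
get_bh().  For context, the buffer layer invokes it roughly like this
(paraphrased from memory from fs/buffer.c of this era, trimmed; not
part of this patch):

	static void end_bio_bh_io_sync(struct bio *bio, int err)
	{
		struct buffer_head *bh = bio->bi_private;

		bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
		bio_put(bio);
	}
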
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return bh;
 	}
 	/*
-	 * submit the buffer_head for read. We can
-	 * safely mark the bitmap as uptodate now.
-	 * We do it here so the bitmap uptodate bit
-	 * get set with buffer lock held.
+	 * submit the buffer_head for reading
 	 */
 	trace_ext4_load_inode_bitmap(sb, block_group);
-	set_bitmap_uptodate(bh);
-	if (bh_submit_read(bh) < 0) {
+	bh->b_end_io = ext4_end_bitmap_read;
+	get_bh(bh);
+	submit_bh(READ, bh);
+	wait_on_buffer(bh);
+	if (!buffer_uptodate(bh)) {
 		put_bh(bh);
 		ext4_error(sb, "Cannot read inode bitmap - "
 			   "block_group = %u, inode_bitmap = %llu",
 			   block_group, bitmap_blk);
 		return NULL;
 	}
 	return bh;
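
Note: the old code set the bitmap-uptodate flag before the read was
even submitted, which the deleted comment tried to justify by noting
that the buffer lock was held.  But the lockless fast path earlier in
ext4_read_inode_bitmap() trusts that flag on its own (paraphrased
from the surrounding code, not part of this patch):

	if (bitmap_uptodate(bh))
		return bh;		/* no lock taken here */
	lock_buffer(bh);
	if (bitmap_uptodate(bh)) {	/* re-check under the lock */
		unlock_buffer(bh);
		return bh;
	}

so a reader racing with a failed or still-in-flight read could be
handed a bitmap that was never read off disk.  Deferring both flags
to ext4_end_bitmap_read() means they are only ever set once the read
has actually completed successfully, with the buffer lock still held.
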
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	struct ext4_sb_info *sbi;
 	int fatal = 0, err, count, cleared;
 
-	if (atomic_read(&inode->i_count) > 1) {
-		printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
-		       atomic_read(&inode->i_count));
+	if (!sb) {
+		printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
+		       "nonexistent device\n", __func__, __LINE__);
 		return;
 	}
-	if (inode->i_nlink) {
-		printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
-		       inode->i_nlink);
+	if (atomic_read(&inode->i_count) > 1) {
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+			 __func__, __LINE__, inode->i_ino,
+			 atomic_read(&inode->i_count));
 		return;
 	}
-	if (!sb) {
-		printk(KERN_ERR "ext4_free_inode: inode on "
-		       "nonexistent device\n");
+	if (inode->i_nlink) {
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+			 __func__, __LINE__, inode->i_ino, inode->i_nlink);
 		return;
 	}
 	sbi = EXT4_SB(sb);
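
Note: the !sb check has to move in front of the other two because the
messages it guards now go through ext4_msg(), which, unlike a bare
printk(), builds its log prefix out of the superblock.  Its shape is
roughly the following (a hedged sketch, not the exact kernel body):

	/* sketch: ext4_msg() dereferences sb for its device name */
	printk("%sEXT4-fs (%s): ...", prefix, sb->s_id);

so calling it with sb == NULL would oops inside the error report
itself.
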
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 }
 
 /*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
-			    struct buffer_head *inode_bitmap_bh,
-			    unsigned long ino, ext4_group_t group, umode_t mode)
-{
-	int free = 0, retval = 0, count;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
-	/*
-	 * We have to be sure that new inode allocation does not race with
-	 * inode table initialization, because otherwise we may end up
-	 * allocating and writing new inode right before sb_issue_zeroout
-	 * takes place and overwriting our new inode with zeroes. So we
-	 * take alloc_sem to prevent it.
-	 */
-	down_read(&grp->alloc_sem);
-	ext4_lock_group(sb, group);
-	if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
-		/* not a free inode */
-		retval = 1;
-		goto err_ret;
-	}
-	ino++;
-	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-			ino > EXT4_INODES_PER_GROUP(sb)) {
-		ext4_unlock_group(sb, group);
-		up_read(&grp->alloc_sem);
-		ext4_error(sb, "reserved inode or inode > inodes count - "
-			   "block_group = %u, inode=%lu", group,
-			   ino + group * EXT4_INODES_PER_GROUP(sb));
-		return 1;
-	}
-	/* If we didn't allocate from within the initialized part of the inode
-	 * table then we need to initialize up to this inode. */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-			/* When marking the block group with
-			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
-			 * on the value of bg_itable_unused even though
-			 * mke2fs could have initialized the same for us.
-			 * Instead we calculated the value below
-			 */
-
-			free = 0;
-		} else {
-			free = EXT4_INODES_PER_GROUP(sb) -
-				ext4_itable_unused_count(sb, gdp);
-		}
-
-		/*
-		 * Check the relative inode number against the last used
-		 * relative inode number in this group. if it is greater
-		 * we need to update the bg_itable_unused count
-		 *
-		 */
-		if (ino > free)
-			ext4_itable_unused_set(sb, gdp,
-					(EXT4_INODES_PER_GROUP(sb) - ino));
-	}
-	count = ext4_free_inodes_count(sb, gdp) - 1;
-	ext4_free_inodes_set(sb, gdp, count);
-	if (S_ISDIR(mode)) {
-		count = ext4_used_dirs_count(sb, gdp) + 1;
-		ext4_used_dirs_set(sb, gdp, count);
-		if (sbi->s_log_groups_per_flex) {
-			ext4_group_t f = ext4_flex_group(sbi, group);
-
-			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
-		}
-	}
-	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
-	ext4_unlock_group(sb, group);
-	up_read(&grp->alloc_sem);
-	return retval;
-}
-
-/*
  * There are two policies for allocating an inode. If the new inode is
  * a directory, then a forward search is made for a block group with both
  * free space and a low directory-to-inode ratio; if that fails, then of
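
Note: nothing in the deleted function is lost; each of its three jobs
reappears later in this patch, and the following hunks show where:

	ext4_claim_inode()                    folded into ext4_new_inode() as
	------------------                    -----------------------------
	test-and-set under ext4_lock_group()  -> the claim loop (next hunk)
	uninit / bg_itable_unused fixup       -> below the got: label
	free-inodes, used-dirs, bg_checksum   -> below the got: label

The practical win is that the journal write-access calls no longer
have to be taken, and then released again, for every bit the search
loses a race on; only the winning inode pays for them.
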
@@ -741,6 +664,11 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
+	/*
+	 * Normally we will only go through one pass of this loop,
+	 * unless we get unlucky and it turns out the group we selected
+	 * had its last inode grabbed by someone else.
+	 */
 	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
@@ -757,51 +685,24 @@ repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
 					      EXT4_INODES_PER_GROUP(sb), ino);
-
-		if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
-			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-							    inode_bitmap_bh);
-			if (err)
-				goto fail;
-
-			BUFFER_TRACE(group_desc_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-								group_desc_bh);
-			if (err)
-				goto fail;
-			if (!ext4_claim_inode(sb, inode_bitmap_bh,
-						ino, group, mode)) {
-				/* we won it */
-				BUFFER_TRACE(inode_bitmap_bh,
-					"call ext4_handle_dirty_metadata");
-				err = ext4_handle_dirty_metadata(handle,
-								 NULL,
-							inode_bitmap_bh);
-				if (err)
-					goto fail;
-				/* zero bit is inode number 1*/
-				ino++;
-				goto got;
-			}
-			/* we lost it */
-			ext4_handle_release_buffer(handle, inode_bitmap_bh);
-			ext4_handle_release_buffer(handle, group_desc_bh);
-
-			if (++ino < EXT4_INODES_PER_GROUP(sb))
-				goto repeat_in_this_group;
+		if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+			if (++group == ngroups)
+				group = 0;
+			continue;
 		}
-
-		/*
-		 * This case is possible in concurrent environment.  It is very
-		 * rare.  We cannot repeat the find_group_xxx() call because
-		 * that will simply return the same blockgroup, because the
-		 * group descriptor metadata has not yet been updated.
-		 * So we just go onto the next blockgroup.
-		 */
-		if (++group == ngroups)
-			group = 0;
+		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+			ext4_error(sb, "reserved inode found cleared - "
+				   "inode=%lu", ino + 1);
+			continue;
+		}
+		ext4_lock_group(sb, group);
+		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+		ext4_unlock_group(sb, group);
+		ino++;		/* the inode bitmap is zero-based */
+		if (!ret2)
+			goto got;	/* we grabbed the inode! */
+		if (ino < EXT4_INODES_PER_GROUP(sb))
+			goto repeat_in_this_group;
 	}
 	err = -ENOSPC;
 	goto out;
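
Note: the claim itself is now the classic optimistic pattern: scan
for a candidate bit with no lock held, take a single test-and-set
under the per-group spinlock, and rescan from the next bit if another
task won the race.  The ext4_*_bit() wrappers are (if memory serves)
the non-atomic little-endian bitops, so ext4_lock_group() is what
makes the test-and-set atomic, and it also serializes against the
other bitmap updaters that take the same lock.  A stand-alone
user-space sketch of the discipline, with illustrative names only
(none of this is kernel API):

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define NBITS 64
	static uint64_t bitmap;			/* inode-bitmap stand-in */
	static pthread_spinlock_t group_lock;	/* ext4_lock_group() stand-in */

	static int find_next_zero_bit64(uint64_t map, int size, int start)
	{
		for (int i = start; i < size; i++)
			if (!(map & ((uint64_t)1 << i)))
				return i;
		return size;
	}

	/* returns the claimed bit, or -1 if the "group" is full */
	static int claim_bit(void)
	{
		int ino = 0;

		/* lockless scan, mirroring the kernel's optimistic search */
		while ((ino = find_next_zero_bit64(bitmap, NBITS, ino)) < NBITS) {
			bool busy;

			pthread_spin_lock(&group_lock);
			busy = bitmap & ((uint64_t)1 << ino);	/* test... */
			bitmap |= (uint64_t)1 << ino;		/* ...and set */
			pthread_spin_unlock(&group_lock);
			if (!busy)
				return ino;	/* we grabbed it */
			ino++;		/* lost; rescan from the next bit */
		}
		return -1;
	}

	int main(void)
	{
		pthread_spin_init(&group_lock, PTHREAD_PROCESS_PRIVATE);
		printf("claimed %d\n", claim_bit());	/* 0 */
		printf("claimed %d\n", claim_bit());	/* 1 */
		return 0;
	}

Note also what the hunk shows about ordering: the winner reaches the
journalling calls below the got: label with the bit already set,
whereas the old code took write access to both buffers before every
claim attempt.
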
@@ -838,6 +739,59 @@ got:
 		if (err)
 			goto fail;
 	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, group_desc_bh);
+	if (err)
+		goto fail;
+
+	/* Update the relevant bg descriptor fields */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		int free;
+		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+		ext4_lock_group(sb, group); /* while we modify the bg desc */
+		free = EXT4_INODES_PER_GROUP(sb) -
+			ext4_itable_unused_count(sb, gdp);
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+			free = 0;
+		}
+		/*
+		 * Check the relative inode number against the last used
+		 * relative inode number in this group. if it is greater
+		 * we need to update the bg_itable_unused count
+		 */
+		if (ino > free)
+			ext4_itable_unused_set(sb, gdp,
+					       (EXT4_INODES_PER_GROUP(sb) - ino));
+		up_read(&grp->alloc_sem);
+	}
+	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+	if (S_ISDIR(mode)) {
+		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+		if (sbi->s_log_groups_per_flex) {
+			ext4_group_t f = ext4_flex_group(sbi, group);
+
+			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+		}
+	}
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+		ext4_unlock_group(sb, group);
+	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
 	if (err)
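
Note: when GDT_CSUM is enabled, the lock and unlock of the group
spinlock sit in two separate if-blocks, which is easy to misread.
The envelope, traced out of the hunk above (an outline, not code to
apply):

	down_read(&grp->alloc_sem);	/* vs. itable lazyinit zeroing */
	ext4_lock_group(sb, group);
		... bg_flags / bg_itable_unused fixup ...
	up_read(&grp->alloc_sem);
		... free-inodes and used-dirs counters ...
		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
	ext4_unlock_group(sb, group);

so the checksum is always recomputed under the same spinlock that
covered the counter updates it folds in.  (The flex-group used_dirs
counter is an atomic_t, so it needs no lock of its own.)
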
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
  * where it is called from on active part of filesystem is ext4lazyinit
  * thread, so we do not need any special locks, however we have to prevent
  * inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
  */
 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			  int barrier)
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			       sbi->s_inodes_per_block);
 
 	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
-		ext4_error(sb, "Something is wrong with group %u\n"
-			   "Used itable blocks: %d"
-			   "itable unused count: %u\n",
+		ext4_error(sb, "Something is wrong with group %u: "
+			   "used itable blocks: %d; "
+			   "itable unused count: %u",
 			   group, used_blks,
 			   ext4_itable_unused_count(sb, gdp));
 		ret = 1;