author     Theodore Ts'o <tytso@mit.edu>    2012-02-06 20:12:03 -0500
committer  Theodore Ts'o <tytso@mit.edu>    2012-02-06 20:12:03 -0500
commit     119c0d4460b001e44b41dcf73dc6ee794b98bd31 (patch)
tree       55a278470454bf3a066eec7f807222782cc9b77d /fs
parent     62aa2b537c6f5957afd98e29f96897419ed5ebab (diff)
ext4: fold ext4_claim_inode into ext4_new_inode
The function ext4_claim_inode() is called by only one function,
ext4_new_inode(). By folding its functionality into ext4_new_inode(),
we can remove almost 50 lines of code and put all of the logic for
allocating a new inode in a single place.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--  fs/ext4/ialloc.c  209
1 file changed, 76 insertions, 133 deletions
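The core of the rewrite shows up in the second and third hunks below: instead of taking journal write access up front and calling a separate ext4_claim_inode(), the allocation loop now test-and-sets the bitmap bit under ext4_lock_group() and simply retries on collision, deferring the group-descriptor update until after the inode is won. The following is a minimal userspace sketch of that claim-and-retry pattern, not kernel code; the pthread mutex and byte-per-bit bitmap are simplified stand-ins for the group lock and inode bitmap.

    /*
     * Userspace model of the claim loop in ext4_new_inode() after this
     * patch: find a clear bit, test-and-set it under the group lock,
     * and retry within the group (or move to the next group) if the
     * bit was grabbed by someone else first.  All names and sizes are
     * simplified stand-ins, not ext4's.
     */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define INODES_PER_GROUP 64
    #define NGROUPS 4

    struct group {
            pthread_mutex_t lock;                   /* models ext4_lock_group() */
            unsigned char bitmap[INODES_PER_GROUP]; /* one byte per bit, for clarity */
    };

    static struct group groups[NGROUPS];

    /* Models ext4_find_next_zero_bit(): first clear slot at or after 'from'. */
    static int find_next_zero(const struct group *g, int from)
    {
            for (int i = from; i < INODES_PER_GROUP; i++)
                    if (!g->bitmap[i])
                            return i;
            return INODES_PER_GROUP;                /* group looks full */
    }

    /* Returns a global inode number (1-based), or -1 when every group is full. */
    static long new_inode(int group)
    {
            int ino = 0;

            for (int i = 0; i < NGROUPS; i++, ino = 0) {
    repeat_in_this_group:
                    ino = find_next_zero(&groups[group], ino);
                    if (ino >= INODES_PER_GROUP) {
                            if (++group == NGROUPS) /* full: move to next group */
                                    group = 0;
                            continue;
                    }
                    /* The claim itself: test-and-set under the group lock. */
                    pthread_mutex_lock(&groups[group].lock);
                    bool lost = groups[group].bitmap[ino] != 0;
                    groups[group].bitmap[ino] = 1;
                    pthread_mutex_unlock(&groups[group].lock);
                    if (!lost)                      /* we grabbed the inode! */
                            return (long)group * INODES_PER_GROUP + ino + 1;
                    if (++ino < INODES_PER_GROUP)   /* lost the race: retry here */
                            goto repeat_in_this_group;
            }
            return -1;                              /* models err = -ENOSPC */
    }

    int main(void)
    {
            for (int i = 0; i < NGROUPS; i++)
                    pthread_mutex_init(&groups[i].lock, NULL);
            printf("claimed inode %ld\n", new_inode(0));
            printf("claimed inode %ld\n", new_inode(0));
            return 0;
    }

Losing the race costs at most one extra pass of the loop, which matches the comment the patch adds: normally only one pass is needed, unless the selected group's last inode is grabbed by someone else.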
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad9..84e6e9a3986b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -593,94 +593,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 }
 
 /*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
-			struct buffer_head *inode_bitmap_bh,
-			unsigned long ino, ext4_group_t group, umode_t mode)
-{
-	int free = 0, retval = 0, count;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
-	/*
-	 * We have to be sure that new inode allocation does not race with
-	 * inode table initialization, because otherwise we may end up
-	 * allocating and writing new inode right before sb_issue_zeroout
-	 * takes place and overwriting our new inode with zeroes. So we
-	 * take alloc_sem to prevent it.
-	 */
-	down_read(&grp->alloc_sem);
-	ext4_lock_group(sb, group);
-	if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
-		/* not a free inode */
-		retval = 1;
-		goto err_ret;
-	}
-	ino++;
-	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-			ino > EXT4_INODES_PER_GROUP(sb)) {
-		ext4_unlock_group(sb, group);
-		up_read(&grp->alloc_sem);
-		ext4_error(sb, "reserved inode or inode > inodes count - "
-			   "block_group = %u, inode=%lu", group,
-			   ino + group * EXT4_INODES_PER_GROUP(sb));
-		return 1;
-	}
-	/* If we didn't allocate from within the initialized part of the inode
-	 * table then we need to initialize up to this inode. */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-			/* When marking the block group with
-			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
-			 * on the value of bg_itable_unused even though
-			 * mke2fs could have initialized the same for us.
-			 * Instead we calculated the value below
-			 */
-
-			free = 0;
-		} else {
-			free = EXT4_INODES_PER_GROUP(sb) -
-				ext4_itable_unused_count(sb, gdp);
-		}
-
-		/*
-		 * Check the relative inode number against the last used
-		 * relative inode number in this group. if it is greater
-		 * we need to update the bg_itable_unused count
-		 *
-		 */
-		if (ino > free)
-			ext4_itable_unused_set(sb, gdp,
-					(EXT4_INODES_PER_GROUP(sb) - ino));
-	}
-	count = ext4_free_inodes_count(sb, gdp) - 1;
-	ext4_free_inodes_set(sb, gdp, count);
-	if (S_ISDIR(mode)) {
-		count = ext4_used_dirs_count(sb, gdp) + 1;
-		ext4_used_dirs_set(sb, gdp, count);
-		if (sbi->s_log_groups_per_flex) {
-			ext4_group_t f = ext4_flex_group(sbi, group);
-
-			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
-		}
-	}
-	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
-	ext4_unlock_group(sb, group);
-	up_read(&grp->alloc_sem);
-	return retval;
-}
-
-/*
  * There are two policies for allocating an inode. If the new inode is
  * a directory, then a forward search is made for a block group with both
  * free space and a low directory-to-inode ratio; if that fails, then of
@@ -741,6 +653,11 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
+	/*
+	 * Normally we will only go through one pass of this loop,
+	 * unless we get unlucky and it turns out the group we selected
+	 * had its last inode grabbed by someone else.
+	 */
 	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
@@ -757,51 +674,24 @@ repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
 					      EXT4_INODES_PER_GROUP(sb), ino);
-
-		if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
-			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-							    inode_bitmap_bh);
-			if (err)
-				goto fail;
-
-			BUFFER_TRACE(group_desc_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-								group_desc_bh);
-			if (err)
-				goto fail;
-			if (!ext4_claim_inode(sb, inode_bitmap_bh,
-						ino, group, mode)) {
-				/* we won it */
-				BUFFER_TRACE(inode_bitmap_bh,
-					"call ext4_handle_dirty_metadata");
-				err = ext4_handle_dirty_metadata(handle,
-								 NULL,
-							inode_bitmap_bh);
-				if (err)
-					goto fail;
-				/* zero bit is inode number 1*/
-				ino++;
-				goto got;
-			}
-			/* we lost it */
-			ext4_handle_release_buffer(handle, inode_bitmap_bh);
-			ext4_handle_release_buffer(handle, group_desc_bh);
-
-			if (++ino < EXT4_INODES_PER_GROUP(sb))
-				goto repeat_in_this_group;
-		}
-
-		/*
-		 * This case is possible in concurrent environment. It is very
-		 * rare. We cannot repeat the find_group_xxx() call because
-		 * that will simply return the same blockgroup, because the
-		 * group descriptor metadata has not yet been updated.
-		 * So we just go onto the next blockgroup.
-		 */
-		if (++group == ngroups)
-			group = 0;
+		if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+			if (++group == ngroups)
+				group = 0;
+			continue;
+		}
+		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+			ext4_error(sb, "reserved inode found cleared - "
+				   "inode=%lu", ino + 1);
+			continue;
+		}
+		ext4_lock_group(sb, group);
+		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+		ext4_unlock_group(sb, group);
+		ino++;		/* the inode bitmap is zero-based */
+		if (!ret2)
+			goto got; /* we grabbed the inode! */
+		if (ino < EXT4_INODES_PER_GROUP(sb))
+			goto repeat_in_this_group;
 	}
 	err = -ENOSPC;
 	goto out;
@@ -838,6 +728,59 @@ got:
 		if (err)
 			goto fail;
 	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, group_desc_bh);
+	if (err)
+		goto fail;
+
+	/* Update the relevant bg descriptor fields */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		int free;
+		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+		ext4_lock_group(sb, group); /* while we modify the bg desc */
+		free = EXT4_INODES_PER_GROUP(sb) -
+			ext4_itable_unused_count(sb, gdp);
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+			free = 0;
+		}
+		/*
+		 * Check the relative inode number against the last used
+		 * relative inode number in this group. if it is greater
+		 * we need to update the bg_itable_unused count
+		 */
+		if (ino > free)
+			ext4_itable_unused_set(sb, gdp,
+					(EXT4_INODES_PER_GROUP(sb) - ino));
+		up_read(&grp->alloc_sem);
+	}
+	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+	if (S_ISDIR(mode)) {
+		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+		if (sbi->s_log_groups_per_flex) {
+			ext4_group_t f = ext4_flex_group(sbi, group);
+
+			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+		}
+	}
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+		ext4_unlock_group(sb, group);
+	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
 	if (err)
@@ -1101,7 +1044,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
  * where it is called from on active part of filesystem is ext4lazyinit
  * thread, so we do not need any special locks, however we have to prevent
  * inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
  */
 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			  int barrier)
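
The down_read(&grp->alloc_sem) in the descriptor-update hunk above ("protect vs itable lazyinit") pairs with the write side held by ext4_init_inode_table() while it zeroes the inode table, so a freshly allocated inode can never be overwritten by sb_issue_zeroout(). Below is a minimal userspace model of that reader/writer pairing, with pthread_rwlock_t standing in for the kernel rw_semaphore; the names and the work done under the locks are simplified assumptions, not the ext4 code itself.

    /*
     * Userspace model of the alloc_sem pairing described above: inode
     * allocators take the semaphore for reading while they touch the
     * group descriptor, and the lazy-init thread takes it for writing
     * while it zeroes the inode table, excluding all allocators for
     * the duration.
     */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t alloc_sem = PTHREAD_RWLOCK_INITIALIZER;

    /* Models the bg-descriptor update in ext4_new_inode(): many may run at once. */
    static void *allocate_inode(void *arg)
    {
            (void)arg;
            pthread_rwlock_rdlock(&alloc_sem);      /* down_read(&grp->alloc_sem) */
            puts("allocator: updating bg descriptor");
            pthread_rwlock_unlock(&alloc_sem);      /* up_read() */
            return NULL;
    }

    /* Models ext4_init_inode_table(): excludes every allocator while zeroing. */
    static void *lazyinit_thread(void *arg)
    {
            (void)arg;
            pthread_rwlock_wrlock(&alloc_sem);      /* down_write(&grp->alloc_sem) */
            puts("lazyinit: zeroing the inode table");
            pthread_rwlock_unlock(&alloc_sem);      /* up_write() */
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b, z;

            pthread_create(&a, NULL, allocate_inode, NULL);
            pthread_create(&z, NULL, lazyinit_thread, NULL);
            pthread_create(&b, NULL, allocate_inode, NULL);
            pthread_join(a, NULL);
            pthread_join(z, NULL);
            pthread_join(b, NULL);
            return 0;
    }

A read/write semaphore fits here because many allocators may proceed concurrently (they serialize only on the group lock for the bitmap bit itself), while the zeroing pass needs to exclude all of them at once.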