aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mark Fasheh <mfasheh@suse.com> 2010-08-13 18:15:17 -0400
committer Tao Ma <tao.ma@oracle.com> 2010-09-08 02:25:59 -0400
commit e49e27674d1dd2717ad90b21ece8f83102153315 (patch)
tree 3ef61e9e4273a236dde61af12cb1e43d8c421c21
parent d51349829c378c06ba4aa7d4b16ca23739858608 (diff)
ocfs2: allow return of new inode block location before allocation of the inode
This allows code which needs to know the eventual block number of an inode, but can't allocate it yet due to transaction or lock ordering, to find out that block number ahead of the actual allocation. For example, ocfs2_create_inode_in_orphan() currently gives a junk blkno for preparation of the orphan dir because it can't yet know where the actual inode is placed - that code is actually in ocfs2_mknod_locked. This is a problem when the orphan dirs are indexed, as the junk inode number will create an index entry which goes unused (and fails the later removal from the orphan dir). Now with these interfaces, ocfs2_create_inode_in_orphan() can run the block group search (and get back the inode block number) *before* any actual allocation occurs.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
-rw-r--r-- fs/ocfs2/suballoc.c 159
-rw-r--r-- fs/ocfs2/suballoc.h 21
2 files changed, 180 insertions, 0 deletions
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index e7edda8c6a11..8a286f54dca1 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -57,6 +57,12 @@ struct ocfs2_suballoc_result {
57 u64 sr_bg_blkno; /* The bg we allocated from. Set 57 u64 sr_bg_blkno; /* The bg we allocated from. Set
58 to 0 when a block group is 58 to 0 when a block group is
59 contiguous. */ 59 contiguous. */
60 u64 sr_bg_stable_blkno; /*
61 * Doesn't change, always
62 * set to target block
63 * group descriptor
64 * block.
65 */
60 u64 sr_blkno; /* The first allocated block */ 66 u64 sr_blkno; /* The first allocated block */
61 unsigned int sr_bit_offset; /* The bit in the bg */ 67 unsigned int sr_bit_offset; /* The bit in the bg */
62 unsigned int sr_bits; /* How many bits we claimed */ 68 unsigned int sr_bits; /* How many bits we claimed */
@@ -149,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
149 brelse(ac->ac_bh); 155 brelse(ac->ac_bh);
150 ac->ac_bh = NULL; 156 ac->ac_bh = NULL;
151 ac->ac_resv = NULL; 157 ac->ac_resv = NULL;
158 if (ac->ac_find_loc_priv) {
159 kfree(ac->ac_find_loc_priv);
160 ac->ac_find_loc_priv = NULL;
161 }
152} 162}
153 163
154void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 164void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -1689,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1689 if (!ret) 1699 if (!ret)
1690 ocfs2_bg_discontig_fix_result(ac, gd, res); 1700 ocfs2_bg_discontig_fix_result(ac, gd, res);
1691 1701
1702 /*
1703 * sr_bg_blkno might have been changed by
1704 * ocfs2_bg_discontig_fix_result
1705 */
1706 res->sr_bg_stable_blkno = group_bh->b_blocknr;
1707
1708 if (ac->ac_find_loc_only)
1709 goto out_loc_only;
1710
1692 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1711 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1693 res->sr_bits, 1712 res->sr_bits,
1694 le16_to_cpu(gd->bg_chain)); 1713 le16_to_cpu(gd->bg_chain));
@@ -1702,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1702 if (ret < 0) 1721 if (ret < 0)
1703 mlog_errno(ret); 1722 mlog_errno(ret);
1704 1723
1724out_loc_only:
1705 *bits_left = le16_to_cpu(gd->bg_free_bits_count); 1725 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1706 1726
1707out: 1727out:
@@ -1780,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1780 if (!status) 1800 if (!status)
1781 ocfs2_bg_discontig_fix_result(ac, bg, res); 1801 ocfs2_bg_discontig_fix_result(ac, bg, res);
1782 1802
1803 /*
1804 * sr_bg_blkno might have been changed by
1805 * ocfs2_bg_discontig_fix_result
1806 */
1807 res->sr_bg_stable_blkno = group_bh->b_blocknr;
1783 1808
1784 /* 1809 /*
1785 * Keep track of previous block descriptor read. When 1810 * Keep track of previous block descriptor read. When
@@ -1806,6 +1831,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1806 } 1831 }
1807 } 1832 }
1808 1833
1834 if (ac->ac_find_loc_only)
1835 goto out_loc_only;
1836
1809 status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, 1837 status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
1810 ac->ac_bh, res->sr_bits, 1838 ac->ac_bh, res->sr_bits,
1811 chain); 1839 chain);
@@ -1828,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1828 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 1856 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
1829 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1857 (unsigned long long)le64_to_cpu(fe->i_blkno));
1830 1858
1859out_loc_only:
1831 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1860 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1832bail: 1861bail:
1833 brelse(group_bh); 1862 brelse(group_bh);
@@ -2023,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
2023 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; 2052 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
2024} 2053}
2025 2054
/*
 * ocfs2_find_new_inode_loc() - search for the block a new inode will
 * occupy, without performing the allocation.
 *
 * @dir:          directory passed to ocfs2_init_inode_ac_group(); its
 *                superblock is used to start and commit the transaction.
 * @parent_fe_bh: buffer head passed through to ocfs2_init_inode_ac_group().
 * @ac:           allocation context. Must be non-NULL, of type
 *                OCFS2_AC_USE_INODE, with ac_bits_wanted == 1 and
 *                ac_bits_given == 0 (each enforced with BUG_ON).
 * @fe_blkno:     on success, receives res->sr_blkno from the search.
 *
 * Sets ac->ac_find_loc_only before calling ocfs2_claim_suballoc_bits() and
 * does not clear it again in this function. On success the search result is
 * stored in ac->ac_find_loc_priv, which ocfs2_claim_new_inode_at_loc() reads
 * later and ocfs2_free_ac_resource() frees.
 *
 * Returns -ENOMEM if the result structure cannot be allocated, the
 * PTR_ERR() of the handle if the transaction cannot be started, or the
 * return value of ocfs2_claim_suballoc_bits() otherwise.
 */
2055int ocfs2_find_new_inode_loc(struct inode *dir,
2056 struct buffer_head *parent_fe_bh,
2057 struct ocfs2_alloc_context *ac,
2058 u64 *fe_blkno)
2059{
2060 int ret;
2061 handle_t *handle = NULL;
2062 struct ocfs2_suballoc_result *res;
2063
2064 BUG_ON(!ac);
2065 BUG_ON(ac->ac_bits_given != 0);
2066 BUG_ON(ac->ac_bits_wanted != 1);
2067 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
2068
/*
 * Zeroed result buffer. It is handed over to ac->ac_find_loc_priv on
 * success and freed at the "out" label on any failure.
 */
2069 res = kzalloc(sizeof(*res), GFP_NOFS);
2070 if (res == NULL) {
2071 ret = -ENOMEM;
2072 mlog_errno(ret);
2073 goto out;
2074 }
2075
2076 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
2077
2078 /*
2079 * The handle started here is for chain relink. Alternatively,
2080 * we could just disable relink for these calls.
2081 */
2082 handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
2083 if (IS_ERR(handle)) {
2084 ret = PTR_ERR(handle);
2085 handle = NULL;
2086 mlog_errno(ret);
2087 goto out;
2088 }
2089
2090 /*
2091 * This will instruct ocfs2_claim_suballoc_bits and
2092 * ocfs2_search_one_group to search but save actual allocation
2093 * for later.
2094 */
2095 ac->ac_find_loc_only = 1;
2096
2097 ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
2098 if (ret < 0) {
2099 mlog_errno(ret);
2100 goto out;
2101 }
2102
/* Success: the context now owns res; report the block number found. */
2103 ac->ac_find_loc_priv = res;
2104 *fe_blkno = res->sr_blkno;
2105
/*
 * Common exit: commit the transaction only if it was started, and free
 * res only when ret is non-zero (i.e. it was not stored in the context).
 */
2106out:
2107 if (handle)
2108 ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
2109
2110 if (ret)
2111 kfree(res);
2112
2113 return ret;
2114}
2115
/*
 * ocfs2_claim_new_inode_at_loc() - perform the allocation for a location
 * previously found by ocfs2_find_new_inode_loc().
 *
 * @handle:       transaction handle passed to the count-update and
 *                bit-setting helpers.
 * @dir:          directory passed to ocfs2_save_inode_ac_group() on success.
 * @ac:           allocation context whose ac_find_loc_priv holds the saved
 *                search result; ac_bh and ac_inode are also used.
 * @suballoc_loc: on success, receives res->sr_bg_blkno.
 * @suballoc_bit: on success, receives res->sr_bit_offset.
 * @di_blkno:     block number returned earlier through @fe_blkno; must equal
 *                the saved res->sr_blkno (BUG_ON otherwise).
 *
 * The group descriptor is re-read using res->sr_bg_stable_blkno, the
 * allocator counts are updated, and the bit is set in the group. On success
 * ac->ac_bits_given is incremented. The group descriptor buffer is released
 * on every exit path.
 *
 * NOTE(review): ac->ac_find_loc_priv is dereferenced without a NULL check,
 * so this presumably relies on a prior successful
 * ocfs2_find_new_inode_loc() call on the same @ac - confirm against callers.
 *
 * Returns the error from the first helper that fails, otherwise the return
 * value of ocfs2_block_group_set_bits().
 */
2116int ocfs2_claim_new_inode_at_loc(handle_t *handle,
2117 struct inode *dir,
2118 struct ocfs2_alloc_context *ac,
2119 u64 *suballoc_loc,
2120 u16 *suballoc_bit,
2121 u64 di_blkno)
2122{
2123 int ret;
2124 u16 chain;
2125 struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
2126 struct buffer_head *bg_bh = NULL;
2127 struct ocfs2_group_desc *bg;
2128 struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
2129
2130 /*
2131 * Since di_blkno is being passed back in, we check for any
2132 * inconsistencies which may have happened between
2133 * calls. These are code bugs as di_blkno is not expected to
2134 * change once returned from ocfs2_find_new_inode_loc()
2135 */
2136 BUG_ON(res->sr_blkno != di_blkno);
2137
/*
 * Use the stable descriptor block number rather than sr_bg_blkno, which
 * the search code notes may have been changed by
 * ocfs2_bg_discontig_fix_result().
 */
2138 ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
2139 res->sr_bg_stable_blkno, &bg_bh);
2140 if (ret) {
2141 mlog_errno(ret);
2142 goto out;
2143 }
2144
2145 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
2146 chain = le16_to_cpu(bg->bg_chain);
2147
2148 ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
2149 ac->ac_bh, res->sr_bits,
2150 chain);
2151 if (ret) {
2152 mlog_errno(ret);
2153 goto out;
2154 }
2155
/*
 * NOTE(review): if setting the bits fails, the count update made just
 * above is not undone in this function - verify whether a rollback is
 * needed or handled elsewhere.
 */
2156 ret = ocfs2_block_group_set_bits(handle,
2157 ac->ac_inode,
2158 bg,
2159 bg_bh,
2160 res->sr_bit_offset,
2161 res->sr_bits);
2162 if (ret < 0) {
2163 mlog_errno(ret);
2164 goto out;
2165 }
2166
2167 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
2168 (unsigned long long)di_blkno);
2169
2170 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
2171
2172 BUG_ON(res->sr_bits != 1);
2173
/* Hand the group location and bit back to the caller and record the claim. */
2174 *suballoc_loc = res->sr_bg_blkno;
2175 *suballoc_bit = res->sr_bit_offset;
2176 ac->ac_bits_given++;
2177 ocfs2_save_inode_ac_group(dir, ac);
2178
2179out:
2180 brelse(bg_bh);
2181
2182 return ret;
2183}
2184
2026int ocfs2_claim_new_inode(handle_t *handle, 2185int ocfs2_claim_new_inode(handle_t *handle,
2027 struct inode *dir, 2186 struct inode *dir,
2028 struct buffer_head *parent_fe_bh, 2187 struct buffer_head *parent_fe_bh,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a017dd3ee7d9..b8afabfeede4 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context {
56 u64 ac_max_block; /* Highest block number to allocate. 0 is 56 u64 ac_max_block; /* Highest block number to allocate. 0 is
57 is the same as ~0 - unlimited */ 57 is the same as ~0 - unlimited */
58 58
59 int ac_find_loc_only; /* hack for reflink operation ordering */
60 struct ocfs2_suballoc_result *ac_find_loc_priv; /* */
61
59 struct ocfs2_alloc_reservation *ac_resv; 62 struct ocfs2_alloc_reservation *ac_resv;
60}; 63};
61 64
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
197 struct ocfs2_alloc_context **meta_ac); 200 struct ocfs2_alloc_context **meta_ac);
198 201
199int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); 202int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
203
204
205
206/*
207 * The following two interfaces are for ocfs2_create_inode_in_orphan().
208 */
209int ocfs2_find_new_inode_loc(struct inode *dir,
210 struct buffer_head *parent_fe_bh,
211 struct ocfs2_alloc_context *ac,
212 u64 *fe_blkno);
213
214int ocfs2_claim_new_inode_at_loc(handle_t *handle,
215 struct inode *dir,
216 struct ocfs2_alloc_context *ac,
217 u64 *suballoc_loc,
218 u16 *suballoc_bit,
219 u64 di_blkno);
220
200#endif /* _CHAINALLOC_H_ */ 221#endif /* _CHAINALLOC_H_ */