aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/suballoc.c
diff options
context:
space:
mode:
authorwengang wang <wen.gang.wang@oracle.com>2009-03-06 08:29:10 -0500
committerMark Fasheh <mfasheh@suse.com>2009-04-03 14:39:25 -0400
commit6ca497a83e592d64e050c4d04b6dedb8c915f39a (patch)
tree0b9cd611d6d907881841eca73d12a7f3b85f1716 /fs/ocfs2/suballoc.c
parent9405dccfd3201d2b76e120949bec81ba8cfbd2d0 (diff)
ocfs2: fix rare stale inode errors when exporting via nfs
For NFS exporting, ocfs2_get_dentry() returns the dentry for a file handle. ocfs2_get_dentry() may read from disk when the inode is not in memory, without taking any cross-cluster lock. This can lead to the file system loading a stale inode. This patch fixes the above problem. The solution is that, when the inode is not in memory, we take the cluster lock (PR) on the alloc inode from which the inode in question was allocated (this causes the node on which the deletion is done to sync the alloc inode) before reading out the inode itself. Then we check the bitmap in the group (the one the inode in question was allocated from) to see if the bit is clear. If it is clear, the inode is stale. If the bit is set, we then check the generation as the existing code does. We have to read the inode in question from disk first to learn its alloc slot and alloc bit. If it is not stale, we read it out using ocfs2_iget(); the second read should then come from cache. We also have to add a per-superblock nfs_sync_lock to cover the lock on the alloc inode and the lock on the inode in question. This is because ocfs2_get_dentry() and ocfs2_delete_inode() take those locks in reverse order. nfs_sync_lock is locked in EX mode in ocfs2_get_dentry() and in PR mode in ocfs2_delete_inode(), so that multiple ocfs2_delete_inode() calls can run concurrently in the normal case. [mfasheh@suse.com: build warning fixes and comment cleanups] Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com> Acked-by: Joel Becker <joel.becker@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2/suballoc.c')
-rw-r--r--fs/ocfs2/suballoc.c159
1 files changed, 159 insertions, 0 deletions
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 4c1399cc03f3..b4ca5911caaf 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2185,3 +2185,162 @@ out:
2185 2185
2186 return ret; 2186 return ret;
2187} 2187}
2188
2189/*
2190 * Read the inode specified by blkno to get suballoc_slot and
2191 * suballoc_bit.
2192 */
2193static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2194 u16 *suballoc_slot, u16 *suballoc_bit)
2195{
2196 int status;
2197 struct buffer_head *inode_bh = NULL;
2198 struct ocfs2_dinode *inode_fe;
2199
2200 mlog_entry("blkno: %llu\n", blkno);
2201
2202 /* dirty read disk */
2203 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
2204 if (status < 0) {
2205 mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status);
2206 goto bail;
2207 }
2208
2209 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
2210 if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
2211 mlog(ML_ERROR, "invalid inode %llu requested\n", blkno);
2212 status = -EINVAL;
2213 goto bail;
2214 }
2215
2216 if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT &&
2217 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
2218 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
2219 blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
2220 status = -EINVAL;
2221 goto bail;
2222 }
2223
2224 if (suballoc_slot)
2225 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
2226 if (suballoc_bit)
2227 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
2228
2229bail:
2230 brelse(inode_bh);
2231
2232 mlog_exit(status);
2233 return status;
2234}
2235
2236/*
2237 * test whether bit is SET in allocator bitmap or not. on success, 0
2238 * is returned and *res is 1 for SET; 0 otherwise. when fails, errno
2239 * is returned and *res is meaningless. Call this after you have
2240 * cluster locked against suballoc, or you may get a result based on
2241 * non-up2date contents
2242 */
2243static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2244 struct inode *suballoc,
2245 struct buffer_head *alloc_bh, u64 blkno,
2246 u16 bit, int *res)
2247{
2248 struct ocfs2_dinode *alloc_fe;
2249 struct ocfs2_group_desc *group;
2250 struct buffer_head *group_bh = NULL;
2251 u64 bg_blkno;
2252 int status;
2253
2254 mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit);
2255
2256 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
2257 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
2258 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2259 (unsigned int)bit,
2260 ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
2261 status = -EINVAL;
2262 goto bail;
2263 }
2264
2265 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2266 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
2267 &group_bh);
2268 if (status < 0) {
2269 mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status);
2270 goto bail;
2271 }
2272
2273 group = (struct ocfs2_group_desc *) group_bh->b_data;
2274 *res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
2275
2276bail:
2277 brelse(group_bh);
2278
2279 mlog_exit(status);
2280 return status;
2281}
2282
2283/*
2284 * Test if the bit representing this inode (blkno) is set in the
2285 * suballocator.
2286 *
2287 * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
2288 *
2289 * In the event of failure, a negative value is returned and *res is
2290 * meaningless.
2291 *
2292 * Callers must make sure to hold nfs_sync_lock to prevent
2293 * ocfs2_delete_inode() on another node from accessing the same
2294 * suballocator concurrently.
2295 */
2296int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2297{
2298 int status;
2299 u16 suballoc_bit = 0, suballoc_slot = 0;
2300 struct inode *inode_alloc_inode;
2301 struct buffer_head *alloc_bh = NULL;
2302
2303 mlog_entry("blkno: %llu", blkno);
2304
2305 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2306 &suballoc_bit);
2307 if (status < 0) {
2308 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
2309 goto bail;
2310 }
2311
2312 inode_alloc_inode =
2313 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
2314 suballoc_slot);
2315 if (!inode_alloc_inode) {
2316 /* the error code could be inaccurate, but we are not able to
2317 * get the correct one. */
2318 status = -EINVAL;
2319 mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
2320 (u32)suballoc_slot);
2321 goto bail;
2322 }
2323
2324 mutex_lock(&inode_alloc_inode->i_mutex);
2325 status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
2326 if (status < 0) {
2327 mutex_unlock(&inode_alloc_inode->i_mutex);
2328 mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
2329 (u32)suballoc_slot, status);
2330 goto bail;
2331 }
2332
2333 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
2334 blkno, suballoc_bit, res);
2335 if (status < 0)
2336 mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
2337
2338 ocfs2_inode_unlock(inode_alloc_inode, 0);
2339 mutex_unlock(&inode_alloc_inode->i_mutex);
2340
2341 iput(inode_alloc_inode);
2342 brelse(alloc_bh);
2343bail:
2344 mlog_exit(status);
2345 return status;
2346}