aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Mark Fasheh <mark.fasheh@oracle.com> 2006-09-22 20:28:19 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2006-09-24 16:50:46 -0400
commit 24c19ef40474c3930597f31ae233dc06319bd881 (patch)
tree e05b1cf72435d25bf47e67b206aa376bbea33b7d /fs
parent f9e2d82e6395cfa0802446b54b63cc412089d82c (diff)
ocfs2: Remove i_generation from inode lock names
OCFS2 puts inode meta data in the "lock value block" provided by the DLM. Typically, i_generation is encoded in the lock name so that a deleted inode and a new one in the same block don't share the same lvb. Unfortunately, that scheme means that the read in ocfs2_read_locked_inode() is potentially thrown away as soon as the meta data lock is taken - we cannot encode the lock name without first knowing i_generation, which requires a disk read. This patch encodes i_generation in the inode meta data lvb, and removes the value from the inode meta data lock name. This way, the read can be covered by a lock, and at the same time we can distinguish between an up to date and a stale LVB. This will help cold-cache stat(2) performance in particular. Since this patch changes the protocol version, we take the opportunity to do a minor re-organization of two of the LVB fields. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 5
-rw-r--r-- fs/ocfs2/dlmglue.c 42
-rw-r--r-- fs/ocfs2/dlmglue.h 7
-rw-r--r-- fs/ocfs2/export.c 4
-rw-r--r-- fs/ocfs2/inode.c 146
-rw-r--r-- fs/ocfs2/inode.h 8
-rw-r--r-- fs/ocfs2/journal.c 3
-rw-r--r-- fs/ocfs2/namei.c 2
-rw-r--r-- fs/ocfs2/super.c 4
-rw-r--r-- fs/ocfs2/sysfile.c 2
10 files changed, 170 insertions, 53 deletions
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index da42b515cd1d..4b46aac7d243 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,6 +44,9 @@
44 * locking semantics of the file system using the protocol. It should 44 * locking semantics of the file system using the protocol. It should
45 * be somewhere else, I'm sure, but right now it isn't. 45 * be somewhere else, I'm sure, but right now it isn't.
46 * 46 *
47 * New in version 4:
48 * - Remove i_generation from lock names for better stat performance.
49 *
47 * New in version 3: 50 * New in version 3:
48 * - Replace dentry votes with a cluster lock 51 * - Replace dentry votes with a cluster lock
49 * 52 *
@@ -51,7 +54,7 @@
51 * - full 64 bit i_size in the metadata lock lvbs 54 * - full 64 bit i_size in the metadata lock lvbs
52 * - introduction of "rw" lock and pushing meta/data locking down 55 * - introduction of "rw" lock and pushing meta/data locking down
53 */ 56 */
54#define O2NET_PROTOCOL_VERSION 3ULL 57#define O2NET_PROTOCOL_VERSION 4ULL
55struct o2net_handshake { 58struct o2net_handshake {
56 __be64 protocol_version; 59 __be64 protocol_version;
57 __be64 connector_id; 60 __be64 connector_id;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6cd84dffbbf4..ecb3cba22814 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,6 +320,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
320 320
321void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 321void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
322 enum ocfs2_lock_type type, 322 enum ocfs2_lock_type type,
323 unsigned int generation,
323 struct inode *inode) 324 struct inode *inode)
324{ 325{
325 struct ocfs2_lock_res_ops *ops; 326 struct ocfs2_lock_res_ops *ops;
@@ -341,7 +342,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
341 }; 342 };
342 343
343 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno, 344 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
344 inode->i_generation, res->l_name); 345 generation, res->l_name);
345 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode); 346 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
346} 347}
347 348
@@ -1173,17 +1174,19 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1173 1174
1174int ocfs2_create_new_lock(struct ocfs2_super *osb, 1175int ocfs2_create_new_lock(struct ocfs2_super *osb,
1175 struct ocfs2_lock_res *lockres, 1176 struct ocfs2_lock_res *lockres,
1176 int ex) 1177 int ex,
1178 int local)
1177{ 1179{
1178 int level = ex ? LKM_EXMODE : LKM_PRMODE; 1180 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1179 unsigned long flags; 1181 unsigned long flags;
1182 int lkm_flags = local ? LKM_LOCAL : 0;
1180 1183
1181 spin_lock_irqsave(&lockres->l_lock, flags); 1184 spin_lock_irqsave(&lockres->l_lock, flags);
1182 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); 1185 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1183 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL); 1186 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1184 spin_unlock_irqrestore(&lockres->l_lock, flags); 1187 spin_unlock_irqrestore(&lockres->l_lock, flags);
1185 1188
1186 return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL); 1189 return ocfs2_lock_create(osb, lockres, level, lkm_flags);
1187} 1190}
1188 1191
1189/* Grants us an EX lock on the data and metadata resources, skipping 1192/* Grants us an EX lock on the data and metadata resources, skipping
@@ -1212,19 +1215,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1212 * on a resource which has an invalid one -- we'll set it 1215 * on a resource which has an invalid one -- we'll set it
1213 * valid when we release the EX. */ 1216 * valid when we release the EX. */
1214 1217
1215 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1); 1218 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
1216 if (ret) { 1219 if (ret) {
1217 mlog_errno(ret); 1220 mlog_errno(ret);
1218 goto bail; 1221 goto bail;
1219 } 1222 }
1220 1223
1221 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1); 1224 /*
1225 * We don't want to use LKM_LOCAL on a meta data lock as they
1226 * don't use a generation in their lock names.
1227 */
1228 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
1222 if (ret) { 1229 if (ret) {
1223 mlog_errno(ret); 1230 mlog_errno(ret);
1224 goto bail; 1231 goto bail;
1225 } 1232 }
1226 1233
1227 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1); 1234 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
1228 if (ret) { 1235 if (ret) {
1229 mlog_errno(ret); 1236 mlog_errno(ret);
1230 goto bail; 1237 goto bail;
@@ -1413,6 +1420,16 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1413 1420
1414 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; 1421 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1415 1422
1423 /*
1424 * Invalidate the LVB of a deleted inode - this way other
1425 * nodes are forced to go to disk and discover the new inode
1426 * status.
1427 */
1428 if (oi->ip_flags & OCFS2_INODE_DELETED) {
1429 lvb->lvb_version = 0;
1430 goto out;
1431 }
1432
1416 lvb->lvb_version = OCFS2_LVB_VERSION; 1433 lvb->lvb_version = OCFS2_LVB_VERSION;
1417 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 1434 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1418 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 1435 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
@@ -1429,6 +1446,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1429 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1446 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1430 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1447 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1431 1448
1449out:
1432 mlog_meta_lvb(0, lockres); 1450 mlog_meta_lvb(0, lockres);
1433 1451
1434 mlog_exit_void(); 1452 mlog_exit_void();
@@ -1727,6 +1745,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
1727 wait_event(osb->recovery_event, 1745 wait_event(osb->recovery_event,
1728 ocfs2_node_map_is_empty(osb, &osb->recovery_map)); 1746 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1729 1747
1748 /*
1749 * We only see this flag if we're being called from
1750 * ocfs2_read_locked_inode(). It means we're locking an inode
1751 * which hasn't been populated yet, so clear the refresh flag
1752 * and let the caller handle it.
1753 */
1754 if (inode->i_state & I_NEW) {
1755 status = 0;
1756 ocfs2_complete_lock_res_refresh(lockres, 0);
1757 goto bail;
1758 }
1759
1730 /* This is fun. The caller may want a bh back, or it may 1760 /* This is fun. The caller may want a bh back, or it may
1731 * not. ocfs2_meta_lock_update definitely wants one in, but 1761 * not. ocfs2_meta_lock_update definitely wants one in, but
1732 * may or may not read one, depending on what's in the 1762 * may or may not read one, depending on what's in the
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 45a74f44b688..4a2769387229 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -32,9 +32,9 @@
32#define OCFS2_LVB_VERSION 4 32#define OCFS2_LVB_VERSION 4
33 33
34struct ocfs2_meta_lvb { 34struct ocfs2_meta_lvb {
35 __be16 lvb_reserved0;
36 __u8 lvb_reserved1;
37 __u8 lvb_version; 35 __u8 lvb_version;
36 __u8 lvb_reserved0;
37 __be16 lvb_reserved1;
38 __be32 lvb_iclusters; 38 __be32 lvb_iclusters;
39 __be32 lvb_iuid; 39 __be32 lvb_iuid;
40 __be32 lvb_igid; 40 __be32 lvb_igid;
@@ -62,13 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 62void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, 63void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
64 enum ocfs2_lock_type type, 64 enum ocfs2_lock_type type,
65 unsigned int generation,
65 struct inode *inode); 66 struct inode *inode);
66void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, 67void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
67 u64 parent, struct inode *inode); 68 u64 parent, struct inode *inode);
68void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 69void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
69int ocfs2_create_new_inode_locks(struct inode *inode); 70int ocfs2_create_new_inode_locks(struct inode *inode);
70int ocfs2_create_new_lock(struct ocfs2_super *osb, 71int ocfs2_create_new_lock(struct ocfs2_super *osb,
71 struct ocfs2_lock_res *lockres, int ex); 72 struct ocfs2_lock_res *lockres, int ex, int local);
72int ocfs2_drop_inode_locks(struct inode *inode); 73int ocfs2_drop_inode_locks(struct inode *inode);
73int ocfs2_data_lock_full(struct inode *inode, 74int ocfs2_data_lock_full(struct inode *inode,
74 int write, 75 int write,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ffcd79749e0d..fb91089a60a7 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
58 return ERR_PTR(-ESTALE); 58 return ERR_PTR(-ESTALE);
59 } 59 }
60 60
61 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno); 61 inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
62 62
63 if (IS_ERR(inode)) { 63 if (IS_ERR(inode)) {
64 mlog_errno(PTR_ERR(inode)); 64 mlog_errno(PTR_ERR(inode));
@@ -115,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
115 goto bail_unlock; 115 goto bail_unlock;
116 } 116 }
117 117
118 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 118 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
119 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
120 mlog(ML_ERROR, "Unable to create inode %llu\n", 120 mlog(ML_ERROR, "Unable to create inode %llu\n",
121 (unsigned long long)blkno); 121 (unsigned long long)blkno);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 66ca7a82b68a..69d3db569166 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
54 54
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56 56
57#define OCFS2_FI_FLAG_NOWAIT 0x1
58#define OCFS2_FI_FLAG_DELETE 0x2
59struct ocfs2_find_inode_args 57struct ocfs2_find_inode_args
60{ 58{
61 u64 fi_blkno; 59 u64 fi_blkno;
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
109 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args); 107 return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
110} 108}
111 109
112struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno) 110struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
113{ 111{
114 struct inode *inode = NULL; 112 struct inode *inode = NULL;
115 struct super_block *sb = osb->sb; 113 struct super_block *sb = osb->sb;
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
127 } 125 }
128 126
129 args.fi_blkno = blkno; 127 args.fi_blkno = blkno;
130 args.fi_flags = 0; 128 args.fi_flags = flags;
131 args.fi_ino = ino_from_blkno(sb, blkno); 129 args.fi_ino = ino_from_blkno(sb, blkno);
132 130
133 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor, 131 inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -297,15 +295,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
297 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; 295 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
298 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 296 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
299 297
300 if (create_ino)
301 inode->i_ino = ino_from_blkno(inode->i_sb,
302 le64_to_cpu(fe->i_blkno));
303
304 mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
305 (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
306
307 inode->i_nlink = le16_to_cpu(fe->i_links_count); 298 inode->i_nlink = le16_to_cpu(fe->i_links_count);
308 299
300 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
301 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
302
309 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 303 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
310 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 304 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
311 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 305 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -343,12 +337,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
343 break; 337 break;
344 } 338 }
345 339
340 if (create_ino) {
341 inode->i_ino = ino_from_blkno(inode->i_sb,
342 le64_to_cpu(fe->i_blkno));
343
344 /*
345 * If we ever want to create system files from kernel,
346 * the generation argument to
347 * ocfs2_inode_lock_res_init() will have to change.
348 */
349 BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
350
351 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
352 OCFS2_LOCK_TYPE_META, 0, inode);
353 }
354
346 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 355 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
347 OCFS2_LOCK_TYPE_RW, inode); 356 OCFS2_LOCK_TYPE_RW, inode->i_generation,
348 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 357 inode);
349 OCFS2_LOCK_TYPE_META, inode); 358
350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres, 359 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
351 OCFS2_LOCK_TYPE_DATA, inode); 360 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
361 inode);
352 362
353 ocfs2_set_inode_flags(inode); 363 ocfs2_set_inode_flags(inode);
354 inode->i_flags |= S_NOATIME; 364 inode->i_flags |= S_NOATIME;
@@ -366,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
366 struct ocfs2_super *osb; 376 struct ocfs2_super *osb;
367 struct ocfs2_dinode *fe; 377 struct ocfs2_dinode *fe;
368 struct buffer_head *bh = NULL; 378 struct buffer_head *bh = NULL;
369 int status; 379 int status, can_lock;
370 int sysfile = 0; 380 u32 generation = 0;
371 381
372 mlog_entry("(0x%p, 0x%p)\n", inode, args); 382 mlog_entry("(0x%p, 0x%p)\n", inode, args);
373 383
374 status = -EINVAL; 384 status = -EINVAL;
375 if (inode == NULL || inode->i_sb == NULL) { 385 if (inode == NULL || inode->i_sb == NULL) {
376 mlog(ML_ERROR, "bad inode\n"); 386 mlog(ML_ERROR, "bad inode\n");
377 goto bail; 387 return status;
378 } 388 }
379 sb = inode->i_sb; 389 sb = inode->i_sb;
380 osb = OCFS2_SB(sb); 390 osb = OCFS2_SB(sb);
@@ -382,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
382 if (!args) { 392 if (!args) {
383 mlog(ML_ERROR, "bad inode args\n"); 393 mlog(ML_ERROR, "bad inode args\n");
384 make_bad_inode(inode); 394 make_bad_inode(inode);
385 goto bail; 395 return status;
386 } 396 }
387 397
388 /* Read the FE off disk. This is safe because the kernel only 398 /*
389 * does one read_inode2 for a new inode, and if it doesn't 399 * To improve performance of cold-cache inode stats, we take
390 * exist yet then nobody can be working on it! */ 400 * the cluster lock here if possible.
391 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL); 401 *
402 * Generally, OCFS2 never trusts the contents of an inode
403 * unless it's holding a cluster lock, so taking it here isn't
404 * a correctness issue as much as it is a performance
405 * improvement.
406 *
407 * There are three times when taking the lock is not a good idea:
408 *
409 * 1) During startup, before we have initialized the DLM.
410 *
411 * 2) If we are reading certain system files which never get
412 * cluster locks (local alloc, truncate log).
413 *
414 * 3) If the process doing the iget() is responsible for
415 * orphan dir recovery. We're holding the orphan dir lock and
416 * can get into a deadlock with another process on another
417 * node in ->delete_inode().
418 *
419 * #1 and #2 can be simply solved by never taking the lock
420 * here for system files (which are the only type we read
421 * during mount). It's a heavier approach, but our main
422 * concern is user-accessible files anyway.
423 *
424 * #3 works itself out because we'll eventually take the
425 * cluster lock before trusting anything anyway.
426 */
427 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
428 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
429
430 /*
431 * To maintain backwards compatibility with older versions of
432 * ocfs2-tools, we still store the generation value for system
433 * files. The only ones that actually matter to userspace are
434 * the journals, but it's easier and inexpensive to just flag
435 * all system files similarly.
436 */
437 if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
438 generation = osb->fs_generation;
439
440 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
441 OCFS2_LOCK_TYPE_META,
442 generation, inode);
443
444 if (can_lock) {
445 status = ocfs2_meta_lock(inode, NULL, NULL, 0);
446 if (status) {
447 make_bad_inode(inode);
448 mlog_errno(status);
449 return status;
450 }
451 }
452
453 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
454 can_lock ? inode : NULL);
392 if (status < 0) { 455 if (status < 0) {
393 mlog_errno(status); 456 mlog_errno(status);
394 make_bad_inode(inode);
395 goto bail; 457 goto bail;
396 } 458 }
397 459
460 status = -EINVAL;
398 fe = (struct ocfs2_dinode *) bh->b_data; 461 fe = (struct ocfs2_dinode *) bh->b_data;
399 if (!OCFS2_IS_VALID_DINODE(fe)) { 462 if (!OCFS2_IS_VALID_DINODE(fe)) {
400 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", 463 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
401 (unsigned long long)fe->i_blkno, 7, fe->i_signature); 464 (unsigned long long)fe->i_blkno, 7, fe->i_signature);
402 make_bad_inode(inode);
403 goto bail; 465 goto bail;
404 } 466 }
405 467
406 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 468 /*
407 sysfile = 1; 469 * This is a code bug. Right now the caller needs to
470 * understand whether it is asking for a system file inode or
471 * not so the proper lock names can be built.
472 */
473 mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
474 !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
475 "Inode %llu: system file state is ambigous\n",
476 (unsigned long long)args->fi_blkno);
408 477
409 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 478 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
410 S_ISBLK(le16_to_cpu(fe->i_mode))) 479 S_ISBLK(le16_to_cpu(fe->i_mode)))
411 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 480 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
412 481
413 status = -EINVAL;
414 if (ocfs2_populate_inode(inode, fe, 0) < 0) { 482 if (ocfs2_populate_inode(inode, fe, 0) < 0) {
415 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n", 483 mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
416 (unsigned long long)fe->i_blkno, inode->i_ino); 484 (unsigned long long)fe->i_blkno, inode->i_ino);
417 make_bad_inode(inode);
418 goto bail; 485 goto bail;
419 } 486 }
420 487
421 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 488 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
422 489
423 if (sysfile)
424 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
425
426 status = 0; 490 status = 0;
427 491
428bail: 492bail:
493 if (can_lock)
494 ocfs2_meta_unlock(inode, 0);
495
496 if (status < 0)
497 make_bad_inode(inode);
498
429 if (args && bh) 499 if (args && bh)
430 brelse(bh); 500 brelse(bh);
431 501
@@ -898,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode)
898 goto bail_unlock_inode; 968 goto bail_unlock_inode;
899 } 969 }
900 970
901 /* Mark the inode as successfully deleted. This is important 971 /*
902 * for ocfs2_clear_inode as it will check this flag and skip 972 * Mark the inode as successfully deleted.
903 * any checkpointing work */ 973 *
974 * This is important for ocfs2_clear_inode() as it will check
975 * this flag and skip any checkpointing work
976 *
977 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
978 * the LVB for other nodes.
979 */
904 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; 980 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
905 981
906bail_unlock_inode: 982bail_unlock_inode:
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 4d1e53992566..9957810fdf85 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
122void ocfs2_clear_inode(struct inode *inode); 122void ocfs2_clear_inode(struct inode *inode);
123void ocfs2_delete_inode(struct inode *inode); 123void ocfs2_delete_inode(struct inode *inode);
124void ocfs2_drop_inode(struct inode *inode); 124void ocfs2_drop_inode(struct inode *inode);
125struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff); 125
126/* Flags for ocfs2_iget() */
127#define OCFS2_FI_FLAG_NOWAIT 0x1
128#define OCFS2_FI_FLAG_DELETE 0x2
129#define OCFS2_FI_FLAG_SYSFILE 0x4
130#define OCFS2_FI_FLAG_NOLOCK 0x8
131struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
126struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 132struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
127 u64 blkno, 133 u64 blkno,
128 int delete_vote); 134 int delete_vote);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f92bf1dd379a..fd9734def551 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1493 if (de->name_len == 2 && !strncmp("..", de->name, 2)) 1493 if (de->name_len == 2 && !strncmp("..", de->name, 2))
1494 continue; 1494 continue;
1495 1495
1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode)); 1496 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1497 OCFS2_FI_FLAG_NOLOCK);
1497 if (IS_ERR(iter)) 1498 if (IS_ERR(iter))
1498 continue; 1499 continue;
1499 1500
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6fa978874c33..849c3b4bb94a 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
179 if (status < 0) 179 if (status < 0)
180 goto bail_add; 180 goto bail_add;
181 181
182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno); 182 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
183 if (IS_ERR(inode)) { 183 if (IS_ERR(inode)) {
184 mlog(ML_ERROR, "Unable to create inode %llu\n", 184 mlog(ML_ERROR, "Unable to create inode %llu\n",
185 (unsigned long long)blkno); 185 (unsigned long long)blkno);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 33a6de6fc612..4c29cd7cc8e6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
202 202
203 mlog_entry_void(); 203 mlog_entry_void();
204 204
205 new = ocfs2_iget(osb, osb->root_blkno); 205 new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
206 if (IS_ERR(new)) { 206 if (IS_ERR(new)) {
207 status = PTR_ERR(new); 207 status = PTR_ERR(new);
208 mlog_errno(status); 208 mlog_errno(status);
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
210 } 210 }
211 osb->root_inode = new; 211 osb->root_inode = new;
212 212
213 new = ocfs2_iget(osb, osb->system_dir_blkno); 213 new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
214 if (IS_ERR(new)) { 214 if (IS_ERR(new)) {
215 status = PTR_ERR(new); 215 status = PTR_ERR(new);
216 mlog_errno(status); 216 mlog_errno(status);
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 98435002ac44..5df6e35d09b1 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
115 goto bail; 115 goto bail;
116 } 116 }
117 117
118 inode = ocfs2_iget(osb, blkno); 118 inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
119 if (IS_ERR(inode)) { 119 if (IS_ERR(inode)) {
120 mlog_errno(PTR_ERR(inode)); 120 mlog_errno(PTR_ERR(inode));
121 inode = NULL; 121 inode = NULL;