aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tiger Yang <tiger.yang@oracle.com> 2007-03-20 19:01:38 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2007-04-26 17:39:48 -0400
commit 500086300e6dc5308a7328990bd50d17e075162b (patch)
tree 4083cda09445c260c2cc2ac1d0f68c05ad2b958e
parent a9f5f70739363ccca2e771c274c4f015c5fb7a88 (diff)
ocfs2: Remove delete inode vote
Ocfs2 currently does cluster-wide node messaging to check the open state of an inode during delete. This patch removes that mechanism in favor of an inode cluster lock which is taken at shared read when an inode is first read and dropped in clear_inode(). This allows a deleting node to test the liveness of an inode by attempting to take an exclusive lock.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 5
-rw-r--r-- fs/ocfs2/dlmglue.c 119
-rw-r--r-- fs/ocfs2/dlmglue.h 3
-rw-r--r-- fs/ocfs2/inode.c 93
-rw-r--r-- fs/ocfs2/inode.h 5
-rw-r--r-- fs/ocfs2/journal.c 3
-rw-r--r-- fs/ocfs2/namei.c 5
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 4
-rw-r--r-- fs/ocfs2/ocfs2_lockid.h 5
-rw-r--r-- fs/ocfs2/super.c 1
10 files changed, 205 insertions, 38 deletions
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 4dae5df5e467..9606111fe89d 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,9 @@
38 * locking semantics of the file system using the protocol. It should 38 * locking semantics of the file system using the protocol. It should
39 * be somewhere else, I'm sure, but right now it isn't. 39 * be somewhere else, I'm sure, but right now it isn't.
40 * 40 *
41 * New in version 8:
42 * - Replace delete inode votes with a cluster lock
43 *
41 * New in version 7: 44 * New in version 7:
42 * - DLM join domain includes the live nodemap 45 * - DLM join domain includes the live nodemap
43 * 46 *
@@ -57,7 +60,7 @@
57 * - full 64 bit i_size in the metadata lock lvbs 60 * - full 64 bit i_size in the metadata lock lvbs
58 * - introduction of "rw" lock and pushing meta/data locking down 61 * - introduction of "rw" lock and pushing meta/data locking down
59 */ 62 */
60#define O2NET_PROTOCOL_VERSION 7ULL 63#define O2NET_PROTOCOL_VERSION 8ULL
61struct o2net_handshake { 64struct o2net_handshake {
62 __be64 protocol_version; 65 __be64 protocol_version;
63 __be64 connector_id; 66 __be64 connector_id;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 31d519a6dbd2..ca4f0e0e7587 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -225,11 +225,17 @@ static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
225 .flags = 0, 225 .flags = 0,
226}; 226};
227 227
228static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
229 .get_osb = ocfs2_get_inode_osb,
230 .flags = 0,
231};
232
228static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 233static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
229{ 234{
230 return lockres->l_type == OCFS2_LOCK_TYPE_META || 235 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
231 lockres->l_type == OCFS2_LOCK_TYPE_DATA || 236 lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
232 lockres->l_type == OCFS2_LOCK_TYPE_RW; 237 lockres->l_type == OCFS2_LOCK_TYPE_RW ||
238 lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
233} 239}
234 240
235static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres) 241static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
@@ -373,6 +379,9 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
373 case OCFS2_LOCK_TYPE_DATA: 379 case OCFS2_LOCK_TYPE_DATA:
374 ops = &ocfs2_inode_data_lops; 380 ops = &ocfs2_inode_data_lops;
375 break; 381 break;
382 case OCFS2_LOCK_TYPE_OPEN:
383 ops = &ocfs2_inode_open_lops;
384 break;
376 default: 385 default:
377 mlog_bug_on_msg(1, "type: %d\n", type); 386 mlog_bug_on_msg(1, "type: %d\n", type);
378 ops = NULL; /* thanks, gcc */ 387 ops = NULL; /* thanks, gcc */
@@ -1129,6 +1138,12 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
1129 goto bail; 1138 goto bail;
1130 } 1139 }
1131 1140
1141 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
1142 if (ret) {
1143 mlog_errno(ret);
1144 goto bail;
1145 }
1146
1132bail: 1147bail:
1133 mlog_exit(ret); 1148 mlog_exit(ret);
1134 return ret; 1149 return ret;
@@ -1182,6 +1197,99 @@ void ocfs2_rw_unlock(struct inode *inode, int write)
1182 mlog_exit_void(); 1197 mlog_exit_void();
1183} 1198}
1184 1199
1200/*
1201 * ocfs2_open_lock always gets a PR mode lock.
1202 */
1203int ocfs2_open_lock(struct inode *inode)
1204{
1205 int status = 0;
1206 struct ocfs2_lock_res *lockres;
1207 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1208
1209 BUG_ON(!inode);
1210
1211 mlog_entry_void();
1212
1213 mlog(0, "inode %llu take PRMODE open lock\n",
1214 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1215
1216 if (ocfs2_mount_local(osb))
1217 goto out;
1218
1219 lockres = &OCFS2_I(inode)->ip_open_lockres;
1220
1221 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1222 LKM_PRMODE, 0, 0);
1223 if (status < 0)
1224 mlog_errno(status);
1225
1226out:
1227 mlog_exit(status);
1228 return status;
1229}
1230
1231int ocfs2_try_open_lock(struct inode *inode, int write)
1232{
1233 int status = 0, level;
1234 struct ocfs2_lock_res *lockres;
1235 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1236
1237 BUG_ON(!inode);
1238
1239 mlog_entry_void();
1240
1241 mlog(0, "inode %llu try to take %s open lock\n",
1242 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1243 write ? "EXMODE" : "PRMODE");
1244
1245 if (ocfs2_mount_local(osb))
1246 goto out;
1247
1248 lockres = &OCFS2_I(inode)->ip_open_lockres;
1249
1250 level = write ? LKM_EXMODE : LKM_PRMODE;
1251
1252 /*
1253 * The file system may already be holding a PRMODE/EXMODE open lock.
1254 * Since we pass LKM_NOQUEUE, the request won't block waiting on
1255 * other nodes and the -EAGAIN will indicate to the caller that
1256 * this inode is still in use.
1257 */
1258 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres,
1259 level, LKM_NOQUEUE, 0);
1260
1261out:
1262 mlog_exit(status);
1263 return status;
1264}
1265
1266/*
1267 * ocfs2_open_unlock unlocks PR and EX mode open locks.
1268 */
1269void ocfs2_open_unlock(struct inode *inode)
1270{
1271 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_open_lockres;
1272 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1273
1274 mlog_entry_void();
1275
1276 mlog(0, "inode %llu drop open lock\n",
1277 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1278
1279 if (ocfs2_mount_local(osb))
1280 goto out;
1281
1282 if(lockres->l_ro_holders)
1283 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1284 LKM_PRMODE);
1285 if(lockres->l_ex_holders)
1286 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres,
1287 LKM_EXMODE);
1288
1289out:
1290 mlog_exit_void();
1291}
1292
1185int ocfs2_data_lock_full(struct inode *inode, 1293int ocfs2_data_lock_full(struct inode *inode,
1186 int write, 1294 int write,
1187 int arg_flags) 1295 int arg_flags)
@@ -2455,13 +2563,20 @@ int ocfs2_drop_inode_locks(struct inode *inode)
2455 * ocfs2_clear_inode has done it for us. */ 2563 * ocfs2_clear_inode has done it for us. */
2456 2564
2457 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2565 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2458 &OCFS2_I(inode)->ip_data_lockres); 2566 &OCFS2_I(inode)->ip_open_lockres);
2459 if (err < 0) 2567 if (err < 0)
2460 mlog_errno(err); 2568 mlog_errno(err);
2461 2569
2462 status = err; 2570 status = err;
2463 2571
2464 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), 2572 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2573 &OCFS2_I(inode)->ip_data_lockres);
2574 if (err < 0)
2575 mlog_errno(err);
2576 if (err < 0 && !status)
2577 status = err;
2578
2579 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2465 &OCFS2_I(inode)->ip_meta_lockres); 2580 &OCFS2_I(inode)->ip_meta_lockres);
2466 if (err < 0) 2581 if (err < 0)
2467 mlog_errno(err); 2582 mlog_errno(err);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index c343fca68cf1..59cb566e7983 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -80,6 +80,9 @@ void ocfs2_data_unlock(struct inode *inode,
80 int write); 80 int write);
81int ocfs2_rw_lock(struct inode *inode, int write); 81int ocfs2_rw_lock(struct inode *inode, int write);
82void ocfs2_rw_unlock(struct inode *inode, int write); 82void ocfs2_rw_unlock(struct inode *inode, int write);
83int ocfs2_open_lock(struct inode *inode);
84int ocfs2_try_open_lock(struct inode *inode, int write);
85void ocfs2_open_unlock(struct inode *inode);
83int ocfs2_meta_lock_atime(struct inode *inode, 86int ocfs2_meta_lock_atime(struct inode *inode,
84 struct vfsmount *vfsmnt, 87 struct vfsmount *vfsmnt,
85 int *level); 88 int *level);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 28ab56f2b98c..10d16a9e4fda 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -289,7 +289,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
289 (unsigned long long)fe->i_blkno); 289 (unsigned long long)fe->i_blkno);
290 290
291 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 291 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
292 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
293 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 292 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
294 293
295 inode->i_nlink = le16_to_cpu(fe->i_links_count); 294 inode->i_nlink = le16_to_cpu(fe->i_links_count);
@@ -347,6 +346,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
347 346
348 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 347 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
349 OCFS2_LOCK_TYPE_META, 0, inode); 348 OCFS2_LOCK_TYPE_META, 0, inode);
349
350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
351 OCFS2_LOCK_TYPE_OPEN, 0, inode);
350 } 352 }
351 353
352 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 354 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
@@ -421,7 +423,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
421 * cluster lock before trusting anything anyway. 423 * cluster lock before trusting anything anyway.
422 */ 424 */
423 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 425 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
424 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK) 426 && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
425 && !ocfs2_mount_local(osb); 427 && !ocfs2_mount_local(osb);
426 428
427 /* 429 /*
@@ -438,7 +440,17 @@ static int ocfs2_read_locked_inode(struct inode *inode,
438 OCFS2_LOCK_TYPE_META, 440 OCFS2_LOCK_TYPE_META,
439 generation, inode); 441 generation, inode);
440 442
443 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
444 OCFS2_LOCK_TYPE_OPEN,
445 0, inode);
446
441 if (can_lock) { 447 if (can_lock) {
448 status = ocfs2_open_lock(inode);
449 if (status) {
450 make_bad_inode(inode);
451 mlog_errno(status);
452 return status;
453 }
442 status = ocfs2_meta_lock(inode, NULL, 0); 454 status = ocfs2_meta_lock(inode, NULL, 0);
443 if (status) { 455 if (status) {
444 make_bad_inode(inode); 456 make_bad_inode(inode);
@@ -447,6 +459,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
447 } 459 }
448 } 460 }
449 461
462 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
463 status = ocfs2_try_open_lock(inode, 0);
464 if (status) {
465 make_bad_inode(inode);
466 return status;
467 }
468 }
469
450 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, 470 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
451 can_lock ? inode : NULL); 471 can_lock ? inode : NULL);
452 if (status < 0) { 472 if (status < 0) {
@@ -678,10 +698,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
678 struct inode *orphan_dir_inode = NULL; 698 struct inode *orphan_dir_inode = NULL;
679 struct buffer_head *orphan_dir_bh = NULL; 699 struct buffer_head *orphan_dir_bh = NULL;
680 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 700 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
701 struct ocfs2_dinode *di;
681 702
682 /* We've already voted on this so it should be readonly - no 703 di = (struct ocfs2_dinode *) di_bh->b_data;
683 * spinlock needed. */ 704 orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
684 orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
685 705
686 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 706 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
687 if (status) 707 if (status)
@@ -787,6 +807,35 @@ bail:
787 return ret; 807 return ret;
788} 808}
789 809
810static int ocfs2_request_delete(struct inode *inode)
811{
812 int status = 0;
813 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
814
815 if (ocfs2_inode_is_new(inode))
816 return 0;
817
818 if (ocfs2_node_map_is_only(osb, &osb->mounted_map,
819 osb->node_num))
820 return 0;
821 /*
822 * This is how ocfs2 determines whether an inode is still live
823 * within the cluster. Every node takes a shared read lock on
824 * the inode open lock in ocfs2_read_locked_inode(). When we
825 * get to ->delete_inode(), each node tries to convert its
826 * lock to an exclusive. Trylocks are serialized by the inode
827 * meta data lock. If the upconvert succeeds, we know the inode
828 * is no longer live and can be deleted.
829 *
830 * Though we call this with the meta data lock held, the
831 * trylock keeps us from ABBA deadlock.
832 */
833 status = ocfs2_try_open_lock(inode, 1);
834 if (status < 0 && status != -EAGAIN)
835 mlog_errno(status);
836 return status;
837}
838
790/* Query the cluster to determine whether we should wipe an inode from 839/* Query the cluster to determine whether we should wipe an inode from
791 * disk or not. 840 * disk or not.
792 * 841 *
@@ -839,11 +888,11 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
839 goto bail; 888 goto bail;
840 } 889 }
841 890
842 status = ocfs2_request_delete_vote(inode); 891 status = ocfs2_request_delete(inode);
843 /* -EBUSY means that other nodes are still using the 892 /* -EAGAIN means that other nodes are still using the
844 * inode. We're done here though, so avoid doing anything on 893 * inode. We're done here though, so avoid doing anything on
845 * disk and let them worry about deleting it. */ 894 * disk and let them worry about deleting it. */
846 if (status == -EBUSY) { 895 if (status == -EAGAIN) {
847 status = 0; 896 status = 0;
848 mlog(0, "Skipping delete of %llu because it is in use on" 897 mlog(0, "Skipping delete of %llu because it is in use on"
849 "other nodes\n", (unsigned long long)oi->ip_blkno); 898 "other nodes\n", (unsigned long long)oi->ip_blkno);
@@ -854,21 +903,10 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
854 goto bail; 903 goto bail;
855 } 904 }
856 905
857 spin_lock(&oi->ip_lock); 906 *wipe = 1;
858 if (oi->ip_orphaned_slot == OCFS2_INVALID_SLOT) { 907 mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n",
859 /* Nobody knew which slot this inode was orphaned 908 (unsigned long long)oi->ip_blkno,
860 * into. This may happen during node death and 909 le16_to_cpu(di->i_orphaned_slot));
861 * recovery knows how to clean it up so we can safely
862 * ignore this inode for now on. */
863 mlog(0, "Nobody knew where inode %llu was orphaned!\n",
864 (unsigned long long)oi->ip_blkno);
865 } else {
866 *wipe = 1;
867
868 mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n",
869 (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot);
870 }
871 spin_unlock(&oi->ip_lock);
872 910
873bail: 911bail:
874 return status; 912 return status;
@@ -1001,11 +1039,16 @@ void ocfs2_clear_inode(struct inode *inode)
1001 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 1039 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
1002 "Inode=%lu\n", inode->i_ino); 1040 "Inode=%lu\n", inode->i_ino);
1003 1041
1042 /* The open lock taken earlier replaces the delete_inode vote;
1043 * now it is time to unlock PR and EX open locks. */
1044 ocfs2_open_unlock(inode);
1045
1004 /* Do these before all the other work so that we don't bounce 1046 /* Do these before all the other work so that we don't bounce
1005 * the vote thread while waiting to destroy the locks. */ 1047 * the vote thread while waiting to destroy the locks. */
1006 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); 1048 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
1007 ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); 1049 ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
1008 ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); 1050 ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
1051 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
1009 1052
1010 /* We very well may get a clear_inode before all an inodes 1053 /* We very well may get a clear_inode before all an inodes
1011 * metadata has hit disk. Of course, we can't drop any cluster 1054 * metadata has hit disk. Of course, we can't drop any cluster
@@ -1030,6 +1073,7 @@ void ocfs2_clear_inode(struct inode *inode)
1030 ocfs2_lock_res_free(&oi->ip_rw_lockres); 1073 ocfs2_lock_res_free(&oi->ip_rw_lockres);
1031 ocfs2_lock_res_free(&oi->ip_meta_lockres); 1074 ocfs2_lock_res_free(&oi->ip_meta_lockres);
1032 ocfs2_lock_res_free(&oi->ip_data_lockres); 1075 ocfs2_lock_res_free(&oi->ip_data_lockres);
1076 ocfs2_lock_res_free(&oi->ip_open_lockres);
1033 1077
1034 ocfs2_metadata_cache_purge(inode); 1078 ocfs2_metadata_cache_purge(inode);
1035 1079
@@ -1086,9 +1130,6 @@ void ocfs2_drop_inode(struct inode *inode)
1086 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n", 1130 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
1087 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); 1131 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1088 1132
1089 /* Testing ip_orphaned_slot here wouldn't work because we may
1090 * not have gotten a delete_inode vote from any other nodes
1091 * yet. */
1092 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1133 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1093 generic_delete_inode(inode); 1134 generic_delete_inode(inode);
1094 else 1135 else
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 1a7dd2945b34..92d4feb34d74 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -34,6 +34,7 @@ struct ocfs2_inode_info
34 struct ocfs2_lock_res ip_rw_lockres; 34 struct ocfs2_lock_res ip_rw_lockres;
35 struct ocfs2_lock_res ip_meta_lockres; 35 struct ocfs2_lock_res ip_meta_lockres;
36 struct ocfs2_lock_res ip_data_lockres; 36 struct ocfs2_lock_res ip_data_lockres;
37 struct ocfs2_lock_res ip_open_lockres;
37 38
38 /* protects allocation changes on this inode. */ 39 /* protects allocation changes on this inode. */
39 struct rw_semaphore ip_alloc_sem; 40 struct rw_semaphore ip_alloc_sem;
@@ -119,8 +120,8 @@ void ocfs2_drop_inode(struct inode *inode);
119/* Flags for ocfs2_iget() */ 120/* Flags for ocfs2_iget() */
120#define OCFS2_FI_FLAG_NOWAIT 0x1 121#define OCFS2_FI_FLAG_NOWAIT 0x1
121#define OCFS2_FI_FLAG_DELETE 0x2 122#define OCFS2_FI_FLAG_DELETE 0x2
122#define OCFS2_FI_FLAG_SYSFILE 0x4 123#define OCFS2_FI_FLAG_SYSFILE 0x4
123#define OCFS2_FI_FLAG_NOLOCK 0x8 124#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x8
124struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags); 125struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
125struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb, 126struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
126 u64 blkno, 127 u64 blkno,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 825cb0ae1b4c..12445a31f733 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1306,7 +1306,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1306 continue; 1306 continue;
1307 1307
1308 iter = ocfs2_iget(osb, le64_to_cpu(de->inode), 1308 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1309 OCFS2_FI_FLAG_NOLOCK); 1309 OCFS2_FI_FLAG_ORPHAN_RECOVERY);
1310 if (IS_ERR(iter)) 1310 if (IS_ERR(iter))
1311 continue; 1311 continue;
1312 1312
@@ -1418,7 +1418,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
1418 /* Set the proper information to get us going into 1418 /* Set the proper information to get us going into
1419 * ocfs2_delete_inode. */ 1419 * ocfs2_delete_inode. */
1420 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; 1420 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
1421 oi->ip_orphaned_slot = slot;
1422 spin_unlock(&oi->ip_lock); 1421 spin_unlock(&oi->ip_lock);
1423 1422
1424 iput(inode); 1423 iput(inode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 1fff0c02d98b..a93c15fdcef3 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -187,7 +187,6 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
187 * unlink. */ 187 * unlink. */
188 spin_lock(&oi->ip_lock); 188 spin_lock(&oi->ip_lock);
189 oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED; 189 oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
190 oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
191 spin_unlock(&oi->ip_lock); 190 spin_unlock(&oi->ip_lock);
192 191
193bail_add: 192bail_add:
@@ -2220,9 +2219,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2220 /* Record which orphan dir our inode now resides 2219 /* Record which orphan dir our inode now resides
2221 * in. delete_inode will use this to determine which orphan 2220 * in. delete_inode will use this to determine which orphan
2222 * dir to lock. */ 2221 * dir to lock. */
2223 spin_lock(&OCFS2_I(inode)->ip_lock); 2222 fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
2224 OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
2225 spin_unlock(&OCFS2_I(inode)->ip_lock);
2226 2223
2227 mlog(0, "Inode %llu orphaned in slot %d\n", 2224 mlog(0, "Inode %llu orphaned in slot %d\n",
2228 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); 2225 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e61e218f5e0b..a476b63e2e60 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -446,7 +446,9 @@ struct ocfs2_dinode {
446 __le32 i_ctime_nsec; 446 __le32 i_ctime_nsec;
447 __le32 i_mtime_nsec; 447 __le32 i_mtime_nsec;
448 __le32 i_attr; 448 __le32 i_attr;
449 __le32 i_reserved1; 449 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
450 was set in i_flags */
451 __le16 i_reserved1;
450/*70*/ __le64 i_reserved2[8]; 452/*70*/ __le64 i_reserved2[8];
451/*B8*/ union { 453/*B8*/ union {
452 __le64 i_pad1; /* Generic way to refer to this 454 __le64 i_pad1; /* Generic way to refer to this
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 4d5d5655c185..4ca02b1c38ac 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -44,6 +44,7 @@ enum ocfs2_lock_type {
44 OCFS2_LOCK_TYPE_RENAME, 44 OCFS2_LOCK_TYPE_RENAME,
45 OCFS2_LOCK_TYPE_RW, 45 OCFS2_LOCK_TYPE_RW,
46 OCFS2_LOCK_TYPE_DENTRY, 46 OCFS2_LOCK_TYPE_DENTRY,
47 OCFS2_LOCK_TYPE_OPEN,
47 OCFS2_NUM_LOCK_TYPES 48 OCFS2_NUM_LOCK_TYPES
48}; 49};
49 50
@@ -69,6 +70,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
69 case OCFS2_LOCK_TYPE_DENTRY: 70 case OCFS2_LOCK_TYPE_DENTRY:
70 c = 'N'; 71 c = 'N';
71 break; 72 break;
73 case OCFS2_LOCK_TYPE_OPEN:
74 c = 'O';
75 break;
72 default: 76 default:
73 c = '\0'; 77 c = '\0';
74 } 78 }
@@ -85,6 +89,7 @@ static char *ocfs2_lock_type_strings[] = {
85 * important job it does, anyway. */ 89 * important job it does, anyway. */
86 [OCFS2_LOCK_TYPE_RW] = "Write/Read", 90 [OCFS2_LOCK_TYPE_RW] = "Write/Read",
87 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", 91 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
92 [OCFS2_LOCK_TYPE_OPEN] = "Open",
88}; 93};
89 94
90static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 95static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 6534f92424dd..16564ea6c141 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -963,6 +963,7 @@ static void ocfs2_inode_init_once(void *data,
963 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 963 ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
964 ocfs2_lock_res_init_once(&oi->ip_meta_lockres); 964 ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
965 ocfs2_lock_res_init_once(&oi->ip_data_lockres); 965 ocfs2_lock_res_init_once(&oi->ip_data_lockres);
966 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
966 967
967 ocfs2_metadata_cache_init(&oi->vfs_inode); 968 ocfs2_metadata_cache_init(&oi->vfs_inode);
968 969