aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/inode.c
diff options
context:
space:
mode:
author: Tiger Yang <tiger.yang@oracle.com> 2007-03-20 19:01:38 -0400
committer: Mark Fasheh <mark.fasheh@oracle.com> 2007-04-26 17:39:48 -0400
commit: 500086300e6dc5308a7328990bd50d17e075162b (patch)
tree: 4083cda09445c260c2cc2ac1d0f68c05ad2b958e /fs/ocfs2/inode.c
parent: a9f5f70739363ccca2e771c274c4f015c5fb7a88 (diff)
ocfs2: Remove delete inode vote
Ocfs2 currently does cluster-wide node messaging to check the open state of an inode during delete. This patch removes that mechanism in favor of an inode cluster lock, which is taken at the shared-read level when an inode is first read and dropped in clear_inode(). This allows a deleting node to test the liveness of an inode by attempting to take an exclusive lock.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r-- fs/ocfs2/inode.c 93
1 files changed, 67 insertions, 26 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 28ab56f2b98c..10d16a9e4fda 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -289,7 +289,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
289 (unsigned long long)fe->i_blkno); 289 (unsigned long long)fe->i_blkno);
290 290
291 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 291 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
292 OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
293 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 292 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
294 293
295 inode->i_nlink = le16_to_cpu(fe->i_links_count); 294 inode->i_nlink = le16_to_cpu(fe->i_links_count);
@@ -347,6 +346,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
347 346
348 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, 347 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
349 OCFS2_LOCK_TYPE_META, 0, inode); 348 OCFS2_LOCK_TYPE_META, 0, inode);
349
350 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
351 OCFS2_LOCK_TYPE_OPEN, 0, inode);
350 } 352 }
351 353
352 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, 354 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
@@ -421,7 +423,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
421 * cluster lock before trusting anything anyway. 423 * cluster lock before trusting anything anyway.
422 */ 424 */
423 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) 425 can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
424 && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK) 426 && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
425 && !ocfs2_mount_local(osb); 427 && !ocfs2_mount_local(osb);
426 428
427 /* 429 /*
@@ -438,7 +440,17 @@ static int ocfs2_read_locked_inode(struct inode *inode,
438 OCFS2_LOCK_TYPE_META, 440 OCFS2_LOCK_TYPE_META,
439 generation, inode); 441 generation, inode);
440 442
443 ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
444 OCFS2_LOCK_TYPE_OPEN,
445 0, inode);
446
441 if (can_lock) { 447 if (can_lock) {
448 status = ocfs2_open_lock(inode);
449 if (status) {
450 make_bad_inode(inode);
451 mlog_errno(status);
452 return status;
453 }
442 status = ocfs2_meta_lock(inode, NULL, 0); 454 status = ocfs2_meta_lock(inode, NULL, 0);
443 if (status) { 455 if (status) {
444 make_bad_inode(inode); 456 make_bad_inode(inode);
@@ -447,6 +459,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
447 } 459 }
448 } 460 }
449 461
462 if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
463 status = ocfs2_try_open_lock(inode, 0);
464 if (status) {
465 make_bad_inode(inode);
466 return status;
467 }
468 }
469
450 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, 470 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
451 can_lock ? inode : NULL); 471 can_lock ? inode : NULL);
452 if (status < 0) { 472 if (status < 0) {
@@ -678,10 +698,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
678 struct inode *orphan_dir_inode = NULL; 698 struct inode *orphan_dir_inode = NULL;
679 struct buffer_head *orphan_dir_bh = NULL; 699 struct buffer_head *orphan_dir_bh = NULL;
680 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 700 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
701 struct ocfs2_dinode *di;
681 702
682 /* We've already voted on this so it should be readonly - no 703 di = (struct ocfs2_dinode *) di_bh->b_data;
683 * spinlock needed. */ 704 orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
684 orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot;
685 705
686 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 706 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
687 if (status) 707 if (status)
@@ -787,6 +807,35 @@ bail:
787 return ret; 807 return ret;
788} 808}
789 809
810static int ocfs2_request_delete(struct inode *inode)
811{
812 int status = 0;
813 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
814
815 if (ocfs2_inode_is_new(inode))
816 return 0;
817
818 if (ocfs2_node_map_is_only(osb, &osb->mounted_map,
819 osb->node_num))
820 return 0;
821 /*
822 * This is how ocfs2 determines whether an inode is still live
823 * within the cluster. Every node takes a shared read lock on
824 * the inode open lock in ocfs2_read_locked_inode(). When we
825 * get to ->delete_inode(), each node tries to convert its
826 * lock to an exclusive. Trylocks are serialized by the inode
827 * meta data lock. If the upconvert succeeds, we know the inode
828 * is no longer live and can be deleted.
829 *
830 * Though we call this with the meta data lock held, the
831 * trylock keeps us from ABBA deadlock.
832 */
833 status = ocfs2_try_open_lock(inode, 1);
834 if (status < 0 && status != -EAGAIN)
835 mlog_errno(status);
836 return status;
837}
838
790/* Query the cluster to determine whether we should wipe an inode from 839/* Query the cluster to determine whether we should wipe an inode from
791 * disk or not. 840 * disk or not.
792 * 841 *
@@ -839,11 +888,11 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
839 goto bail; 888 goto bail;
840 } 889 }
841 890
842 status = ocfs2_request_delete_vote(inode); 891 status = ocfs2_request_delete(inode);
843 /* -EBUSY means that other nodes are still using the 892 /* -EAGAIN means that other nodes are still using the
844 * inode. We're done here though, so avoid doing anything on 893 * inode. We're done here though, so avoid doing anything on
845 * disk and let them worry about deleting it. */ 894 * disk and let them worry about deleting it. */
846 if (status == -EBUSY) { 895 if (status == -EAGAIN) {
847 status = 0; 896 status = 0;
848 mlog(0, "Skipping delete of %llu because it is in use on" 897 mlog(0, "Skipping delete of %llu because it is in use on"
849 "other nodes\n", (unsigned long long)oi->ip_blkno); 898 "other nodes\n", (unsigned long long)oi->ip_blkno);
@@ -854,21 +903,10 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
854 goto bail; 903 goto bail;
855 } 904 }
856 905
857 spin_lock(&oi->ip_lock); 906 *wipe = 1;
858 if (oi->ip_orphaned_slot == OCFS2_INVALID_SLOT) { 907 mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n",
859 /* Nobody knew which slot this inode was orphaned 908 (unsigned long long)oi->ip_blkno,
860 * into. This may happen during node death and 909 le16_to_cpu(di->i_orphaned_slot));
861 * recovery knows how to clean it up so we can safely
862 * ignore this inode for now on. */
863 mlog(0, "Nobody knew where inode %llu was orphaned!\n",
864 (unsigned long long)oi->ip_blkno);
865 } else {
866 *wipe = 1;
867
868 mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n",
869 (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot);
870 }
871 spin_unlock(&oi->ip_lock);
872 910
873bail: 911bail:
874 return status; 912 return status;
@@ -1001,11 +1039,16 @@ void ocfs2_clear_inode(struct inode *inode)
1001 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, 1039 mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
1002 "Inode=%lu\n", inode->i_ino); 1040 "Inode=%lu\n", inode->i_ino);
1003 1041
1042 /* Since the delete_inode vote was removed, we have been holding
1043 * the open lock; now it is time to drop the PR and EX open locks. */
1044 ocfs2_open_unlock(inode);
1045
1004 /* Do these before all the other work so that we don't bounce 1046 /* Do these before all the other work so that we don't bounce
1005 * the vote thread while waiting to destroy the locks. */ 1047 * the vote thread while waiting to destroy the locks. */
1006 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); 1048 ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
1007 ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); 1049 ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
1008 ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); 1050 ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
1051 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
1009 1052
1010 /* We very well may get a clear_inode before all of an inode's 1053 /* We very well may get a clear_inode before all of an inode's
1011 * metadata has hit disk. Of course, we can't drop any cluster 1054 * metadata has hit disk. Of course, we can't drop any cluster
@@ -1030,6 +1073,7 @@ void ocfs2_clear_inode(struct inode *inode)
1030 ocfs2_lock_res_free(&oi->ip_rw_lockres); 1073 ocfs2_lock_res_free(&oi->ip_rw_lockres);
1031 ocfs2_lock_res_free(&oi->ip_meta_lockres); 1074 ocfs2_lock_res_free(&oi->ip_meta_lockres);
1032 ocfs2_lock_res_free(&oi->ip_data_lockres); 1075 ocfs2_lock_res_free(&oi->ip_data_lockres);
1076 ocfs2_lock_res_free(&oi->ip_open_lockres);
1033 1077
1034 ocfs2_metadata_cache_purge(inode); 1078 ocfs2_metadata_cache_purge(inode);
1035 1079
@@ -1086,9 +1130,6 @@ void ocfs2_drop_inode(struct inode *inode)
1086 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n", 1130 mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n",
1087 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); 1131 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1088 1132
1089 /* Testing ip_orphaned_slot here wouldn't work because we may
1090 * not have gotten a delete_inode vote from any other nodes
1091 * yet. */
1092 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1133 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1093 generic_delete_inode(inode); 1134 generic_delete_inode(inode);
1094 else 1135 else