diff options
Diffstat (limited to 'fs/ocfs2/inode.c')
-rw-r--r-- | fs/ocfs2/inode.c | 93 |
1 files changed, 67 insertions, 26 deletions
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 28ab56f2b98c..10d16a9e4fda 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -289,7 +289,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
289 | (unsigned long long)fe->i_blkno); | 289 | (unsigned long long)fe->i_blkno); |
290 | 290 | ||
291 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | 291 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
292 | OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT; | ||
293 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); | 292 | OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); |
294 | 293 | ||
295 | inode->i_nlink = le16_to_cpu(fe->i_links_count); | 294 | inode->i_nlink = le16_to_cpu(fe->i_links_count); |
@@ -347,6 +346,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
347 | 346 | ||
348 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, | 347 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, |
349 | OCFS2_LOCK_TYPE_META, 0, inode); | 348 | OCFS2_LOCK_TYPE_META, 0, inode); |
349 | |||
350 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, | ||
351 | OCFS2_LOCK_TYPE_OPEN, 0, inode); | ||
350 | } | 352 | } |
351 | 353 | ||
352 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, | 354 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres, |
@@ -421,7 +423,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
421 | * cluster lock before trusting anything anyway. | 423 | * cluster lock before trusting anything anyway. |
422 | */ | 424 | */ |
423 | can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) | 425 | can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) |
424 | && !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK) | 426 | && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) |
425 | && !ocfs2_mount_local(osb); | 427 | && !ocfs2_mount_local(osb); |
426 | 428 | ||
427 | /* | 429 | /* |
@@ -438,7 +440,17 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
438 | OCFS2_LOCK_TYPE_META, | 440 | OCFS2_LOCK_TYPE_META, |
439 | generation, inode); | 441 | generation, inode); |
440 | 442 | ||
443 | ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, | ||
444 | OCFS2_LOCK_TYPE_OPEN, | ||
445 | 0, inode); | ||
446 | |||
441 | if (can_lock) { | 447 | if (can_lock) { |
448 | status = ocfs2_open_lock(inode); | ||
449 | if (status) { | ||
450 | make_bad_inode(inode); | ||
451 | mlog_errno(status); | ||
452 | return status; | ||
453 | } | ||
442 | status = ocfs2_meta_lock(inode, NULL, 0); | 454 | status = ocfs2_meta_lock(inode, NULL, 0); |
443 | if (status) { | 455 | if (status) { |
444 | make_bad_inode(inode); | 456 | make_bad_inode(inode); |
@@ -447,6 +459,14 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
447 | } | 459 | } |
448 | } | 460 | } |
449 | 461 | ||
462 | if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { | ||
463 | status = ocfs2_try_open_lock(inode, 0); | ||
464 | if (status) { | ||
465 | make_bad_inode(inode); | ||
466 | return status; | ||
467 | } | ||
468 | } | ||
469 | |||
450 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, | 470 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, |
451 | can_lock ? inode : NULL); | 471 | can_lock ? inode : NULL); |
452 | if (status < 0) { | 472 | if (status < 0) { |
@@ -678,10 +698,10 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
678 | struct inode *orphan_dir_inode = NULL; | 698 | struct inode *orphan_dir_inode = NULL; |
679 | struct buffer_head *orphan_dir_bh = NULL; | 699 | struct buffer_head *orphan_dir_bh = NULL; |
680 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 700 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
701 | struct ocfs2_dinode *di; | ||
681 | 702 | ||
682 | /* We've already voted on this so it should be readonly - no | 703 | di = (struct ocfs2_dinode *) di_bh->b_data; |
683 | * spinlock needed. */ | 704 | orphaned_slot = le16_to_cpu(di->i_orphaned_slot); |
684 | orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; | ||
685 | 705 | ||
686 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); | 706 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); |
687 | if (status) | 707 | if (status) |
@@ -787,6 +807,35 @@ bail: | |||
787 | return ret; | 807 | return ret; |
788 | } | 808 | } |
789 | 809 | ||
810 | static int ocfs2_request_delete(struct inode *inode) | ||
811 | { | ||
812 | int status = 0; | ||
813 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
814 | |||
815 | if (ocfs2_inode_is_new(inode)) | ||
816 | return 0; | ||
817 | |||
818 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
819 | osb->node_num)) | ||
820 | return 0; | ||
821 | /* | ||
822 | * This is how ocfs2 determines whether an inode is still live | ||
823 | * within the cluster. Every node takes a shared read lock on | ||
824 | * the inode open lock in ocfs2_read_locked_inode(). When we | ||
825 | * get to ->delete_inode(), each node tries to convert its | ||
826 | * lock to an exclusive one. Trylocks are serialized by the inode | ||
827 | * meta data lock. If the upconvert succeeds, we know the inode | ||
828 | * is no longer live and can be deleted. | ||
829 | * | ||
830 | * Though we call this with the meta data lock held, the | ||
831 | * trylock keeps us from ABBA deadlock. | ||
832 | */ | ||
833 | status = ocfs2_try_open_lock(inode, 1); | ||
834 | if (status < 0 && status != -EAGAIN) /* -EAGAIN: inode still in use on other nodes, not logged */ | ||
835 | mlog_errno(status); | ||
836 | return status; | ||
837 | } | ||
838 | |||
790 | /* Query the cluster to determine whether we should wipe an inode from | 839 | /* Query the cluster to determine whether we should wipe an inode from |
791 | * disk or not. | 840 | * disk or not. |
792 | * | 841 | * |
@@ -839,11 +888,11 @@ static int ocfs2_query_inode_wipe(struct inode *inode, | |||
839 | goto bail; | 888 | goto bail; |
840 | } | 889 | } |
841 | 890 | ||
842 | status = ocfs2_request_delete_vote(inode); | 891 | status = ocfs2_request_delete(inode); |
843 | /* -EBUSY means that other nodes are still using the | 892 | /* -EAGAIN means that other nodes are still using the |
844 | * inode. We're done here though, so avoid doing anything on | 893 | * inode. We're done here though, so avoid doing anything on |
845 | * disk and let them worry about deleting it. */ | 894 | * disk and let them worry about deleting it. */ |
846 | if (status == -EBUSY) { | 895 | if (status == -EAGAIN) { |
847 | status = 0; | 896 | status = 0; |
848 | mlog(0, "Skipping delete of %llu because it is in use on" | 897 | mlog(0, "Skipping delete of %llu because it is in use on" |
849 | "other nodes\n", (unsigned long long)oi->ip_blkno); | 898 | "other nodes\n", (unsigned long long)oi->ip_blkno); |
@@ -854,21 +903,10 @@ static int ocfs2_query_inode_wipe(struct inode *inode, | |||
854 | goto bail; | 903 | goto bail; |
855 | } | 904 | } |
856 | 905 | ||
857 | spin_lock(&oi->ip_lock); | 906 | *wipe = 1; |
858 | if (oi->ip_orphaned_slot == OCFS2_INVALID_SLOT) { | 907 | mlog(0, "Inode %llu is ok to wipe from orphan dir %u\n", |
859 | /* Nobody knew which slot this inode was orphaned | 908 | (unsigned long long)oi->ip_blkno, |
860 | * into. This may happen during node death and | 909 | le16_to_cpu(di->i_orphaned_slot)); |
861 | * recovery knows how to clean it up so we can safely | ||
862 | * ignore this inode for now on. */ | ||
863 | mlog(0, "Nobody knew where inode %llu was orphaned!\n", | ||
864 | (unsigned long long)oi->ip_blkno); | ||
865 | } else { | ||
866 | *wipe = 1; | ||
867 | |||
868 | mlog(0, "Inode %llu is ok to wipe from orphan dir %d\n", | ||
869 | (unsigned long long)oi->ip_blkno, oi->ip_orphaned_slot); | ||
870 | } | ||
871 | spin_unlock(&oi->ip_lock); | ||
872 | 910 | ||
873 | bail: | 911 | bail: |
874 | return status; | 912 | return status; |
@@ -1001,11 +1039,16 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1001 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1039 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, |
1002 | "Inode=%lu\n", inode->i_ino); | 1040 | "Inode=%lu\n", inode->i_ino); |
1003 | 1041 | ||
1042 | /* The delete_inode vote was removed in favor of the open lock we | ||
1043 | * took earlier; now it is time to unlock the PR and EX open locks. */ | ||
1044 | ocfs2_open_unlock(inode); | ||
1045 | |||
1004 | /* Do these before all the other work so that we don't bounce | 1046 | /* Do these before all the other work so that we don't bounce |
1005 | * the vote thread while waiting to destroy the locks. */ | 1047 | * the vote thread while waiting to destroy the locks. */ |
1006 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1048 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); |
1007 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); | 1049 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); |
1008 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); | 1050 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); |
1051 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | ||
1009 | 1052 | ||
1010 | /* We very well may get a clear_inode before all of an inode's | 1053 | /* We very well may get a clear_inode before all of an inode's |
1011 | * metadata has hit disk. Of course, we can't drop any cluster | 1054 | * metadata has hit disk. Of course, we can't drop any cluster |
@@ -1030,6 +1073,7 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1030 | ocfs2_lock_res_free(&oi->ip_rw_lockres); | 1073 | ocfs2_lock_res_free(&oi->ip_rw_lockres); |
1031 | ocfs2_lock_res_free(&oi->ip_meta_lockres); | 1074 | ocfs2_lock_res_free(&oi->ip_meta_lockres); |
1032 | ocfs2_lock_res_free(&oi->ip_data_lockres); | 1075 | ocfs2_lock_res_free(&oi->ip_data_lockres); |
1076 | ocfs2_lock_res_free(&oi->ip_open_lockres); | ||
1033 | 1077 | ||
1034 | ocfs2_metadata_cache_purge(inode); | 1078 | ocfs2_metadata_cache_purge(inode); |
1035 | 1079 | ||
@@ -1086,9 +1130,6 @@ void ocfs2_drop_inode(struct inode *inode) | |||
1086 | mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n", | 1130 | mlog(0, "Drop inode %llu, nlink = %u, ip_flags = 0x%x\n", |
1087 | (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); | 1131 | (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); |
1088 | 1132 | ||
1089 | /* Testing ip_orphaned_slot here wouldn't work because we may | ||
1090 | * not have gotten a delete_inode vote from any other nodes | ||
1091 | * yet. */ | ||
1092 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) | 1133 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) |
1093 | generic_delete_inode(inode); | 1134 | generic_delete_inode(inode); |
1094 | else | 1135 | else |