Diffstat (limited to 'fs/ocfs2/journal.c')
-rw-r--r--  fs/ocfs2/journal.c  181
1 file changed, 166 insertions, 15 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index ed0c6d0850d7..ca4c0ea5a4cd 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 				 int slot);
 static int ocfs2_commit_thread(void *arg);
 
+
+/*
+ * The recovery_list is a simple linked list of node numbers to recover.
+ * It is protected by the recovery_lock.
+ */
+
+struct ocfs2_recovery_map {
+	int rm_used;
+	unsigned int *rm_entries;
+};
+
+int ocfs2_recovery_init(struct ocfs2_super *osb)
+{
+	struct ocfs2_recovery_map *rm;
+
+	mutex_init(&osb->recovery_lock);
+	osb->disable_recovery = 0;
+	osb->recovery_thread_task = NULL;
+	init_waitqueue_head(&osb->recovery_event);
+
+	rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
+		     osb->max_slots * sizeof(unsigned int),
+		     GFP_KERNEL);
+	if (!rm) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	rm->rm_entries = (unsigned int *)((char *)rm +
+					  sizeof(struct ocfs2_recovery_map));
+	osb->recovery_map = rm;
+
+	return 0;
+}
+
+/* we can't grab the goofy sem lock from inside wait_event, so we use
+ * memory barriers to make sure that we'll see the null task before
+ * being woken up */
+static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
+{
+	mb();
+	return osb->recovery_thread_task != NULL;
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+	struct ocfs2_recovery_map *rm;
+
+	/* disable any new recovery threads and wait for any currently
+	 * running ones to exit. Do this before setting the vol_state. */
+	mutex_lock(&osb->recovery_lock);
+	osb->disable_recovery = 1;
+	mutex_unlock(&osb->recovery_lock);
+	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
+
+	/* At this point, we know that no more recovery threads can be
+	 * launched, so wait for any recovery completion work to
+	 * complete. */
+	flush_workqueue(ocfs2_wq);
+
+	/*
+	 * Now that recovery is shut down, and the osb is about to be
+	 * freed, the osb_lock is not taken here.
+	 */
+	rm = osb->recovery_map;
+	/* XXX: Should we bug if there are dirty entries? */
+
+	kfree(rm);
+}
+
+static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
+				     unsigned int node_num)
+{
+	int i;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	assert_spin_locked(&osb->osb_lock);
+
+	for (i = 0; i < rm->rm_used; i++) {
+		if (rm->rm_entries[i] == node_num)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Behaves like test-and-set. Returns the previous value */
+static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
+				  unsigned int node_num)
+{
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	spin_lock(&osb->osb_lock);
+	if (__ocfs2_recovery_map_test(osb, node_num)) {
+		spin_unlock(&osb->osb_lock);
+		return 1;
+	}
+
+	/* XXX: Can this be exploited? Not from o2dlm... */
+	BUG_ON(rm->rm_used >= osb->max_slots);
+
+	rm->rm_entries[rm->rm_used] = node_num;
+	rm->rm_used++;
+	spin_unlock(&osb->osb_lock);
+
+	return 0;
+}
+
+static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
+				     unsigned int node_num)
+{
+	int i;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	spin_lock(&osb->osb_lock);
+
+	for (i = 0; i < rm->rm_used; i++) {
+		if (rm->rm_entries[i] == node_num)
+			break;
+	}
+
+	if (i < rm->rm_used) {
+		/* XXX: be careful with the pointer math */
+		memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]),
+			(rm->rm_used - i - 1) * sizeof(unsigned int));
+		rm->rm_used--;
+	}
+
+	spin_unlock(&osb->osb_lock);
+}
+
 static int ocfs2_commit_cache(struct ocfs2_super *osb)
 {
 	int status = 0;
@@ -650,6 +781,23 @@ bail:
 	return status;
 }
 
+static int ocfs2_recovery_completed(struct ocfs2_super *osb)
+{
+	int empty;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
+
+	spin_lock(&osb->osb_lock);
+	empty = (rm->rm_used == 0);
+	spin_unlock(&osb->osb_lock);
+
+	return empty;
+}
+
+void ocfs2_wait_for_recovery(struct ocfs2_super *osb)
+{
+	wait_event(osb->recovery_event, ocfs2_recovery_completed(osb));
+}
+
 /*
  * JBD Might read a cached version of another nodes journal file. We
  * don't want this as this file changes often and we get no
@@ -848,6 +996,7 @@ static int __ocfs2_recovery_thread(void *arg)
 {
 	int status, node_num;
 	struct ocfs2_super *osb = arg;
+	struct ocfs2_recovery_map *rm = osb->recovery_map;
 
 	mlog_entry_void();
 
@@ -863,26 +1012,29 @@ restart:
 		goto bail;
 	}
 
-	while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
-		node_num = ocfs2_node_map_first_set_bit(osb,
-						&osb->recovery_map);
-		if (node_num == O2NM_INVALID_NODE_NUM) {
-			mlog(0, "Out of nodes to recover.\n");
-			break;
-		}
+	spin_lock(&osb->osb_lock);
+	while (rm->rm_used) {
+		/* It's always safe to remove entry zero, as we won't
+		 * clear it until ocfs2_recover_node() has succeeded. */
+		node_num = rm->rm_entries[0];
+		spin_unlock(&osb->osb_lock);
 
 		status = ocfs2_recover_node(osb, node_num);
-		if (status < 0) {
+		if (!status) {
+			ocfs2_recovery_map_clear(osb, node_num);
+		} else {
 			mlog(ML_ERROR,
 			     "Error %d recovering node %d on device (%u,%u)!\n",
 			     status, node_num,
 			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
 			mlog(ML_ERROR, "Volume requires unmount.\n");
-			continue;
 		}
 
-		ocfs2_recovery_map_clear(osb, node_num);
+		spin_lock(&osb->osb_lock);
 	}
+	spin_unlock(&osb->osb_lock);
+	mlog(0, "All nodes recovered\n");
+
 	ocfs2_super_unlock(osb, 1);
 
 	/* We always run recovery on our own orphan dir - the dead
@@ -893,8 +1045,7 @@ restart:
 
 bail:
 	mutex_lock(&osb->recovery_lock);
-	if (!status &&
-	    !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
+	if (!status && !ocfs2_recovery_completed(osb)) {
 		mutex_unlock(&osb->recovery_lock);
 		goto restart;
 	}
@@ -924,8 +1075,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
 
 	/* People waiting on recovery will wait on
 	 * the recovery map to empty. */
-	if (!ocfs2_recovery_map_set(osb, node_num))
-		mlog(0, "node %d already be in recovery.\n", node_num);
+	if (ocfs2_recovery_map_set(osb, node_num))
+		mlog(0, "node %d already in recovery map.\n", node_num);
 
 	mlog(0, "starting recovery thread...\n");
 
@@ -1197,7 +1348,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
 		if (status == -ENOENT)
 			continue;
 
-		if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num))
+		if (__ocfs2_recovery_map_test(osb, node_num))
 			continue;
 		spin_unlock(&osb->osb_lock);
 