diff options
| author | Joel Becker <joel.becker@oracle.com> | 2008-02-01 15:03:57 -0500 |
|---|---|---|
| committer | Mark Fasheh <mfasheh@suse.com> | 2008-04-18 11:56:02 -0400 |
| commit | 553abd046af609191a91af7289d87d477adc659f (patch) | |
| tree | cff21f65d49c0041993095a051edf76840c2af28 /fs | |
| parent | d85b20e4b300edfd290f21fc2d790ba16d2f225b (diff) | |
ocfs2: Change the recovery map to an array of node numbers.
The old recovery map was a bitmap of node numbers. This was sufficient
for the maximum node number of 254. Going forward, we want node numbers
to be UINT32. Thus, we need a new recovery map.
Note that we can't keep track of slots here. We must write down the
node number to recover *before* we get the locks needed to convert a
node number into a slot number.
The recovery map is now an array of unsigned ints, max_slots in size.
It moves to journal.c with the rest of recovery.
Because it needs to be initialized, we move all of recovery initialization
into a new function, ocfs2_recovery_init(). This actually cleans up
ocfs2_initialize_super() a little as well. Following on, recovery cleanup
becomes part of ocfs2_recovery_exit().
A number of node map functions are rendered obsolete and are removed.
Finally, waiting on recovery is wrapped in a function rather than naked
checks on the recovery_event. This is a cleanup from Mark.
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/ocfs2/dlmglue.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/heartbeat.c | 111 | ||||
| -rw-r--r-- | fs/ocfs2/heartbeat.h | 14 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 181 | ||||
| -rw-r--r-- | fs/ocfs2/journal.h | 4 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2.h | 3 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 33 |
7 files changed, 182 insertions, 170 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1a80fa9e7c9..15a5167e051 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -1950,8 +1950,7 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1950 | goto local; | 1950 | goto local; |
| 1951 | 1951 | ||
| 1952 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 1952 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
| 1953 | wait_event(osb->recovery_event, | 1953 | ocfs2_wait_for_recovery(osb); |
| 1954 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
| 1955 | 1954 | ||
| 1956 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 1955 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
| 1957 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 1956 | level = ex ? LKM_EXMODE : LKM_PRMODE; |
| @@ -1974,8 +1973,7 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1974 | * committed to owning this lock so we don't allow signals to | 1973 | * committed to owning this lock so we don't allow signals to |
| 1975 | * abort the operation. */ | 1974 | * abort the operation. */ |
| 1976 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 1975 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
| 1977 | wait_event(osb->recovery_event, | 1976 | ocfs2_wait_for_recovery(osb); |
| 1978 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
| 1979 | 1977 | ||
| 1980 | local: | 1978 | local: |
| 1981 | /* | 1979 | /* |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0758daf64da..80de2397c16 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -48,7 +48,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | |||
| 48 | int bit); | 48 | int bit); |
| 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, |
| 50 | int bit); | 50 | int bit); |
| 51 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map); | ||
| 52 | 51 | ||
| 53 | /* special case -1 for now | 52 | /* special case -1 for now |
| 54 | * TODO: should *really* make sure the calling func never passes -1!! */ | 53 | * TODO: should *really* make sure the calling func never passes -1!! */ |
| @@ -62,7 +61,6 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map) | |||
| 62 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 61 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
| 63 | { | 62 | { |
| 64 | spin_lock_init(&osb->node_map_lock); | 63 | spin_lock_init(&osb->node_map_lock); |
| 65 | ocfs2_node_map_init(&osb->recovery_map); | ||
| 66 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 64 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
| 67 | } | 65 | } |
| 68 | 66 | ||
| @@ -192,112 +190,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | |||
| 192 | return ret; | 190 | return ret; |
| 193 | } | 191 | } |
| 194 | 192 | ||
| 195 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map) | ||
| 196 | { | ||
| 197 | int bit; | ||
| 198 | bit = find_next_bit(map->map, map->num_nodes, 0); | ||
| 199 | if (bit < map->num_nodes) | ||
| 200 | return 0; | ||
| 201 | return 1; | ||
| 202 | } | ||
| 203 | |||
| 204 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
| 205 | struct ocfs2_node_map *map) | ||
| 206 | { | ||
| 207 | int ret; | ||
| 208 | BUG_ON(map->num_nodes == 0); | ||
| 209 | spin_lock(&osb->node_map_lock); | ||
| 210 | ret = __ocfs2_node_map_is_empty(map); | ||
| 211 | spin_unlock(&osb->node_map_lock); | ||
| 212 | return ret; | ||
| 213 | } | ||
| 214 | |||
| 215 | #if 0 | ||
| 216 | |||
| 217 | static void __ocfs2_node_map_dup(struct ocfs2_node_map *target, | ||
| 218 | struct ocfs2_node_map *from) | ||
| 219 | { | ||
| 220 | BUG_ON(from->num_nodes == 0); | ||
| 221 | ocfs2_node_map_init(target); | ||
| 222 | __ocfs2_node_map_set(target, from); | ||
| 223 | } | ||
| 224 | |||
| 225 | /* returns 1 if bit is the only bit set in target, 0 otherwise */ | ||
| 226 | int ocfs2_node_map_is_only(struct ocfs2_super *osb, | ||
| 227 | struct ocfs2_node_map *target, | ||
| 228 | int bit) | ||
| 229 | { | ||
| 230 | struct ocfs2_node_map temp; | ||
| 231 | int ret; | ||
| 232 | |||
| 233 | spin_lock(&osb->node_map_lock); | ||
| 234 | __ocfs2_node_map_dup(&temp, target); | ||
| 235 | __ocfs2_node_map_clear_bit(&temp, bit); | ||
| 236 | ret = __ocfs2_node_map_is_empty(&temp); | ||
| 237 | spin_unlock(&osb->node_map_lock); | ||
| 238 | |||
| 239 | return ret; | ||
| 240 | } | ||
| 241 | |||
| 242 | static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | ||
| 243 | struct ocfs2_node_map *from) | ||
| 244 | { | ||
| 245 | int num_longs, i; | ||
| 246 | |||
| 247 | BUG_ON(target->num_nodes != from->num_nodes); | ||
| 248 | BUG_ON(target->num_nodes == 0); | ||
| 249 | |||
| 250 | num_longs = BITS_TO_LONGS(target->num_nodes); | ||
| 251 | for (i = 0; i < num_longs; i++) | ||
| 252 | target->map[i] = from->map[i]; | ||
| 253 | } | ||
| 254 | |||
| 255 | #endif /* 0 */ | ||
| 256 | |||
| 257 | /* Returns whether the recovery bit was actually set - it may not be | ||
| 258 | * if a node is still marked as needing recovery */ | ||
| 259 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 260 | int num) | ||
| 261 | { | ||
| 262 | int set = 0; | ||
| 263 | |||
| 264 | spin_lock(&osb->node_map_lock); | ||
| 265 | |||
| 266 | if (!test_bit(num, osb->recovery_map.map)) { | ||
| 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | ||
| 268 | set = 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | spin_unlock(&osb->node_map_lock); | ||
| 272 | |||
| 273 | return set; | ||
| 274 | } | ||
| 275 | |||
| 276 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 277 | int num) | ||
| 278 | { | ||
| 279 | ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num); | ||
| 280 | } | ||
| 281 | |||
| 282 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
| 283 | struct ocfs2_node_map *map, | ||
| 284 | int idx) | ||
| 285 | { | ||
| 286 | int i = idx; | ||
| 287 | |||
| 288 | idx = O2NM_INVALID_NODE_NUM; | ||
| 289 | spin_lock(&osb->node_map_lock); | ||
| 290 | if ((i != O2NM_INVALID_NODE_NUM) && | ||
| 291 | (i >= 0) && | ||
| 292 | (i < map->num_nodes)) { | ||
| 293 | while(i < map->num_nodes) { | ||
| 294 | if (test_bit(i, map->map)) { | ||
| 295 | idx = i; | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | i++; | ||
| 299 | } | ||
| 300 | } | ||
| 301 | spin_unlock(&osb->node_map_lock); | ||
| 302 | return idx; | ||
| 303 | } | ||
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index eac63aed761..98d8ffc995b 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
| @@ -33,8 +33,6 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | |||
| 33 | 33 | ||
| 34 | /* node map functions - used to keep track of mounted and in-recovery | 34 | /* node map functions - used to keep track of mounted and in-recovery |
| 35 | * nodes. */ | 35 | * nodes. */ |
| 36 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
| 37 | struct ocfs2_node_map *map); | ||
| 38 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, | 36 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, |
| 39 | struct ocfs2_node_map *map, | 37 | struct ocfs2_node_map *map, |
| 40 | int bit); | 38 | int bit); |
| @@ -44,17 +42,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, | |||
| 44 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | 42 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, |
| 45 | struct ocfs2_node_map *map, | 43 | struct ocfs2_node_map *map, |
| 46 | int bit); | 44 | int bit); |
| 47 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
| 48 | struct ocfs2_node_map *map, | ||
| 49 | int idx); | ||
| 50 | static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb, | ||
| 51 | struct ocfs2_node_map *map) | ||
| 52 | { | ||
| 53 | return ocfs2_node_map_iterate(osb, map, 0); | ||
| 54 | } | ||
| 55 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 56 | int num); | ||
| 57 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 58 | int num); | ||
| 59 | 45 | ||
| 60 | #endif /* OCFS2_HEARTBEAT_H */ | 46 | #endif /* OCFS2_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index ed0c6d0850d..ca4c0ea5a4c 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 64 | int slot); | 64 | int slot); |
| 65 | static int ocfs2_commit_thread(void *arg); | 65 | static int ocfs2_commit_thread(void *arg); |
| 66 | 66 | ||
| 67 | |||
| 68 | /* | ||
| 69 | * The recovery map is a simple array of node numbers to recover. | ||
| 70 | * It is protected by the osb_lock. | ||
| 71 | */ | ||
| 72 | |||
| 73 | struct ocfs2_recovery_map { | ||
| 74 | int rm_used; | ||
| 75 | unsigned int *rm_entries; | ||
| 76 | }; | ||
| 77 | |||
| 78 | int ocfs2_recovery_init(struct ocfs2_super *osb) | ||
| 79 | { | ||
| 80 | struct ocfs2_recovery_map *rm; | ||
| 81 | |||
| 82 | mutex_init(&osb->recovery_lock); | ||
| 83 | osb->disable_recovery = 0; | ||
| 84 | osb->recovery_thread_task = NULL; | ||
| 85 | init_waitqueue_head(&osb->recovery_event); | ||
| 86 | |||
| 87 | rm = kzalloc(sizeof(struct ocfs2_recovery_map) + | ||
| 88 | osb->max_slots * sizeof(unsigned int), | ||
| 89 | GFP_KERNEL); | ||
| 90 | if (!rm) { | ||
| 91 | mlog_errno(-ENOMEM); | ||
| 92 | return -ENOMEM; | ||
| 93 | } | ||
| 94 | |||
| 95 | rm->rm_entries = (unsigned int *)((char *)rm + | ||
| 96 | sizeof(struct ocfs2_recovery_map)); | ||
| 97 | osb->recovery_map = rm; | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
| 103 | * memory barriers to make sure that we'll see the null task before | ||
| 104 | * being woken up */ | ||
| 105 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
| 106 | { | ||
| 107 | mb(); | ||
| 108 | return osb->recovery_thread_task != NULL; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ocfs2_recovery_exit(struct ocfs2_super *osb) | ||
| 112 | { | ||
| 113 | struct ocfs2_recovery_map *rm; | ||
| 114 | |||
| 115 | /* disable any new recovery threads and wait for any currently | ||
| 116 | * running ones to exit. Do this before setting the vol_state. */ | ||
| 117 | mutex_lock(&osb->recovery_lock); | ||
| 118 | osb->disable_recovery = 1; | ||
| 119 | mutex_unlock(&osb->recovery_lock); | ||
| 120 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
| 121 | |||
| 122 | /* At this point, we know that no more recovery threads can be | ||
| 123 | * launched, so wait for any recovery completion work to | ||
| 124 | * complete. */ | ||
| 125 | flush_workqueue(ocfs2_wq); | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Now that recovery is shut down, and the osb is about to be | ||
| 129 | * freed, the osb_lock is not taken here. | ||
| 130 | */ | ||
| 131 | rm = osb->recovery_map; | ||
| 132 | /* XXX: Should we bug if there are dirty entries? */ | ||
| 133 | |||
| 134 | kfree(rm); | ||
| 135 | } | ||
| 136 | |||
| 137 | static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, | ||
| 138 | unsigned int node_num) | ||
| 139 | { | ||
| 140 | int i; | ||
| 141 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 142 | |||
| 143 | assert_spin_locked(&osb->osb_lock); | ||
| 144 | |||
| 145 | for (i = 0; i < rm->rm_used; i++) { | ||
| 146 | if (rm->rm_entries[i] == node_num) | ||
| 147 | return 1; | ||
| 148 | } | ||
| 149 | |||
| 150 | return 0; | ||
| 151 | } | ||
| 152 | |||
| 153 | /* Behaves like test-and-set. Returns the previous value */ | ||
| 154 | static int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 155 | unsigned int node_num) | ||
| 156 | { | ||
| 157 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 158 | |||
| 159 | spin_lock(&osb->osb_lock); | ||
| 160 | if (__ocfs2_recovery_map_test(osb, node_num)) { | ||
| 161 | spin_unlock(&osb->osb_lock); | ||
| 162 | return 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* XXX: Can this be exploited? Not from o2dlm... */ | ||
| 166 | BUG_ON(rm->rm_used >= osb->max_slots); | ||
| 167 | |||
| 168 | rm->rm_entries[rm->rm_used] = node_num; | ||
| 169 | rm->rm_used++; | ||
| 170 | spin_unlock(&osb->osb_lock); | ||
| 171 | |||
| 172 | return 0; | ||
| 173 | } | ||
| 174 | |||
| 175 | static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 176 | unsigned int node_num) | ||
| 177 | { | ||
| 178 | int i; | ||
| 179 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 180 | |||
| 181 | spin_lock(&osb->osb_lock); | ||
| 182 | |||
| 183 | for (i = 0; i < rm->rm_used; i++) { | ||
| 184 | if (rm->rm_entries[i] == node_num) | ||
| 185 | break; | ||
| 186 | } | ||
| 187 | |||
| 188 | if (i < rm->rm_used) { | ||
| 189 | /* XXX: be careful with the pointer math */ | ||
| 190 | memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), | ||
| 191 | (rm->rm_used - i - 1) * sizeof(unsigned int)); | ||
| 192 | rm->rm_used--; | ||
| 193 | } | ||
| 194 | |||
| 195 | spin_unlock(&osb->osb_lock); | ||
| 196 | } | ||
| 197 | |||
| 67 | static int ocfs2_commit_cache(struct ocfs2_super *osb) | 198 | static int ocfs2_commit_cache(struct ocfs2_super *osb) |
| 68 | { | 199 | { |
| 69 | int status = 0; | 200 | int status = 0; |
| @@ -650,6 +781,23 @@ bail: | |||
| 650 | return status; | 781 | return status; |
| 651 | } | 782 | } |
| 652 | 783 | ||
| 784 | static int ocfs2_recovery_completed(struct ocfs2_super *osb) | ||
| 785 | { | ||
| 786 | int empty; | ||
| 787 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 788 | |||
| 789 | spin_lock(&osb->osb_lock); | ||
| 790 | empty = (rm->rm_used == 0); | ||
| 791 | spin_unlock(&osb->osb_lock); | ||
| 792 | |||
| 793 | return empty; | ||
| 794 | } | ||
| 795 | |||
| 796 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb) | ||
| 797 | { | ||
| 798 | wait_event(osb->recovery_event, ocfs2_recovery_completed(osb)); | ||
| 799 | } | ||
| 800 | |||
| 653 | /* | 801 | /* |
| 654 | * JBD Might read a cached version of another nodes journal file. We | 802 | * JBD Might read a cached version of another nodes journal file. We |
| 655 | * don't want this as this file changes often and we get no | 803 | * don't want this as this file changes often and we get no |
| @@ -848,6 +996,7 @@ static int __ocfs2_recovery_thread(void *arg) | |||
| 848 | { | 996 | { |
| 849 | int status, node_num; | 997 | int status, node_num; |
| 850 | struct ocfs2_super *osb = arg; | 998 | struct ocfs2_super *osb = arg; |
| 999 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 851 | 1000 | ||
| 852 | mlog_entry_void(); | 1001 | mlog_entry_void(); |
| 853 | 1002 | ||
| @@ -863,26 +1012,29 @@ restart: | |||
| 863 | goto bail; | 1012 | goto bail; |
| 864 | } | 1013 | } |
| 865 | 1014 | ||
| 866 | while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 1015 | spin_lock(&osb->osb_lock); |
| 867 | node_num = ocfs2_node_map_first_set_bit(osb, | 1016 | while (rm->rm_used) { |
| 868 | &osb->recovery_map); | 1017 | /* It's always safe to remove entry zero, as we won't |
| 869 | if (node_num == O2NM_INVALID_NODE_NUM) { | 1018 | * clear it until ocfs2_recover_node() has succeeded. */ |
| 870 | mlog(0, "Out of nodes to recover.\n"); | 1019 | node_num = rm->rm_entries[0]; |
| 871 | break; | 1020 | spin_unlock(&osb->osb_lock); |
| 872 | } | ||
| 873 | 1021 | ||
| 874 | status = ocfs2_recover_node(osb, node_num); | 1022 | status = ocfs2_recover_node(osb, node_num); |
| 875 | if (status < 0) { | 1023 | if (!status) { |
| 1024 | ocfs2_recovery_map_clear(osb, node_num); | ||
| 1025 | } else { | ||
| 876 | mlog(ML_ERROR, | 1026 | mlog(ML_ERROR, |
| 877 | "Error %d recovering node %d on device (%u,%u)!\n", | 1027 | "Error %d recovering node %d on device (%u,%u)!\n", |
| 878 | status, node_num, | 1028 | status, node_num, |
| 879 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1029 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 880 | mlog(ML_ERROR, "Volume requires unmount.\n"); | 1030 | mlog(ML_ERROR, "Volume requires unmount.\n"); |
| 881 | continue; | ||
| 882 | } | 1031 | } |
| 883 | 1032 | ||
| 884 | ocfs2_recovery_map_clear(osb, node_num); | 1033 | spin_lock(&osb->osb_lock); |
| 885 | } | 1034 | } |
| 1035 | spin_unlock(&osb->osb_lock); | ||
| 1036 | mlog(0, "All nodes recovered\n"); | ||
| 1037 | |||
| 886 | ocfs2_super_unlock(osb, 1); | 1038 | ocfs2_super_unlock(osb, 1); |
| 887 | 1039 | ||
| 888 | /* We always run recovery on our own orphan dir - the dead | 1040 | /* We always run recovery on our own orphan dir - the dead |
| @@ -893,8 +1045,7 @@ restart: | |||
| 893 | 1045 | ||
| 894 | bail: | 1046 | bail: |
| 895 | mutex_lock(&osb->recovery_lock); | 1047 | mutex_lock(&osb->recovery_lock); |
| 896 | if (!status && | 1048 | if (!status && !ocfs2_recovery_completed(osb)) { |
| 897 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | ||
| 898 | mutex_unlock(&osb->recovery_lock); | 1049 | mutex_unlock(&osb->recovery_lock); |
| 899 | goto restart; | 1050 | goto restart; |
| 900 | } | 1051 | } |
| @@ -924,8 +1075,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | |||
| 924 | 1075 | ||
| 925 | /* People waiting on recovery will wait on | 1076 | /* People waiting on recovery will wait on |
| 926 | * the recovery map to empty. */ | 1077 | * the recovery map to empty. */ |
| 927 | if (!ocfs2_recovery_map_set(osb, node_num)) | 1078 | if (ocfs2_recovery_map_set(osb, node_num)) |
| 928 | mlog(0, "node %d already be in recovery.\n", node_num); | 1079 | mlog(0, "node %d already in recovery map.\n", node_num); |
| 929 | 1080 | ||
| 930 | mlog(0, "starting recovery thread...\n"); | 1081 | mlog(0, "starting recovery thread...\n"); |
| 931 | 1082 | ||
| @@ -1197,7 +1348,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
| 1197 | if (status == -ENOENT) | 1348 | if (status == -ENOENT) |
| 1198 | continue; | 1349 | continue; |
| 1199 | 1350 | ||
| 1200 | if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) | 1351 | if (__ocfs2_recovery_map_test(osb, node_num)) |
| 1201 | continue; | 1352 | continue; |
| 1202 | spin_unlock(&osb->osb_lock); | 1353 | spin_unlock(&osb->osb_lock); |
| 1203 | 1354 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 220f3e818e7..db82be2532e 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, | |||
| 134 | 134 | ||
| 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ | 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ |
| 136 | void ocfs2_complete_recovery(struct work_struct *work); | 136 | void ocfs2_complete_recovery(struct work_struct *work); |
| 137 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb); | ||
| 138 | |||
| 139 | int ocfs2_recovery_init(struct ocfs2_super *osb); | ||
| 140 | void ocfs2_recovery_exit(struct ocfs2_super *osb); | ||
| 137 | 141 | ||
| 138 | /* | 142 | /* |
| 139 | * Journal Control: | 143 | * Journal Control: |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index ee3f675a421..c6ed8c35de0 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -180,6 +180,7 @@ enum ocfs2_mount_options | |||
| 180 | 180 | ||
| 181 | struct ocfs2_journal; | 181 | struct ocfs2_journal; |
| 182 | struct ocfs2_slot_info; | 182 | struct ocfs2_slot_info; |
| 183 | struct ocfs2_recovery_map; | ||
| 183 | struct ocfs2_super | 184 | struct ocfs2_super |
| 184 | { | 185 | { |
| 185 | struct task_struct *commit_task; | 186 | struct task_struct *commit_task; |
| @@ -191,7 +192,6 @@ struct ocfs2_super | |||
| 191 | struct ocfs2_slot_info *slot_info; | 192 | struct ocfs2_slot_info *slot_info; |
| 192 | 193 | ||
| 193 | spinlock_t node_map_lock; | 194 | spinlock_t node_map_lock; |
| 194 | struct ocfs2_node_map recovery_map; | ||
| 195 | 195 | ||
| 196 | u64 root_blkno; | 196 | u64 root_blkno; |
| 197 | u64 system_dir_blkno; | 197 | u64 system_dir_blkno; |
| @@ -226,6 +226,7 @@ struct ocfs2_super | |||
| 226 | 226 | ||
| 227 | atomic_t vol_state; | 227 | atomic_t vol_state; |
| 228 | struct mutex recovery_lock; | 228 | struct mutex recovery_lock; |
| 229 | struct ocfs2_recovery_map *recovery_map; | ||
| 229 | struct task_struct *recovery_thread_task; | 230 | struct task_struct *recovery_thread_task; |
| 230 | int disable_recovery; | 231 | int disable_recovery; |
| 231 | wait_queue_head_t checkpoint_event; | 232 | wait_queue_head_t checkpoint_event; |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fad37af2af9..1a4c7c7850f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1224,15 +1224,6 @@ leave: | |||
| 1224 | return status; | 1224 | return status; |
| 1225 | } | 1225 | } |
| 1226 | 1226 | ||
| 1227 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
| 1228 | * memory barriers to make sure that we'll see the null task before | ||
| 1229 | * being woken up */ | ||
| 1230 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
| 1231 | { | ||
| 1232 | mb(); | ||
| 1233 | return osb->recovery_thread_task != NULL; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | 1227 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) |
| 1237 | { | 1228 | { |
| 1238 | int tmp; | 1229 | int tmp; |
| @@ -1249,17 +1240,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1249 | 1240 | ||
| 1250 | ocfs2_truncate_log_shutdown(osb); | 1241 | ocfs2_truncate_log_shutdown(osb); |
| 1251 | 1242 | ||
| 1252 | /* disable any new recovery threads and wait for any currently | 1243 | /* This will disable recovery and flush any recovery work. */ |
| 1253 | * running ones to exit. Do this before setting the vol_state. */ | 1244 | ocfs2_recovery_exit(osb); |
| 1254 | mutex_lock(&osb->recovery_lock); | ||
| 1255 | osb->disable_recovery = 1; | ||
| 1256 | mutex_unlock(&osb->recovery_lock); | ||
| 1257 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
| 1258 | |||
| 1259 | /* At this point, we know that no more recovery threads can be | ||
| 1260 | * launched, so wait for any recovery completion work to | ||
| 1261 | * complete. */ | ||
| 1262 | flush_workqueue(ocfs2_wq); | ||
| 1263 | 1245 | ||
| 1264 | ocfs2_journal_shutdown(osb); | 1246 | ocfs2_journal_shutdown(osb); |
| 1265 | 1247 | ||
| @@ -1368,7 +1350,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1368 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1350 | osb->s_sectsize_bits = blksize_bits(sector_size); |
| 1369 | BUG_ON(!osb->s_sectsize_bits); | 1351 | BUG_ON(!osb->s_sectsize_bits); |
| 1370 | 1352 | ||
| 1371 | init_waitqueue_head(&osb->recovery_event); | ||
| 1372 | spin_lock_init(&osb->dc_task_lock); | 1353 | spin_lock_init(&osb->dc_task_lock); |
| 1373 | init_waitqueue_head(&osb->dc_event); | 1354 | init_waitqueue_head(&osb->dc_event); |
| 1374 | osb->dc_work_sequence = 0; | 1355 | osb->dc_work_sequence = 0; |
| @@ -1388,10 +1369,12 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1388 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 1369 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
| 1389 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1370 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 1390 | 1371 | ||
| 1391 | mutex_init(&osb->recovery_lock); | 1372 | status = ocfs2_recovery_init(osb); |
| 1392 | 1373 | if (status) { | |
| 1393 | osb->disable_recovery = 0; | 1374 | mlog(ML_ERROR, "Unable to initialize recovery state\n"); |
| 1394 | osb->recovery_thread_task = NULL; | 1375 | mlog_errno(status); |
| 1376 | goto bail; | ||
| 1377 | } | ||
| 1395 | 1378 | ||
| 1396 | init_waitqueue_head(&osb->checkpoint_event); | 1379 | init_waitqueue_head(&osb->checkpoint_event); |
| 1397 | atomic_set(&osb->needs_checkpoint, 0); | 1380 | atomic_set(&osb->needs_checkpoint, 0); |
