diff options
author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2014-06-07 01:45:22 -0400 |
---|---|---|
committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-02-23 10:59:05 -0500 |
commit | e94987db2ed983aea4e45d22db9e17c6bbf2a623 (patch) | |
tree | d216701edf17d2f1b6781be78da780a8c9cf6c18 /drivers/md | |
parent | 11dd35daaab86d12270d23a10e8d242846a8830a (diff) |
Initiate recovery on node failure
When a node fails, the DLM informs us of the failure and passes the DLM
slot number of the failed node. The bit corresponding to that slot (slot
number - 1) is set in cluster_info->recovery_map, and the recovery thread is woken up.
The recovery thread:
1. Derives the slot number from the recovery_map
2. Locks the bitmap corresponding to the slot
3. Copies the set bits to the node-local bitmap
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md-cluster.c | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index b59c3a0ebd08..1f82d0d731ae 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/dlm.h> | 13 | #include <linux/dlm.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include "md.h" | 15 | #include "md.h" |
16 | #include "bitmap.h" | ||
16 | #include "md-cluster.h" | 17 | #include "md-cluster.h" |
17 | 18 | ||
18 | #define LVB_SIZE 64 | 19 | #define LVB_SIZE 64 |
@@ -49,6 +50,8 @@ struct md_cluster_info { | |||
49 | struct dlm_lock_resource *bitmap_lockres; | 50 | struct dlm_lock_resource *bitmap_lockres; |
50 | struct list_head suspend_list; | 51 | struct list_head suspend_list; |
51 | spinlock_t suspend_lock; | 52 | spinlock_t suspend_lock; |
53 | struct md_thread *recovery_thread; | ||
54 | unsigned long recovery_map; | ||
52 | }; | 55 | }; |
53 | 56 | ||
54 | static void sync_ast(void *arg) | 57 | static void sync_ast(void *arg) |
@@ -184,6 +187,50 @@ out: | |||
184 | return s; | 187 | return s; |
185 | } | 188 | } |
186 | 189 | ||
190 | void recover_bitmaps(struct md_thread *thread) | ||
191 | { | ||
192 | struct mddev *mddev = thread->mddev; | ||
193 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
194 | struct dlm_lock_resource *bm_lockres; | ||
195 | char str[64]; | ||
196 | int slot, ret; | ||
197 | struct suspend_info *s, *tmp; | ||
198 | sector_t lo, hi; | ||
199 | |||
200 | while (cinfo->recovery_map) { | ||
201 | slot = fls64((u64)cinfo->recovery_map) - 1; | ||
202 | |||
203 | /* Clear suspend_area associated with the bitmap */ | ||
204 | spin_lock_irq(&cinfo->suspend_lock); | ||
205 | list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) | ||
206 | if (slot == s->slot) { | ||
207 | list_del(&s->list); | ||
208 | kfree(s); | ||
209 | } | ||
210 | spin_unlock_irq(&cinfo->suspend_lock); | ||
211 | |||
212 | snprintf(str, 64, "bitmap%04d", slot); | ||
213 | bm_lockres = lockres_init(mddev, str, NULL, 1); | ||
214 | if (!bm_lockres) { | ||
215 | pr_err("md-cluster: Cannot initialize bitmaps\n"); | ||
216 | goto clear_bit; | ||
217 | } | ||
218 | |||
219 | ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); | ||
220 | if (ret) { | ||
221 | pr_err("md-cluster: Could not DLM lock %s: %d\n", | ||
222 | str, ret); | ||
223 | goto clear_bit; | ||
224 | } | ||
225 | ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); | ||
226 | if (ret) | ||
227 | pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); | ||
228 | dlm_unlock_sync(bm_lockres); | ||
229 | clear_bit: | ||
230 | clear_bit(slot, &cinfo->recovery_map); | ||
231 | } | ||
232 | } | ||
233 | |||
187 | static void recover_prep(void *arg) | 234 | static void recover_prep(void *arg) |
188 | { | 235 | { |
189 | } | 236 | } |
@@ -197,6 +244,16 @@ static void recover_slot(void *arg, struct dlm_slot *slot) | |||
197 | mddev->bitmap_info.cluster_name, | 244 | mddev->bitmap_info.cluster_name, |
198 | slot->nodeid, slot->slot, | 245 | slot->nodeid, slot->slot, |
199 | cinfo->slot_number); | 246 | cinfo->slot_number); |
247 | set_bit(slot->slot - 1, &cinfo->recovery_map); | ||
248 | if (!cinfo->recovery_thread) { | ||
249 | cinfo->recovery_thread = md_register_thread(recover_bitmaps, | ||
250 | mddev, "recover"); | ||
251 | if (!cinfo->recovery_thread) { | ||
252 | pr_warn("md-cluster: Could not create recovery thread\n"); | ||
253 | return; | ||
254 | } | ||
255 | } | ||
256 | md_wakeup_thread(cinfo->recovery_thread); | ||
200 | } | 257 | } |
201 | 258 | ||
202 | static void recover_done(void *arg, struct dlm_slot *slots, | 259 | static void recover_done(void *arg, struct dlm_slot *slots, |
@@ -338,6 +395,7 @@ static int leave(struct mddev *mddev) | |||
338 | 395 | ||
339 | if (!cinfo) | 396 | if (!cinfo) |
340 | return 0; | 397 | return 0; |
398 | md_unregister_thread(&cinfo->recovery_thread); | ||
341 | lockres_free(cinfo->sb_lock); | 399 | lockres_free(cinfo->sb_lock); |
342 | lockres_free(cinfo->bitmap_lockres); | 400 | lockres_free(cinfo->bitmap_lockres); |
343 | dlm_release_lockspace(cinfo->lockspace, 2); | 401 | dlm_release_lockspace(cinfo->lockspace, 2); |