diff options
| author | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2014-06-07 01:45:22 -0400 | 
|---|---|---|
| committer | Goldwyn Rodrigues <rgoldwyn@suse.com> | 2015-02-23 10:59:05 -0500 | 
| commit | e94987db2ed983aea4e45d22db9e17c6bbf2a623 (patch) | |
| tree | d216701edf17d2f1b6781be78da780a8c9cf6c18 /drivers/md/md-cluster.c | |
| parent | 11dd35daaab86d12270d23a10e8d242846a8830a (diff) | |
Initiate recovery on node failure
When a node fails, the DLM notifies us with the failed node's DLM slot number.
The recover_slot callback sets the bit corresponding to that slot number in
cluster_info->recovery_map and wakes up the recovery thread.
The recovery thread:
1. Derives the slot number from the recovery_map
2. Locks the bitmap corresponding to the slot
3. Copies the set bits to the node-local bitmap
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Diffstat (limited to 'drivers/md/md-cluster.c')
| -rw-r--r-- | drivers/md/md-cluster.c | 58 | 
1 files changed, 58 insertions, 0 deletions
| diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index b59c3a0ebd08..1f82d0d731ae 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/dlm.h> | 13 | #include <linux/dlm.h> | 
| 14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> | 
| 15 | #include "md.h" | 15 | #include "md.h" | 
| 16 | #include "bitmap.h" | ||
| 16 | #include "md-cluster.h" | 17 | #include "md-cluster.h" | 
| 17 | 18 | ||
| 18 | #define LVB_SIZE 64 | 19 | #define LVB_SIZE 64 | 
| @@ -49,6 +50,8 @@ struct md_cluster_info { | |||
| 49 | struct dlm_lock_resource *bitmap_lockres; | 50 | struct dlm_lock_resource *bitmap_lockres; | 
| 50 | struct list_head suspend_list; | 51 | struct list_head suspend_list; | 
| 51 | spinlock_t suspend_lock; | 52 | spinlock_t suspend_lock; | 
| 53 | struct md_thread *recovery_thread; | ||
| 54 | unsigned long recovery_map; | ||
| 52 | }; | 55 | }; | 
| 53 | 56 | ||
| 54 | static void sync_ast(void *arg) | 57 | static void sync_ast(void *arg) | 
| @@ -184,6 +187,50 @@ out: | |||
| 184 | return s; | 187 | return s; | 
| 185 | } | 188 | } | 
| 186 | 189 | ||
| 190 | void recover_bitmaps(struct md_thread *thread) | ||
| 191 | { | ||
| 192 | struct mddev *mddev = thread->mddev; | ||
| 193 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
| 194 | struct dlm_lock_resource *bm_lockres; | ||
| 195 | char str[64]; | ||
| 196 | int slot, ret; | ||
| 197 | struct suspend_info *s, *tmp; | ||
| 198 | sector_t lo, hi; | ||
| 199 | |||
| 200 | while (cinfo->recovery_map) { | ||
| 201 | slot = fls64((u64)cinfo->recovery_map) - 1; | ||
| 202 | |||
| 203 | /* Clear suspend_area associated with the bitmap */ | ||
| 204 | spin_lock_irq(&cinfo->suspend_lock); | ||
| 205 | list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list) | ||
| 206 | if (slot == s->slot) { | ||
| 207 | list_del(&s->list); | ||
| 208 | kfree(s); | ||
| 209 | } | ||
| 210 | spin_unlock_irq(&cinfo->suspend_lock); | ||
| 211 | |||
| 212 | snprintf(str, 64, "bitmap%04d", slot); | ||
| 213 | bm_lockres = lockres_init(mddev, str, NULL, 1); | ||
| 214 | if (!bm_lockres) { | ||
| 215 | pr_err("md-cluster: Cannot initialize bitmaps\n"); | ||
| 216 | goto clear_bit; | ||
| 217 | } | ||
| 218 | |||
| 219 | ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); | ||
| 220 | if (ret) { | ||
| 221 | pr_err("md-cluster: Could not DLM lock %s: %d\n", | ||
| 222 | str, ret); | ||
| 223 | goto clear_bit; | ||
| 224 | } | ||
| 225 | ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); | ||
| 226 | if (ret) | ||
| 227 | pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); | ||
| 228 | dlm_unlock_sync(bm_lockres); | ||
| 229 | clear_bit: | ||
| 230 | clear_bit(slot, &cinfo->recovery_map); | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 187 | static void recover_prep(void *arg) | 234 | static void recover_prep(void *arg) | 
| 188 | { | 235 | { | 
| 189 | } | 236 | } | 
| @@ -197,6 +244,16 @@ static void recover_slot(void *arg, struct dlm_slot *slot) | |||
| 197 | mddev->bitmap_info.cluster_name, | 244 | mddev->bitmap_info.cluster_name, | 
| 198 | slot->nodeid, slot->slot, | 245 | slot->nodeid, slot->slot, | 
| 199 | cinfo->slot_number); | 246 | cinfo->slot_number); | 
| 247 | set_bit(slot->slot - 1, &cinfo->recovery_map); | ||
| 248 | if (!cinfo->recovery_thread) { | ||
| 249 | cinfo->recovery_thread = md_register_thread(recover_bitmaps, | ||
| 250 | mddev, "recover"); | ||
| 251 | if (!cinfo->recovery_thread) { | ||
| 252 | pr_warn("md-cluster: Could not create recovery thread\n"); | ||
| 253 | return; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | md_wakeup_thread(cinfo->recovery_thread); | ||
| 200 | } | 257 | } | 
| 201 | 258 | ||
| 202 | static void recover_done(void *arg, struct dlm_slot *slots, | 259 | static void recover_done(void *arg, struct dlm_slot *slots, | 
| @@ -338,6 +395,7 @@ static int leave(struct mddev *mddev) | |||
| 338 | 395 | ||
| 339 | if (!cinfo) | 396 | if (!cinfo) | 
| 340 | return 0; | 397 | return 0; | 
| 398 | md_unregister_thread(&cinfo->recovery_thread); | ||
| 341 | lockres_free(cinfo->sb_lock); | 399 | lockres_free(cinfo->sb_lock); | 
| 342 | lockres_free(cinfo->bitmap_lockres); | 400 | lockres_free(cinfo->bitmap_lockres); | 
| 343 | dlm_release_lockspace(cinfo->lockspace, 2); | 401 | dlm_release_lockspace(cinfo->lockspace, 2); | 
