aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author Goldwyn Rodrigues <rgoldwyn@suse.com> 2014-06-07 01:45:22 -0400
committer Goldwyn Rodrigues <rgoldwyn@suse.com> 2015-02-23 10:59:05 -0500
commit e94987db2ed983aea4e45d22db9e17c6bbf2a623 (patch)
tree d216701edf17d2f1b6781be78da780a8c9cf6c18 /drivers/md
parent 11dd35daaab86d12270d23a10e8d242846a8830a (diff)
Initiate recovery on node failure
When a node fails, the DLM informs us of the failure along with the failed node's DLM slot number. The bit corresponding to that slot number is set in cluster_info->recovery_map and the recovery thread is woken up.

The recovery thread:
1. Derives the slot number from the recovery_map
2. Locks the bitmap corresponding to the slot
3. Copies the set bits to the node-local bitmap

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md-cluster.c58
1 files changed, 58 insertions, 0 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index b59c3a0ebd08..1f82d0d731ae 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -13,6 +13,7 @@
13#include <linux/dlm.h> 13#include <linux/dlm.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include "md.h" 15#include "md.h"
16#include "bitmap.h"
16#include "md-cluster.h" 17#include "md-cluster.h"
17 18
18#define LVB_SIZE 64 19#define LVB_SIZE 64
@@ -49,6 +50,8 @@ struct md_cluster_info {
49 struct dlm_lock_resource *bitmap_lockres; 50 struct dlm_lock_resource *bitmap_lockres;
50 struct list_head suspend_list; 51 struct list_head suspend_list;
51 spinlock_t suspend_lock; 52 spinlock_t suspend_lock;
53 struct md_thread *recovery_thread;
54 unsigned long recovery_map;
52}; 55};
53 56
54static void sync_ast(void *arg) 57static void sync_ast(void *arg)
@@ -184,6 +187,50 @@ out:
184 return s; 187 return s;
185} 188}
186 189
190void recover_bitmaps(struct md_thread *thread)
191{
192 struct mddev *mddev = thread->mddev;
193 struct md_cluster_info *cinfo = mddev->cluster_info;
194 struct dlm_lock_resource *bm_lockres;
195 char str[64];
196 int slot, ret;
197 struct suspend_info *s, *tmp;
198 sector_t lo, hi;
199
200 while (cinfo->recovery_map) {
201 slot = fls64((u64)cinfo->recovery_map) - 1;
202
203 /* Clear suspend_area associated with the bitmap */
204 spin_lock_irq(&cinfo->suspend_lock);
205 list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
206 if (slot == s->slot) {
207 list_del(&s->list);
208 kfree(s);
209 }
210 spin_unlock_irq(&cinfo->suspend_lock);
211
212 snprintf(str, 64, "bitmap%04d", slot);
213 bm_lockres = lockres_init(mddev, str, NULL, 1);
214 if (!bm_lockres) {
215 pr_err("md-cluster: Cannot initialize bitmaps\n");
216 goto clear_bit;
217 }
218
219 ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
220 if (ret) {
221 pr_err("md-cluster: Could not DLM lock %s: %d\n",
222 str, ret);
223 goto clear_bit;
224 }
225 ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi);
226 if (ret)
227 pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
228 dlm_unlock_sync(bm_lockres);
229clear_bit:
230 clear_bit(slot, &cinfo->recovery_map);
231 }
232}
233
187static void recover_prep(void *arg) 234static void recover_prep(void *arg)
188{ 235{
189} 236}
@@ -197,6 +244,16 @@ static void recover_slot(void *arg, struct dlm_slot *slot)
197 mddev->bitmap_info.cluster_name, 244 mddev->bitmap_info.cluster_name,
198 slot->nodeid, slot->slot, 245 slot->nodeid, slot->slot,
199 cinfo->slot_number); 246 cinfo->slot_number);
247 set_bit(slot->slot - 1, &cinfo->recovery_map);
248 if (!cinfo->recovery_thread) {
249 cinfo->recovery_thread = md_register_thread(recover_bitmaps,
250 mddev, "recover");
251 if (!cinfo->recovery_thread) {
252 pr_warn("md-cluster: Could not create recovery thread\n");
253 return;
254 }
255 }
256 md_wakeup_thread(cinfo->recovery_thread);
200} 257}
201 258
202static void recover_done(void *arg, struct dlm_slot *slots, 259static void recover_done(void *arg, struct dlm_slot *slots,
@@ -338,6 +395,7 @@ static int leave(struct mddev *mddev)
338 395
339 if (!cinfo) 396 if (!cinfo)
340 return 0; 397 return 0;
398 md_unregister_thread(&cinfo->recovery_thread);
341 lockres_free(cinfo->sb_lock); 399 lockres_free(cinfo->sb_lock);
342 lockres_free(cinfo->bitmap_lockres); 400 lockres_free(cinfo->bitmap_lockres);
343 dlm_release_lockspace(cinfo->lockspace, 2); 401 dlm_release_lockspace(cinfo->lockspace, 2);