author     Goldwyn Rodrigues <rgoldwyn@suse.com>  2014-06-06 13:35:34 -0400
committer  Goldwyn Rodrigues <rgoldwyn@suse.com>  2015-02-23 08:30:11 -0500
commit     96ae923ab659e37dd5fc1e05ecbf654e2f94bcbe
tree       9a8868524e7dacb26caec999dd2a65884b22893d
parent     54519c5f4b398bcfe599f652b4ef4004d5fa63ff
Gather on-going resync information of other nodes
When a node joins, it does not know of other nodes performing resync. So, each node keeps the resync information in its LVB. When a new node joins, it reads the LVB of each "online" bitmap.

[TODO] The new node attempts to get the PW lock on the other bitmap; if it is successful, it reads the bitmap and performs the resync (if required) on its behalf. If the node does not get the PW lock, it requests CR and reads the LVB for the resync information.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
-rw-r--r--  drivers/md/md-cluster.c | 111
-rw-r--r--  drivers/md/md-cluster.h |   1
-rw-r--r--  drivers/md/md.c         |   8
3 files changed, 120 insertions(+), 0 deletions(-)
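As a rough illustration of the LVB format used by this patch (a userspace sketch under assumed names, not part of the patch or of the kernel API): the resync window [lo, hi] is stored at the start of the bitmap lock's 64-byte LVB as two little-endian 64-bit values, and hi == 0 is taken to mean "no resync in progress". The helpers pack_resync_lvb()/unpack_resync_lvb() below are invented for the example; the kernel code instead operates on struct resync_info with cpu_to_le64()/le64_to_cpu(), as in add_resync_info() and read_resync_info() in the diff.

/*
 * Userspace sketch only (assumed helper names, not kernel code): models how
 * a resync window is packed into / read from the first 16 bytes of the
 * bitmap lock's LVB, mirroring struct resync_info in the patch below.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LVB_SIZE 64   /* the DLM LVB used here is 64 bytes; only 16 are used */

/* Store lo/hi as two little-endian 64-bit values, like add_resync_info(). */
static void pack_resync_lvb(unsigned char *lvb, uint64_t lo, uint64_t hi)
{
        unsigned char buf[16];
        int i;

        for (i = 0; i < 8; i++) {
                buf[i]     = (lo >> (8 * i)) & 0xff;  /* __le64 lo */
                buf[8 + i] = (hi >> (8 * i)) & 0xff;  /* __le64 hi */
        }
        memcpy(lvb, buf, sizeof(buf));
}

/* Decode the window; hi == 0 means "no resync", as in read_resync_info(). */
static int unpack_resync_lvb(const unsigned char *lvb, uint64_t *lo, uint64_t *hi)
{
        uint64_t l = 0, h = 0;
        int i;

        for (i = 7; i >= 0; i--) {
                l = (l << 8) | lvb[i];
                h = (h << 8) | lvb[8 + i];
        }
        *lo = l;
        *hi = h;
        return h > 0;
}

int main(void)
{
        unsigned char lvb[LVB_SIZE] = { 0 };
        uint64_t lo, hi;

        pack_resync_lvb(lvb, 2048, 1048576);
        if (unpack_resync_lvb(lvb, &lo, &hi))
                printf("resync in progress: [%llu..%llu]\n",
                       (unsigned long long)lo, (unsigned long long)hi);
        return 0;
}

Any C99 compiler will build the sketch (e.g. cc -o lvb lvb.c); it only demonstrates the byte layout and the hi == 0 convention.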
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 75c6602f4c75..b59c3a0ebd08 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -27,6 +27,18 @@ struct dlm_lock_resource {
         struct mddev *mddev; /* pointing back to mddev. */
 };
 
+struct suspend_info {
+        int slot;
+        sector_t lo;
+        sector_t hi;
+        struct list_head list;
+};
+
+struct resync_info {
+        __le64 lo;
+        __le64 hi;
+};
+
 struct md_cluster_info {
         /* dlm lock space and resources for clustered raid. */
         dlm_lockspace_t *lockspace;
@@ -35,6 +47,8 @@ struct md_cluster_info {
         struct dlm_lock_resource *sb_lock;
         struct mutex sb_mutex;
         struct dlm_lock_resource *bitmap_lockres;
+        struct list_head suspend_list;
+        spinlock_t suspend_lock;
 };
 
 static void sync_ast(void *arg)
@@ -139,6 +153,37 @@ static char *pretty_uuid(char *dest, char *src)
         return dest;
 }
 
+static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
+                sector_t lo, sector_t hi)
+{
+        struct resync_info *ri;
+
+        ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
+        ri->lo = cpu_to_le64(lo);
+        ri->hi = cpu_to_le64(hi);
+}
+
+static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
+{
+        struct resync_info ri;
+        struct suspend_info *s = NULL;
+        sector_t hi = 0;
+
+        dlm_lock_sync(lockres, DLM_LOCK_CR);
+        memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
+        hi = le64_to_cpu(ri.hi);
+        if (hi > 0) {
+                s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
+                if (!s)
+                        goto out;
+                s->hi = hi;
+                s->lo = le64_to_cpu(ri.lo);
+        }
+        dlm_unlock_sync(lockres);
+out:
+        return s;
+}
+
 static void recover_prep(void *arg)
 {
 }
@@ -171,6 +216,53 @@ static const struct dlm_lockspace_ops md_ls_ops = {
         .recover_done = recover_done,
 };
 
+static int gather_all_resync_info(struct mddev *mddev, int total_slots)
+{
+        struct md_cluster_info *cinfo = mddev->cluster_info;
+        int i, ret = 0;
+        struct dlm_lock_resource *bm_lockres;
+        struct suspend_info *s;
+        char str[64];
+
+
+        for (i = 0; i < total_slots; i++) {
+                memset(str, '\0', 64);
+                snprintf(str, 64, "bitmap%04d", i);
+                bm_lockres = lockres_init(mddev, str, NULL, 1);
+                if (!bm_lockres)
+                        return -ENOMEM;
+                if (i == (cinfo->slot_number - 1))
+                        continue;
+
+                bm_lockres->flags |= DLM_LKF_NOQUEUE;
+                ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+                if (ret == -EAGAIN) {
+                        memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
+                        s = read_resync_info(mddev, bm_lockres);
+                        if (s) {
+                                pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
+                                                __func__, __LINE__,
+                                                (unsigned long long) s->lo,
+                                                (unsigned long long) s->hi, i);
+                                spin_lock_irq(&cinfo->suspend_lock);
+                                s->slot = i;
+                                list_add(&s->list, &cinfo->suspend_list);
+                                spin_unlock_irq(&cinfo->suspend_lock);
+                        }
+                        ret = 0;
+                        lockres_free(bm_lockres);
+                        continue;
+                }
+                if (ret)
+                        goto out;
+                /* TODO: Read the disk bitmap sb and check if it needs recovery */
+                dlm_unlock_sync(bm_lockres);
+                lockres_free(bm_lockres);
+        }
+out:
+        return ret;
+}
+
 static int join(struct mddev *mddev, int nodes)
 {
         struct md_cluster_info *cinfo;
@@ -221,8 +313,17 @@ static int join(struct mddev *mddev, int nodes)
                 goto err;
         }
 
+        INIT_LIST_HEAD(&cinfo->suspend_list);
+        spin_lock_init(&cinfo->suspend_lock);
+
+        ret = gather_all_resync_info(mddev, nodes);
+        if (ret)
+                goto err;
+
         return 0;
 err:
+        lockres_free(cinfo->bitmap_lockres);
+        lockres_free(cinfo->sb_lock);
         if (cinfo->lockspace)
                 dlm_release_lockspace(cinfo->lockspace, 2);
         mddev->cluster_info = NULL;
@@ -254,10 +355,20 @@ static int slot_number(struct mddev *mddev)
         return cinfo->slot_number - 1;
 }
 
+static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+        struct md_cluster_info *cinfo = mddev->cluster_info;
+
+        add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+        /* Re-acquire the lock to refresh LVB */
+        dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
+}
+
 static struct md_cluster_operations cluster_ops = {
         .join = join,
         .leave = leave,
         .slot_number = slot_number,
+        .resync_info_update = resync_info_update,
 };
 
 static int __init cluster_init(void)
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 52a21e0d6dbc..51a24df15b64 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -11,6 +11,7 @@ struct md_cluster_operations {
         int (*join)(struct mddev *mddev, int nodes);
         int (*leave)(struct mddev *mddev);
         int (*slot_number)(struct mddev *mddev);
+        void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
 };
 
 #endif /* _MD_CLUSTER_H */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8f310d98f082..71f655015385 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7626,6 +7626,9 @@ void md_do_sync(struct md_thread *thread)
         md_new_event(mddev);
         update_time = jiffies;
 
+        if (mddev_is_clustered(mddev))
+                md_cluster_ops->resync_info_update(mddev, j, max_sectors);
+
         blk_start_plug(&plug);
         while (j < max_sectors) {
                 sector_t sectors;
@@ -7686,6 +7689,8 @@ void md_do_sync(struct md_thread *thread)
                 j += sectors;
                 if (j > 2)
                         mddev->curr_resync = j;
+                if (mddev_is_clustered(mddev))
+                        md_cluster_ops->resync_info_update(mddev, j, max_sectors);
                 mddev->curr_mark_cnt = io_sectors;
                 if (last_check == 0)
                         /* this is the earliest that rebuild will be
@@ -7746,6 +7751,9 @@ void md_do_sync(struct md_thread *thread)
         /* tell personality that we are finished */
         mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
+        if (mddev_is_clustered(mddev))
+                md_cluster_ops->resync_info_update(mddev, 0, 0);
+
         if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
             mddev->curr_resync > 2) {
                 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {