aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorDongmao Zhang <dmzhang@suse.com>2014-01-15 16:44:37 -0500
committerMike Snitzer <snitzer@redhat.com>2014-01-21 23:46:27 -0500
commit5066a4df1f427faac8372d20494483bb09a4a1cd (patch)
tree8003d4e33188a862bd9bf8ea6a06f4457084f5bf /drivers/md
parentfca028438fb903852beaf7c3fe1cd326651af57d (diff)
dm log userspace: allow mark requests to piggyback on flush requests
In a cluster environment, cluster write has poor performance because userspace_flush() has to contact a userspace program (cmirrord) for clear/mark/flush requests. But both mark and flush requests require cmirrord to communicate the message to all the cluster nodes on each flush call. This behaviour is really slow. To address this we now merge mark and flush requests together to reduce the kernel-userspace-kernel time. We allow a new directive, "integrated_flush", that can be used to instruct the kernel log code to combine flush and mark requests when directed by userspace. If not directed by userspace (due to an older version of the userspace code perhaps), the kernel will function as it did previously - preserving backwards compatibility. Additionally, flush requests are performed lazily when only clear requests exist. Signed-off-by: Dongmao Zhang <dmzhang@suse.com> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-log-userspace-base.c206
1 files changed, 159 insertions, 47 deletions
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 9429159d9ee3..b953db6cc229 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -10,10 +10,11 @@
10#include <linux/device-mapper.h> 10#include <linux/device-mapper.h>
11#include <linux/dm-log-userspace.h> 11#include <linux/dm-log-userspace.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/workqueue.h>
13 14
14#include "dm-log-userspace-transfer.h" 15#include "dm-log-userspace-transfer.h"
15 16
16#define DM_LOG_USERSPACE_VSN "1.1.0" 17#define DM_LOG_USERSPACE_VSN "1.3.0"
17 18
18struct flush_entry { 19struct flush_entry {
19 int type; 20 int type;
@@ -58,6 +59,18 @@ struct log_c {
58 spinlock_t flush_lock; 59 spinlock_t flush_lock;
59 struct list_head mark_list; 60 struct list_head mark_list;
60 struct list_head clear_list; 61 struct list_head clear_list;
62
63 /*
64 * Workqueue for flush of clear region requests.
65 */
66 struct workqueue_struct *dmlog_wq;
67 struct delayed_work flush_log_work;
68 atomic_t sched_flush;
69
70 /*
71 * Combine userspace flush and mark requests for efficiency.
72 */
73 uint32_t integrated_flush;
61}; 74};
62 75
63static mempool_t *flush_entry_pool; 76static mempool_t *flush_entry_pool;
@@ -122,6 +135,9 @@ static int build_constructor_string(struct dm_target *ti,
122 135
123 *ctr_str = NULL; 136 *ctr_str = NULL;
124 137
138 /*
139 * Determine overall size of the string.
140 */
125 for (i = 0, str_size = 0; i < argc; i++) 141 for (i = 0, str_size = 0; i < argc; i++)
126 str_size += strlen(argv[i]) + 1; /* +1 for space between args */ 142 str_size += strlen(argv[i]) + 1; /* +1 for space between args */
127 143
@@ -141,18 +157,39 @@ static int build_constructor_string(struct dm_target *ti,
141 return str_size; 157 return str_size;
142} 158}
143 159
160static void do_flush(struct work_struct *work)
161{
162 int r;
163 struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);
164
165 atomic_set(&lc->sched_flush, 0);
166
167 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);
168
169 if (r)
170 dm_table_event(lc->ti->table);
171}
172
144/* 173/*
145 * userspace_ctr 174 * userspace_ctr
146 * 175 *
147 * argv contains: 176 * argv contains:
148 * <UUID> <other args> 177 * <UUID> [integrated_flush] <other args>
149 * Where 'other args' is the userspace implementation specific log 178 * Where 'other args' are the userspace implementation-specific log
150 * arguments. An example might be: 179 * arguments.
151 * <UUID> clustered-disk <arg count> <log dev> <region_size> [[no]sync] 180 *
181 * Example:
182 * <UUID> [integrated_flush] clustered-disk <arg count> <log dev>
183 * <region_size> [[no]sync]
184 *
185 * This module strips off the <UUID> and uses it for identification
186 * purposes when communicating with userspace about a log.
152 * 187 *
153 * So, this module will strip off the <UUID> for identification purposes 188 * If integrated_flush is defined, the kernel combines flush
154 * when communicating with userspace about a log; but will pass on everything 189 * and mark requests.
155 * else. 190 *
191 * The rest of the line, beginning with 'clustered-disk', is passed
192 * to the userspace ctr function.
156 */ 193 */
157static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, 194static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
158 unsigned argc, char **argv) 195 unsigned argc, char **argv)
@@ -188,12 +225,22 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
188 return -EINVAL; 225 return -EINVAL;
189 } 226 }
190 227
228 lc->usr_argc = argc;
229
191 strncpy(lc->uuid, argv[0], DM_UUID_LEN); 230 strncpy(lc->uuid, argv[0], DM_UUID_LEN);
231 argc--;
232 argv++;
192 spin_lock_init(&lc->flush_lock); 233 spin_lock_init(&lc->flush_lock);
193 INIT_LIST_HEAD(&lc->mark_list); 234 INIT_LIST_HEAD(&lc->mark_list);
194 INIT_LIST_HEAD(&lc->clear_list); 235 INIT_LIST_HEAD(&lc->clear_list);
195 236
196 str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); 237 if (!strcasecmp(argv[0], "integrated_flush")) {
238 lc->integrated_flush = 1;
239 argc--;
240 argv++;
241 }
242
243 str_size = build_constructor_string(ti, argc, argv, &ctr_str);
197 if (str_size < 0) { 244 if (str_size < 0) {
198 kfree(lc); 245 kfree(lc);
199 return str_size; 246 return str_size;
@@ -246,6 +293,19 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
246 DMERR("Failed to register %s with device-mapper", 293 DMERR("Failed to register %s with device-mapper",
247 devices_rdata); 294 devices_rdata);
248 } 295 }
296
297 if (lc->integrated_flush) {
298 lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
299 if (!lc->dmlog_wq) {
300 DMERR("couldn't start dmlogd");
301 r = -ENOMEM;
302 goto out;
303 }
304
305 INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
306 atomic_set(&lc->sched_flush, 0);
307 }
308
249out: 309out:
250 kfree(devices_rdata); 310 kfree(devices_rdata);
251 if (r) { 311 if (r) {
@@ -253,7 +313,6 @@ out:
253 kfree(ctr_str); 313 kfree(ctr_str);
254 } else { 314 } else {
255 lc->usr_argv_str = ctr_str; 315 lc->usr_argv_str = ctr_str;
256 lc->usr_argc = argc;
257 log->context = lc; 316 log->context = lc;
258 } 317 }
259 318
@@ -264,9 +323,16 @@ static void userspace_dtr(struct dm_dirty_log *log)
264{ 323{
265 struct log_c *lc = log->context; 324 struct log_c *lc = log->context;
266 325
326 if (lc->integrated_flush) {
327 /* flush workqueue */
328 if (atomic_read(&lc->sched_flush))
329 flush_delayed_work(&lc->flush_log_work);
330
331 destroy_workqueue(lc->dmlog_wq);
332 }
333
267 (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, 334 (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
268 NULL, 0, 335 NULL, 0, NULL, NULL);
269 NULL, NULL);
270 336
271 if (lc->log_dev) 337 if (lc->log_dev)
272 dm_put_device(lc->ti, lc->log_dev); 338 dm_put_device(lc->ti, lc->log_dev);
@@ -283,8 +349,7 @@ static int userspace_presuspend(struct dm_dirty_log *log)
283 struct log_c *lc = log->context; 349 struct log_c *lc = log->context;
284 350
285 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, 351 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
286 NULL, 0, 352 NULL, 0, NULL, NULL);
287 NULL, NULL);
288 353
289 return r; 354 return r;
290} 355}
@@ -294,9 +359,14 @@ static int userspace_postsuspend(struct dm_dirty_log *log)
294 int r; 359 int r;
295 struct log_c *lc = log->context; 360 struct log_c *lc = log->context;
296 361
362 /*
363 * Run planned flush earlier.
364 */
365 if (lc->integrated_flush && atomic_read(&lc->sched_flush))
366 flush_delayed_work(&lc->flush_log_work);
367
297 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, 368 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
298 NULL, 0, 369 NULL, 0, NULL, NULL);
299 NULL, NULL);
300 370
301 return r; 371 return r;
302} 372}
@@ -308,8 +378,7 @@ static int userspace_resume(struct dm_dirty_log *log)
308 378
309 lc->in_sync_hint = 0; 379 lc->in_sync_hint = 0;
310 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, 380 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
311 NULL, 0, 381 NULL, 0, NULL, NULL);
312 NULL, NULL);
313 382
314 return r; 383 return r;
315} 384}
@@ -405,7 +474,8 @@ static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
405 return r; 474 return r;
406} 475}
407 476
408static int flush_by_group(struct log_c *lc, struct list_head *flush_list) 477static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
478 int flush_with_payload)
409{ 479{
410 int r = 0; 480 int r = 0;
411 int count; 481 int count;
@@ -431,15 +501,29 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
431 break; 501 break;
432 } 502 }
433 503
434 r = userspace_do_request(lc, lc->uuid, type, 504 if (flush_with_payload) {
435 (char *)(group), 505 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
436 count * sizeof(uint64_t), 506 (char *)(group),
437 NULL, NULL); 507 count * sizeof(uint64_t),
438 if (r) { 508 NULL, NULL);
439 /* Group send failed. Attempt one-by-one. */ 509 /*
440 list_splice_init(&tmp_list, flush_list); 510 * Integrated flush failed.
441 r = flush_one_by_one(lc, flush_list); 511 */
442 break; 512 if (r)
513 break;
514 } else {
515 r = userspace_do_request(lc, lc->uuid, type,
516 (char *)(group),
517 count * sizeof(uint64_t),
518 NULL, NULL);
519 if (r) {
520 /*
521 * Group send failed. Attempt one-by-one.
522 */
523 list_splice_init(&tmp_list, flush_list);
524 r = flush_one_by_one(lc, flush_list);
525 break;
526 }
443 } 527 }
444 } 528 }
445 529
@@ -476,6 +560,8 @@ static int userspace_flush(struct dm_dirty_log *log)
476 struct log_c *lc = log->context; 560 struct log_c *lc = log->context;
477 LIST_HEAD(mark_list); 561 LIST_HEAD(mark_list);
478 LIST_HEAD(clear_list); 562 LIST_HEAD(clear_list);
563 int mark_list_is_empty;
564 int clear_list_is_empty;
479 struct flush_entry *fe, *tmp_fe; 565 struct flush_entry *fe, *tmp_fe;
480 566
481 spin_lock_irqsave(&lc->flush_lock, flags); 567 spin_lock_irqsave(&lc->flush_lock, flags);
@@ -483,23 +569,51 @@ static int userspace_flush(struct dm_dirty_log *log)
483 list_splice_init(&lc->clear_list, &clear_list); 569 list_splice_init(&lc->clear_list, &clear_list);
484 spin_unlock_irqrestore(&lc->flush_lock, flags); 570 spin_unlock_irqrestore(&lc->flush_lock, flags);
485 571
486 if (list_empty(&mark_list) && list_empty(&clear_list)) 572 mark_list_is_empty = list_empty(&mark_list);
573 clear_list_is_empty = list_empty(&clear_list);
574
575 if (mark_list_is_empty && clear_list_is_empty)
487 return 0; 576 return 0;
488 577
489 r = flush_by_group(lc, &mark_list); 578 r = flush_by_group(lc, &clear_list, 0);
490 if (r) 579 if (r)
491 goto fail; 580 goto out;
492 581
493 r = flush_by_group(lc, &clear_list); 582 if (!lc->integrated_flush) {
583 r = flush_by_group(lc, &mark_list, 0);
584 if (r)
585 goto out;
586 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
587 NULL, 0, NULL, NULL);
588 goto out;
589 }
590
591 /*
592 * Send integrated flush request with mark_list as payload.
593 */
594 r = flush_by_group(lc, &mark_list, 1);
494 if (r) 595 if (r)
495 goto fail; 596 goto out;
496 597
497 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, 598 if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
498 NULL, 0, NULL, NULL); 599 /*
600 * When there are only clear region requests,
601 * we schedule a flush in the future.
602 */
603 queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
604 atomic_set(&lc->sched_flush, 1);
605 } else {
606 /*
607 * Cancel pending flush because we
608 * have already flushed in mark_region.
609 */
610 cancel_delayed_work(&lc->flush_log_work);
611 atomic_set(&lc->sched_flush, 0);
612 }
499 613
500fail: 614out:
501 /* 615 /*
502 * We can safely remove these entries, even if failure. 616 * We can safely remove these entries, even after failure.
503 * Calling code will receive an error and will know that 617 * Calling code will receive an error and will know that
504 * the log facility has failed. 618 * the log facility has failed.
505 */ 619 */
@@ -603,8 +717,7 @@ static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region)
603 717
604 rdata_size = sizeof(pkg); 718 rdata_size = sizeof(pkg);
605 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, 719 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
606 NULL, 0, 720 NULL, 0, (char *)&pkg, &rdata_size);
607 (char *)&pkg, &rdata_size);
608 721
609 *region = pkg.r; 722 *region = pkg.r;
610 return (r) ? r : (int)pkg.i; 723 return (r) ? r : (int)pkg.i;
@@ -630,8 +743,7 @@ static void userspace_set_region_sync(struct dm_dirty_log *log,
630 pkg.i = (int64_t)in_sync; 743 pkg.i = (int64_t)in_sync;
631 744
632 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, 745 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
633 (char *)&pkg, sizeof(pkg), 746 (char *)&pkg, sizeof(pkg), NULL, NULL);
634 NULL, NULL);
635 747
636 /* 748 /*
637 * It would be nice to be able to report failures. 749 * It would be nice to be able to report failures.
@@ -657,8 +769,7 @@ static region_t userspace_get_sync_count(struct dm_dirty_log *log)
657 769
658 rdata_size = sizeof(sync_count); 770 rdata_size = sizeof(sync_count);
659 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, 771 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
660 NULL, 0, 772 NULL, 0, (char *)&sync_count, &rdata_size);
661 (char *)&sync_count, &rdata_size);
662 773
663 if (r) 774 if (r)
664 return 0; 775 return 0;
@@ -685,8 +796,7 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
685 switch (status_type) { 796 switch (status_type) {
686 case STATUSTYPE_INFO: 797 case STATUSTYPE_INFO:
687 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, 798 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
688 NULL, 0, 799 NULL, 0, result, &sz);
689 result, &sz);
690 800
691 if (r) { 801 if (r) {
692 sz = 0; 802 sz = 0;
@@ -699,8 +809,10 @@ static int userspace_status(struct dm_dirty_log *log, status_type_t status_type,
699 BUG_ON(!table_args); /* There will always be a ' ' */ 809 BUG_ON(!table_args); /* There will always be a ' ' */
700 table_args++; 810 table_args++;
701 811
702 DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, 812 DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
703 lc->uuid, table_args); 813 if (lc->integrated_flush)
814 DMEMIT("integrated_flush ");
815 DMEMIT("%s ", table_args);
704 break; 816 break;
705 } 817 }
706 return (r) ? 0 : (int)sz; 818 return (r) ? 0 : (int)sz;