diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2010-07-06 05:14:00 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2010-10-14 12:38:14 -0400 |
commit | 778f271dfe7a7173c0bae2d6cde8d9bd1533e668 (patch) | |
tree | 1c057622152bd652102749b488653bff8be24c2a /drivers/block | |
parent | 8e26f9ccb9be00fdb33551a34c8f6029e89ab79f (diff) |
drbd: The new, smarter resync speed controller
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 11 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 22 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 20 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 98 |
5 files changed, 151 insertions, 1 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fd2cdd45f155..facb72ccc56b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -928,6 +928,12 @@ enum write_ordering_e { | |||
928 | WO_bio_barrier | 928 | WO_bio_barrier |
929 | }; | 929 | }; |
930 | 930 | ||
/*
 * Ring of int values with a moving head position.
 * Embedded in struct drbd_conf as rs_plan_s, where it holds the
 * correction values of the resync planner.
 */
struct fifo_buffer {
	/* storage for the ring; holds 'size' ints */
	int *values;
	/* slot that the next push reads and then overwrites */
	unsigned int head_index;
	/* number of slots in 'values' */
	unsigned int size;
};
936 | |||
931 | struct drbd_conf { | 937 | struct drbd_conf { |
932 | /* things that are stored as / read from meta data on disk */ | 938 | /* things that are stored as / read from meta data on disk */ |
933 | unsigned long flags; | 939 | unsigned long flags; |
@@ -1068,6 +1074,11 @@ struct drbd_conf { | |||
1068 | u64 ed_uuid; /* UUID of the exposed data */ | 1074 | u64 ed_uuid; /* UUID of the exposed data */ |
1069 | struct mutex state_mutex; | 1075 | struct mutex state_mutex; |
1070 | char congestion_reason; /* Why we where congested... */ | 1076 | char congestion_reason; /* Why we where congested... */ |
1077 | atomic_t rs_sect_in; /* counter to measure the incoming resync data rate */ | ||
1078 | int c_sync_rate; /* current resync rate after delay_probe magic */ | ||
1079 | struct fifo_buffer rs_plan_s; /* correction values of resync planer */ | ||
1080 | int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ | ||
1081 | int rs_planed; /* resync sectors already planed */ | ||
1071 | }; | 1082 | }; |
1072 | 1083 | ||
1073 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) | 1084 | static inline struct drbd_conf *minor_to_mdev(unsigned int minor) |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bff4f598d38f..ed09a840d838 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -2734,6 +2734,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
2734 | atomic_set(&mdev->net_cnt, 0); | 2734 | atomic_set(&mdev->net_cnt, 0); |
2735 | atomic_set(&mdev->packet_seq, 0); | 2735 | atomic_set(&mdev->packet_seq, 0); |
2736 | atomic_set(&mdev->pp_in_use, 0); | 2736 | atomic_set(&mdev->pp_in_use, 0); |
2737 | atomic_set(&mdev->rs_sect_in, 0); | ||
2737 | 2738 | ||
2738 | mutex_init(&mdev->md_io_mutex); | 2739 | mutex_init(&mdev->md_io_mutex); |
2739 | mutex_init(&mdev->data.mutex); | 2740 | mutex_init(&mdev->data.mutex); |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 7d384fd39c16..295b8d593708 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -1587,6 +1587,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
1587 | struct crypto_hash *csums_tfm = NULL; | 1587 | struct crypto_hash *csums_tfm = NULL; |
1588 | struct syncer_conf sc; | 1588 | struct syncer_conf sc; |
1589 | cpumask_var_t new_cpu_mask; | 1589 | cpumask_var_t new_cpu_mask; |
1590 | int *rs_plan_s = NULL; | ||
1591 | int fifo_size; | ||
1590 | 1592 | ||
1591 | if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { | 1593 | if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { |
1592 | retcode = ERR_NOMEM; | 1594 | retcode = ERR_NOMEM; |
@@ -1687,6 +1689,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
1687 | if (retcode != NO_ERROR) | 1689 | if (retcode != NO_ERROR) |
1688 | goto fail; | 1690 | goto fail; |
1689 | 1691 | ||
1692 | fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; | ||
1693 | if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { | ||
1694 | rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); | ||
1695 | if (!rs_plan_s) { | ||
1696 | dev_err(DEV, "kmalloc of fifo_buffer failed"); | ||
1697 | retcode = ERR_NOMEM; | ||
1698 | goto fail; | ||
1699 | } | ||
1700 | } | ||
1701 | |||
1690 | /* ok, assign the rest of it as well. | 1702 | /* ok, assign the rest of it as well. |
1691 | * lock against receive_SyncParam() */ | 1703 | * lock against receive_SyncParam() */ |
1692 | spin_lock(&mdev->peer_seq_lock); | 1704 | spin_lock(&mdev->peer_seq_lock); |
@@ -1703,6 +1715,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
1703 | mdev->verify_tfm = verify_tfm; | 1715 | mdev->verify_tfm = verify_tfm; |
1704 | verify_tfm = NULL; | 1716 | verify_tfm = NULL; |
1705 | } | 1717 | } |
1718 | |||
1719 | if (fifo_size != mdev->rs_plan_s.size) { | ||
1720 | kfree(mdev->rs_plan_s.values); | ||
1721 | mdev->rs_plan_s.values = rs_plan_s; | ||
1722 | mdev->rs_plan_s.size = fifo_size; | ||
1723 | mdev->rs_planed = 0; | ||
1724 | rs_plan_s = NULL; | ||
1725 | } | ||
1726 | |||
1706 | spin_unlock(&mdev->peer_seq_lock); | 1727 | spin_unlock(&mdev->peer_seq_lock); |
1707 | 1728 | ||
1708 | if (get_ldev(mdev)) { | 1729 | if (get_ldev(mdev)) { |
@@ -1734,6 +1755,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n | |||
1734 | 1755 | ||
1735 | kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); | 1756 | kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); |
1736 | fail: | 1757 | fail: |
1758 | kfree(rs_plan_s); | ||
1737 | free_cpumask_var(new_cpu_mask); | 1759 | free_cpumask_var(new_cpu_mask); |
1738 | crypto_free_hash(csums_tfm); | 1760 | crypto_free_hash(csums_tfm); |
1739 | crypto_free_hash(verify_tfm); | 1761 | crypto_free_hash(verify_tfm); |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 34bea972f734..5f80b22e711d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -1640,6 +1640,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) | |||
1640 | drbd_send_ack_dp(mdev, P_NEG_ACK, p); | 1640 | drbd_send_ack_dp(mdev, P_NEG_ACK, p); |
1641 | } | 1641 | } |
1642 | 1642 | ||
1643 | atomic_add(data_size >> 9, &mdev->rs_sect_in); | ||
1644 | |||
1643 | return ok; | 1645 | return ok; |
1644 | } | 1646 | } |
1645 | 1647 | ||
@@ -2810,6 +2812,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) | |||
2810 | struct crypto_hash *verify_tfm = NULL; | 2812 | struct crypto_hash *verify_tfm = NULL; |
2811 | struct crypto_hash *csums_tfm = NULL; | 2813 | struct crypto_hash *csums_tfm = NULL; |
2812 | const int apv = mdev->agreed_pro_version; | 2814 | const int apv = mdev->agreed_pro_version; |
2815 | int *rs_plan_s = NULL; | ||
2816 | int fifo_size = 0; | ||
2813 | 2817 | ||
2814 | exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) | 2818 | exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) |
2815 | : apv == 88 ? sizeof(struct p_rs_param) | 2819 | : apv == 88 ? sizeof(struct p_rs_param) |
@@ -2904,6 +2908,15 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) | |||
2904 | mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); | 2908 | mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); |
2905 | mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); | 2909 | mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); |
2906 | mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); | 2910 | mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); |
2911 | |||
2912 | fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; | ||
2913 | if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { | ||
2914 | rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); | ||
2915 | if (!rs_plan_s) { | ||
2916 | dev_err(DEV, "kmalloc of fifo_buffer failed"); | ||
2917 | goto disconnect; | ||
2918 | } | ||
2919 | } | ||
2907 | } | 2920 | } |
2908 | 2921 | ||
2909 | spin_lock(&mdev->peer_seq_lock); | 2922 | spin_lock(&mdev->peer_seq_lock); |
@@ -2922,6 +2935,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) | |||
2922 | mdev->csums_tfm = csums_tfm; | 2935 | mdev->csums_tfm = csums_tfm; |
2923 | dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); | 2936 | dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); |
2924 | } | 2937 | } |
2938 | if (fifo_size != mdev->rs_plan_s.size) { | ||
2939 | kfree(mdev->rs_plan_s.values); | ||
2940 | mdev->rs_plan_s.values = rs_plan_s; | ||
2941 | mdev->rs_plan_s.size = fifo_size; | ||
2942 | mdev->rs_planed = 0; | ||
2943 | } | ||
2925 | spin_unlock(&mdev->peer_seq_lock); | 2944 | spin_unlock(&mdev->peer_seq_lock); |
2926 | } | 2945 | } |
2927 | 2946 | ||
@@ -4202,6 +4221,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) | |||
4202 | /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ | 4221 | /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ |
4203 | mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); | 4222 | mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); |
4204 | dec_rs_pending(mdev); | 4223 | dec_rs_pending(mdev); |
4224 | atomic_add(blksize >> 9, &mdev->rs_sect_in); | ||
4205 | 4225 | ||
4206 | return TRUE; | 4226 | return TRUE; |
4207 | } | 4227 | } |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d94720f4bd07..fd3e1e9561cb 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -422,6 +422,89 @@ void resync_timer_fn(unsigned long data) | |||
422 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); | 422 | drbd_queue_work(&mdev->data.work, &mdev->resync_work); |
423 | } | 423 | } |
424 | 424 | ||
425 | static void fifo_set(struct fifo_buffer *fb, int value) | ||
426 | { | ||
427 | int i; | ||
428 | |||
429 | for (i = 0; i < fb->size; i++) | ||
430 | fb->values[i] += value; | ||
431 | } | ||
432 | |||
433 | static int fifo_push(struct fifo_buffer *fb, int value) | ||
434 | { | ||
435 | int ov; | ||
436 | |||
437 | ov = fb->values[fb->head_index]; | ||
438 | fb->values[fb->head_index++] = value; | ||
439 | |||
440 | if (fb->head_index >= fb->size) | ||
441 | fb->head_index = 0; | ||
442 | |||
443 | return ov; | ||
444 | } | ||
445 | |||
446 | static void fifo_add_val(struct fifo_buffer *fb, int value) | ||
447 | { | ||
448 | int i; | ||
449 | |||
450 | for (i = 0; i < fb->size; i++) | ||
451 | fb->values[i] += value; | ||
452 | } | ||
453 | |||
454 | int drbd_rs_controller(struct drbd_conf *mdev) | ||
455 | { | ||
456 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ | ||
457 | unsigned int want; /* The number of sectors we want in the proxy */ | ||
458 | int req_sect; /* Number of sectors to request in this turn */ | ||
459 | int correction; /* Number of sectors more we need in the proxy*/ | ||
460 | int cps; /* correction per invocation of drbd_rs_controller() */ | ||
461 | int steps; /* Number of time steps to plan ahead */ | ||
462 | int curr_corr; | ||
463 | int max_sect; | ||
464 | |||
465 | sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */ | ||
466 | mdev->rs_in_flight -= sect_in; | ||
467 | |||
468 | spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */ | ||
469 | |||
470 | steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ | ||
471 | |||
472 | if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ | ||
473 | want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps; | ||
474 | } else { /* normal path */ | ||
475 | want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target : | ||
476 | sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10); | ||
477 | } | ||
478 | |||
479 | correction = want - mdev->rs_in_flight - mdev->rs_planed; | ||
480 | |||
481 | /* Plan ahead */ | ||
482 | cps = correction / steps; | ||
483 | fifo_add_val(&mdev->rs_plan_s, cps); | ||
484 | mdev->rs_planed += cps * steps; | ||
485 | |||
486 | /* What we do in this step */ | ||
487 | curr_corr = fifo_push(&mdev->rs_plan_s, 0); | ||
488 | spin_unlock(&mdev->peer_seq_lock); | ||
489 | mdev->rs_planed -= curr_corr; | ||
490 | |||
491 | req_sect = sect_in + curr_corr; | ||
492 | if (req_sect < 0) | ||
493 | req_sect = 0; | ||
494 | |||
495 | max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ; | ||
496 | if (req_sect > max_sect) | ||
497 | req_sect = max_sect; | ||
498 | |||
499 | /* | ||
500 | dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", | ||
501 | sect_in, mdev->rs_in_flight, want, correction, | ||
502 | steps, cps, mdev->rs_planed, curr_corr, req_sect); | ||
503 | */ | ||
504 | |||
505 | return req_sect; | ||
506 | } | ||
507 | |||
425 | int w_make_resync_request(struct drbd_conf *mdev, | 508 | int w_make_resync_request(struct drbd_conf *mdev, |
426 | struct drbd_work *w, int cancel) | 509 | struct drbd_work *w, int cancel) |
427 | { | 510 | { |
@@ -459,7 +542,13 @@ int w_make_resync_request(struct drbd_conf *mdev, | |||
459 | max_segment_size = mdev->agreed_pro_version < 94 ? | 542 | max_segment_size = mdev->agreed_pro_version < 94 ? |
460 | queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE; | 543 | queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE; |
461 | 544 | ||
462 | number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ); | 545 | if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */ |
546 | number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); | ||
547 | mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; | ||
548 | } else { | ||
549 | mdev->c_sync_rate = mdev->sync_conf.rate; | ||
550 | number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); | ||
551 | } | ||
463 | pe = atomic_read(&mdev->rs_pending_cnt); | 552 | pe = atomic_read(&mdev->rs_pending_cnt); |
464 | 553 | ||
465 | mutex_lock(&mdev->data.mutex); | 554 | mutex_lock(&mdev->data.mutex); |
@@ -593,6 +682,7 @@ next_sector: | |||
593 | } | 682 | } |
594 | 683 | ||
595 | requeue: | 684 | requeue: |
685 | mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); | ||
596 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); | 686 | mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); |
597 | put_ldev(mdev); | 687 | put_ldev(mdev); |
598 | return 1; | 688 | return 1; |
@@ -1419,6 +1509,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) | |||
1419 | drbd_resync_finished(mdev); | 1509 | drbd_resync_finished(mdev); |
1420 | } | 1510 | } |
1421 | 1511 | ||
1512 | atomic_set(&mdev->rs_sect_in, 0); | ||
1513 | mdev->rs_in_flight = 0; | ||
1514 | mdev->rs_planed = 0; | ||
1515 | spin_lock(&mdev->peer_seq_lock); | ||
1516 | fifo_set(&mdev->rs_plan_s, 0); | ||
1517 | spin_unlock(&mdev->peer_seq_lock); | ||
1422 | /* ns.conn may already be != mdev->state.conn, | 1518 | /* ns.conn may already be != mdev->state.conn, |
1423 | * we may have been paused in between, or become paused until | 1519 | * we may have been paused in between, or become paused until |
1424 | * the timer triggers. | 1520 | * the timer triggers. |