aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2010-07-06 05:14:00 -0400
committerPhilipp Reisner <philipp.reisner@linbit.com>2010-10-14 12:38:14 -0400
commit778f271dfe7a7173c0bae2d6cde8d9bd1533e668 (patch)
tree1c057622152bd652102749b488653bff8be24c2a /drivers/block
parent8e26f9ccb9be00fdb33551a34c8f6029e89ab79f (diff)
drbd: The new, smarter resync speed controller
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/drbd/drbd_int.h11
-rw-r--r--drivers/block/drbd/drbd_main.c1
-rw-r--r--drivers/block/drbd/drbd_nl.c22
-rw-r--r--drivers/block/drbd/drbd_receiver.c20
-rw-r--r--drivers/block/drbd/drbd_worker.c98
5 files changed, 151 insertions(+), 1 deletion(-)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index fd2cdd45f155..facb72ccc56b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -928,6 +928,12 @@ enum write_ordering_e {
928 WO_bio_barrier 928 WO_bio_barrier
929}; 929};
930 930
931struct fifo_buffer {
932 int *values;
933 unsigned int head_index;
934 unsigned int size;
935};
936
931struct drbd_conf { 937struct drbd_conf {
932 /* things that are stored as / read from meta data on disk */ 938 /* things that are stored as / read from meta data on disk */
933 unsigned long flags; 939 unsigned long flags;
@@ -1068,6 +1074,11 @@ struct drbd_conf {
1068 u64 ed_uuid; /* UUID of the exposed data */ 1074 u64 ed_uuid; /* UUID of the exposed data */
1069 struct mutex state_mutex; 1075 struct mutex state_mutex;
1070 char congestion_reason; /* Why we where congested... */ 1076 char congestion_reason; /* Why we where congested... */
1077 atomic_t rs_sect_in; /* counter to measure the incoming resync data rate */
1078 int c_sync_rate; /* current resync rate after delay_probe magic */
1079 struct fifo_buffer rs_plan_s; /* correction values of resync planer */
1080 int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
1081 int rs_planed; /* resync sectors already planed */
1071}; 1082};
1072 1083
1073static inline struct drbd_conf *minor_to_mdev(unsigned int minor) 1084static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index bff4f598d38f..ed09a840d838 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2734,6 +2734,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2734 atomic_set(&mdev->net_cnt, 0); 2734 atomic_set(&mdev->net_cnt, 0);
2735 atomic_set(&mdev->packet_seq, 0); 2735 atomic_set(&mdev->packet_seq, 0);
2736 atomic_set(&mdev->pp_in_use, 0); 2736 atomic_set(&mdev->pp_in_use, 0);
2737 atomic_set(&mdev->rs_sect_in, 0);
2737 2738
2738 mutex_init(&mdev->md_io_mutex); 2739 mutex_init(&mdev->md_io_mutex);
2739 mutex_init(&mdev->data.mutex); 2740 mutex_init(&mdev->data.mutex);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 7d384fd39c16..295b8d593708 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1587,6 +1587,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1587 struct crypto_hash *csums_tfm = NULL; 1587 struct crypto_hash *csums_tfm = NULL;
1588 struct syncer_conf sc; 1588 struct syncer_conf sc;
1589 cpumask_var_t new_cpu_mask; 1589 cpumask_var_t new_cpu_mask;
1590 int *rs_plan_s = NULL;
1591 int fifo_size;
1590 1592
1591 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { 1593 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
1592 retcode = ERR_NOMEM; 1594 retcode = ERR_NOMEM;
@@ -1687,6 +1689,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1687 if (retcode != NO_ERROR) 1689 if (retcode != NO_ERROR)
1688 goto fail; 1690 goto fail;
1689 1691
1692 fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1693 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
1694 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
1695 if (!rs_plan_s) {
1696 dev_err(DEV, "kmalloc of fifo_buffer failed");
1697 retcode = ERR_NOMEM;
1698 goto fail;
1699 }
1700 }
1701
1690 /* ok, assign the rest of it as well. 1702 /* ok, assign the rest of it as well.
1691 * lock against receive_SyncParam() */ 1703 * lock against receive_SyncParam() */
1692 spin_lock(&mdev->peer_seq_lock); 1704 spin_lock(&mdev->peer_seq_lock);
@@ -1703,6 +1715,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1703 mdev->verify_tfm = verify_tfm; 1715 mdev->verify_tfm = verify_tfm;
1704 verify_tfm = NULL; 1716 verify_tfm = NULL;
1705 } 1717 }
1718
1719 if (fifo_size != mdev->rs_plan_s.size) {
1720 kfree(mdev->rs_plan_s.values);
1721 mdev->rs_plan_s.values = rs_plan_s;
1722 mdev->rs_plan_s.size = fifo_size;
1723 mdev->rs_planed = 0;
1724 rs_plan_s = NULL;
1725 }
1726
1706 spin_unlock(&mdev->peer_seq_lock); 1727 spin_unlock(&mdev->peer_seq_lock);
1707 1728
1708 if (get_ldev(mdev)) { 1729 if (get_ldev(mdev)) {
@@ -1734,6 +1755,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1734 1755
1735 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 1756 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1736fail: 1757fail:
1758 kfree(rs_plan_s);
1737 free_cpumask_var(new_cpu_mask); 1759 free_cpumask_var(new_cpu_mask);
1738 crypto_free_hash(csums_tfm); 1760 crypto_free_hash(csums_tfm);
1739 crypto_free_hash(verify_tfm); 1761 crypto_free_hash(verify_tfm);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 34bea972f734..5f80b22e711d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1640,6 +1640,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
1640 drbd_send_ack_dp(mdev, P_NEG_ACK, p); 1640 drbd_send_ack_dp(mdev, P_NEG_ACK, p);
1641 } 1641 }
1642 1642
1643 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1644
1643 return ok; 1645 return ok;
1644} 1646}
1645 1647
@@ -2810,6 +2812,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
2810 struct crypto_hash *verify_tfm = NULL; 2812 struct crypto_hash *verify_tfm = NULL;
2811 struct crypto_hash *csums_tfm = NULL; 2813 struct crypto_hash *csums_tfm = NULL;
2812 const int apv = mdev->agreed_pro_version; 2814 const int apv = mdev->agreed_pro_version;
2815 int *rs_plan_s = NULL;
2816 int fifo_size = 0;
2813 2817
2814 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 2818 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2815 : apv == 88 ? sizeof(struct p_rs_param) 2819 : apv == 88 ? sizeof(struct p_rs_param)
@@ -2904,6 +2908,15 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
2904 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); 2908 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2905 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); 2909 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2906 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); 2910 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
2911
2912 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2913 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2914 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2915 if (!rs_plan_s) {
2916 dev_err(DEV, "kmalloc of fifo_buffer failed");
2917 goto disconnect;
2918 }
2919 }
2907 } 2920 }
2908 2921
2909 spin_lock(&mdev->peer_seq_lock); 2922 spin_lock(&mdev->peer_seq_lock);
@@ -2922,6 +2935,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
2922 mdev->csums_tfm = csums_tfm; 2935 mdev->csums_tfm = csums_tfm;
2923 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); 2936 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2924 } 2937 }
2938 if (fifo_size != mdev->rs_plan_s.size) {
2939 kfree(mdev->rs_plan_s.values);
2940 mdev->rs_plan_s.values = rs_plan_s;
2941 mdev->rs_plan_s.size = fifo_size;
2942 mdev->rs_planed = 0;
2943 }
2925 spin_unlock(&mdev->peer_seq_lock); 2944 spin_unlock(&mdev->peer_seq_lock);
2926 } 2945 }
2927 2946
@@ -4202,6 +4221,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
4202 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ 4221 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4203 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); 4222 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4204 dec_rs_pending(mdev); 4223 dec_rs_pending(mdev);
4224 atomic_add(blksize >> 9, &mdev->rs_sect_in);
4205 4225
4206 return TRUE; 4226 return TRUE;
4207} 4227}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d94720f4bd07..fd3e1e9561cb 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -422,6 +422,89 @@ void resync_timer_fn(unsigned long data)
422 drbd_queue_work(&mdev->data.work, &mdev->resync_work); 422 drbd_queue_work(&mdev->data.work, &mdev->resync_work);
423} 423}
424 424
425static void fifo_set(struct fifo_buffer *fb, int value)
426{
427 int i;
428
429 for (i = 0; i < fb->size; i++)
430 fb->values[i] += value;
431}
432
433static int fifo_push(struct fifo_buffer *fb, int value)
434{
435 int ov;
436
437 ov = fb->values[fb->head_index];
438 fb->values[fb->head_index++] = value;
439
440 if (fb->head_index >= fb->size)
441 fb->head_index = 0;
442
443 return ov;
444}
445
446static void fifo_add_val(struct fifo_buffer *fb, int value)
447{
448 int i;
449
450 for (i = 0; i < fb->size; i++)
451 fb->values[i] += value;
452}
453
454int drbd_rs_controller(struct drbd_conf *mdev)
455{
456 unsigned int sect_in; /* Number of sectors that came in since the last turn */
457 unsigned int want; /* The number of sectors we want in the proxy */
458 int req_sect; /* Number of sectors to request in this turn */
459 int correction; /* Number of sectors more we need in the proxy*/
460 int cps; /* correction per invocation of drbd_rs_controller() */
461 int steps; /* Number of time steps to plan ahead */
462 int curr_corr;
463 int max_sect;
464
465 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
466 mdev->rs_in_flight -= sect_in;
467
468 spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
469
470 steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
471
472 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
473 want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
474 } else { /* normal path */
475 want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
476 sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
477 }
478
479 correction = want - mdev->rs_in_flight - mdev->rs_planed;
480
481 /* Plan ahead */
482 cps = correction / steps;
483 fifo_add_val(&mdev->rs_plan_s, cps);
484 mdev->rs_planed += cps * steps;
485
486 /* What we do in this step */
487 curr_corr = fifo_push(&mdev->rs_plan_s, 0);
488 spin_unlock(&mdev->peer_seq_lock);
489 mdev->rs_planed -= curr_corr;
490
491 req_sect = sect_in + curr_corr;
492 if (req_sect < 0)
493 req_sect = 0;
494
495 max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
496 if (req_sect > max_sect)
497 req_sect = max_sect;
498
499 /*
500 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
501 sect_in, mdev->rs_in_flight, want, correction,
502 steps, cps, mdev->rs_planed, curr_corr, req_sect);
503 */
504
505 return req_sect;
506}
507
425int w_make_resync_request(struct drbd_conf *mdev, 508int w_make_resync_request(struct drbd_conf *mdev,
426 struct drbd_work *w, int cancel) 509 struct drbd_work *w, int cancel)
427{ 510{
@@ -459,7 +542,13 @@ int w_make_resync_request(struct drbd_conf *mdev,
459 max_segment_size = mdev->agreed_pro_version < 94 ? 542 max_segment_size = mdev->agreed_pro_version < 94 ?
460 queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE; 543 queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
461 544
462 number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ); 545 if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
546 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
547 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
548 } else {
549 mdev->c_sync_rate = mdev->sync_conf.rate;
550 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
551 }
463 pe = atomic_read(&mdev->rs_pending_cnt); 552 pe = atomic_read(&mdev->rs_pending_cnt);
464 553
465 mutex_lock(&mdev->data.mutex); 554 mutex_lock(&mdev->data.mutex);
@@ -593,6 +682,7 @@ next_sector:
593 } 682 }
594 683
595 requeue: 684 requeue:
685 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
596 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); 686 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
597 put_ldev(mdev); 687 put_ldev(mdev);
598 return 1; 688 return 1;
@@ -1419,6 +1509,12 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1419 drbd_resync_finished(mdev); 1509 drbd_resync_finished(mdev);
1420 } 1510 }
1421 1511
1512 atomic_set(&mdev->rs_sect_in, 0);
1513 mdev->rs_in_flight = 0;
1514 mdev->rs_planed = 0;
1515 spin_lock(&mdev->peer_seq_lock);
1516 fifo_set(&mdev->rs_plan_s, 0);
1517 spin_unlock(&mdev->peer_seq_lock);
1422 /* ns.conn may already be != mdev->state.conn, 1518 /* ns.conn may already be != mdev->state.conn,
1423 * we may have been paused in between, or become paused until 1519 * we may have been paused in between, or become paused until
1424 * the timer triggers. 1520 * the timer triggers.