author    Lars Ellenberg <lars.ellenberg@linbit.com>    2010-08-11 17:40:24 -0400
committer Philipp Reisner <philipp.reisner@linbit.com>  2010-10-14 12:38:20 -0400
commit    0f0601f4ea2f53cfd8bcae060fb03d9bbde070ec (patch)
tree      42bcbd8edf48c9cee526893108cc69b4918a9f64  /drivers/block/drbd/drbd_receiver.c
parent    80a40e439e5a3f30b0a6210a1add6d7c33392e54 (diff)
drbd: new configuration parameter c-min-rate
We now track the data rate of locally submitted resync-related requests, and can thus detect non-resync activity on the lower-level device. If the current sync rate is above c-min-rate, and the lower-level device appears to be busy, we throttle the resyncer.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
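For orientation, the heuristic this patch adds can be boiled down to the self-contained sketch below. The struct resync_stats and its field names are invented stand-ins for the drbd_conf members used by the real drbd_rs_should_slow_down() in the diff; the factor of 4 reflects DRBD's 4 KiB-per-bitmap-bit granularity (Bit2KB). This is a sketch of the arithmetic only, not the in-tree implementation:

#include <stdbool.h>

/* Simplified stand-in for the drbd_conf fields the patch reads. */
struct resync_stats {
	unsigned long disk_sectors;   /* total sectors the backing device moved  */
	unsigned long rs_sectors;     /* sectors we ourselves submitted (resync) */
	unsigned long last_events;    /* snapshot taken at the previous check    */
	unsigned long bits_left_prev; /* out-of-sync bits ~two mark steps ago    */
	unsigned long bits_left_now;  /* out-of-sync bits right now              */
	unsigned long dt_seconds;     /* wall time between those two marks       */
	unsigned int  c_min_rate;     /* KB/s; 0 disables throttling entirely    */
};

static bool rs_should_slow_down(struct resync_stats *s)
{
	unsigned long curr_events, db, dt, dbdt;

	if (s->c_min_rate == 0)
		return false;	/* feature disabled */

	/* Activity we cannot attribute to our own resync requests;
	 * more than 64 such sectors means the device is "busy". */
	curr_events = s->disk_sectors - s->rs_sectors;
	if (s->last_events && curr_events - s->last_events <= 64)
		return false;
	s->last_events = curr_events;

	/* Short-time sync rate: each bitmap bit covers 4 KiB,
	 * so KB/s = bits cleared per second * 4 (Bit2KB in DRBD). */
	dt = s->dt_seconds;
	if (!dt)
		dt = 1;
	db = s->bits_left_prev - s->bits_left_now;
	dbdt = db / dt * 4;

	/* Example: 3000 bits cleared in 3 s gives 4000 KB/s; with
	 * c_min_rate = 1000 KB/s we are above the floor, so throttle. */
	return dbdt > s->c_min_rate;
}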
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  88
1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 346aed98027f..0d9967fef528 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1561,6 +1561,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
 	list_add(&e->w.list, &mdev->sync_ee);
 	spin_unlock_irq(&mdev->req_lock);
 
+	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
 		return TRUE;
 
@@ -2017,17 +2018,66 @@ out_interrupted:
 	return FALSE;
 }
 
+/* We may throttle resync, if the lower device seems to be busy,
+ * and current sync rate is above c_min_rate.
+ *
+ * To decide whether or not the lower device is busy, we use a scheme similar
+ * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
+ * (more than 64 sectors) of activity we cannot account for with our own resync
+ * activity, it obviously is "busy".
+ *
+ * The current sync rate used here uses only the most recent two step marks,
+ * to have a short time average so we can react faster.
+ */
+int drbd_rs_should_slow_down(struct drbd_conf *mdev)
+{
+	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
+	unsigned long db, dt, dbdt;
+	int curr_events;
+	int throttle = 0;
+
+	/* feature disabled? */
+	if (mdev->sync_conf.c_min_rate == 0)
+		return 0;
+
+	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+		      (int)part_stat_read(&disk->part0, sectors[1]) -
+			atomic_read(&mdev->rs_sect_ev);
+	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
+		unsigned long rs_left;
+		int i;
+
+		mdev->rs_last_events = curr_events;
+
+		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
+		 * approx. */
+		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
+		rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+
+		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
+		if (!dt)
+			dt++;
+		db = mdev->rs_mark_left[i] - rs_left;
+		dbdt = Bit2KB(db/dt);
+
+		if (dbdt > mdev->sync_conf.c_min_rate)
+			throttle = 1;
+	}
+	return throttle;
+}
+
+
 static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 {
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	struct drbd_epoch_entry *e;
 	struct digest_info *di = NULL;
+	struct p_block_req *p = (struct p_block_req *)h;
+	const int brps = sizeof(*p)-sizeof(*h);
 	int size, digest_size;
 	unsigned int fault_type;
-	struct p_block_req *p =
-		(struct p_block_req *)h;
-	const int brps = sizeof(*p)-sizeof(*h);
+
 
 	if (drbd_recv(mdev, h->payload, brps) != brps)
 		return FALSE;
@@ -2099,8 +2149,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 	} else if (h->command == P_OV_REPLY) {
 		e->w.cb = w_e_end_ov_reply;
 		dec_rs_pending(mdev);
-		/* drbd_rs_begin_io done when we sent this request */
-		goto submit;
+		/* drbd_rs_begin_io done when we sent this request,
+		 * but accounting still needs to be done. */
+		goto submit_for_resync;
 	}
 	break;
 
@@ -2128,9 +2179,36 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		goto out_free_e;
 	}
 
+	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
+	 * wrt the receiver, but it is not as straightforward as it may seem.
+	 * Various places in the resync start and stop logic assume resync
+	 * requests are processed in order, requeuing this on the worker thread
+	 * introduces a bunch of new code for synchronization between threads.
+	 *
+	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
+	 * "forever", throttling after drbd_rs_begin_io will lock that extent
+	 * for application writes for the same time. For now, just throttle
+	 * here, where the rest of the code expects the receiver to sleep for
+	 * a while, anyways.
+	 */
+
+	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
+	 * this defers syncer requests for some time, before letting at least
+	 * one request through. The resync controller on the receiving side
+	 * will adapt to the incoming rate accordingly.
+	 *
+	 * We cannot throttle here if remote is Primary/SyncTarget:
+	 * we would also throttle its application reads.
+	 * In that case, throttling is done on the SyncTarget only.
+	 */
+	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
+		msleep(100);
 	if (drbd_rs_begin_io(mdev, e->sector))
 		goto out_free_e;
 
+submit_for_resync:
+	atomic_add(size >> 9, &mdev->rs_sect_ev);
+
 submit:
 	inc_unacked(mdev);
 	spin_lock_irq(&mdev->req_lock);
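For completeness, a hypothetical drbd.conf fragment showing where the new knob would be set. This assumes the 8.3-series syncer-section syntax of the same era as this patch; the parameter name comes from the commit itself, but the values are purely illustrative:

syncer {
	rate        100M;  # upper bound for resync bandwidth
	c-min-rate  4M;    # never throttle the resync below this rate;
	                   # 0 disables the throttling added by this patch
}

With such a setting, a busy backing device only slows the resync down while the short-time resync rate is still above the configured floor.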