author     Lars Ellenberg <lars.ellenberg@linbit.com>      2010-08-11 17:40:24 -0400
committer  Philipp Reisner <philipp.reisner@linbit.com>    2010-10-14 12:38:20 -0400
commit     0f0601f4ea2f53cfd8bcae060fb03d9bbde070ec
tree       42bcbd8edf48c9cee526893108cc69b4918a9f64 /drivers/block/drbd/drbd_receiver.c
parent     80a40e439e5a3f30b0a6210a1add6d7c33392e54
drbd: new configuration parameter c-min-rate
We now track the data rate of locally submitted resync related requests,
and can thus detect non-resync activity on the lower level device.
If the current sync rate is above c-min-rate, and the lower level device
appears to be busy, we throttle the resyncer.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
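
A minimal sketch of the decision rule described above: throttle only when the backing device shows sector activity we did not cause ourselves ("foreign" I/O) and the short-window resync rate is already above c-min-rate. The function and parameter names below are invented for illustration and are not part of the patch; the real implementation is drbd_rs_should_slow_down() in the diff that follows.

/* Illustrative sketch only, not the kernel code: decide whether the
 * resyncer should be throttled.  "Foreign" sectors are whatever the
 * partition statistics report beyond what we submitted for resync
 * ourselves. */
int should_throttle_resync(unsigned long part_sectors,       /* total sectors read + written */
			   unsigned long own_resync_sectors,  /* sectors we submitted for resync */
			   unsigned long cur_rate_kb,         /* short-window resync rate, KiB/s */
			   unsigned long c_min_rate_kb)       /* configured c-min-rate, KiB/s */
{
	unsigned long foreign = part_sectors - own_resync_sectors;

	if (c_min_rate_kb == 0)
		return 0;	/* c-min-rate of 0 disables throttling */

	if (foreign <= 64)
		return 0;	/* no "significant" non-resync activity */

	/* device is busy; only back off if we already sync above the floor */
	return cur_rate_kb > c_min_rate_kb;
}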
Diffstat (limited to 'drivers/block/drbd/drbd_receiver.c')
 -rw-r--r--  drivers/block/drbd/drbd_receiver.c | 88
 1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 346aed98027f..0d9967fef528 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1561,6 +1561,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
 	list_add(&e->w.list, &mdev->sync_ee);
 	spin_unlock_irq(&mdev->req_lock);
 
+	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
 		return TRUE;
 
@@ -2017,17 +2018,66 @@ out_interrupted:
 	return FALSE;
 }
 
+/* We may throttle resync, if the lower device seems to be busy,
+ * and current sync rate is above c_min_rate.
+ *
+ * To decide whether or not the lower device is busy, we use a scheme similar
+ * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
+ * (more than 64 sectors) of activity we cannot account for with our own resync
+ * activity, it obviously is "busy".
+ *
+ * The current sync rate used here uses only the most recent two step marks,
+ * to have a short time average so we can react faster.
+ */
+int drbd_rs_should_slow_down(struct drbd_conf *mdev)
+{
+	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
+	unsigned long db, dt, dbdt;
+	int curr_events;
+	int throttle = 0;
+
+	/* feature disabled? */
+	if (mdev->sync_conf.c_min_rate == 0)
+		return 0;
+
+	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+		      (int)part_stat_read(&disk->part0, sectors[1]) -
+		      atomic_read(&mdev->rs_sect_ev);
+	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
+		unsigned long rs_left;
+		int i;
+
+		mdev->rs_last_events = curr_events;
+
+		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
+		 * approx. */
+		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
+		rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+
+		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
+		if (!dt)
+			dt++;
+		db = mdev->rs_mark_left[i] - rs_left;
+		dbdt = Bit2KB(db/dt);
+
+		if (dbdt > mdev->sync_conf.c_min_rate)
+			throttle = 1;
+	}
+	return throttle;
+}
+
+
 static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 {
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	struct drbd_epoch_entry *e;
 	struct digest_info *di = NULL;
+	struct p_block_req *p = (struct p_block_req *)h;
+	const int brps = sizeof(*p)-sizeof(*h);
 	int size, digest_size;
 	unsigned int fault_type;
-	struct p_block_req *p =
-		(struct p_block_req *)h;
-	const int brps = sizeof(*p)-sizeof(*h);
+
 
 	if (drbd_recv(mdev, h->payload, brps) != brps)
 		return FALSE;
@@ -2099,8 +2149,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		} else if (h->command == P_OV_REPLY) {
 			e->w.cb = w_e_end_ov_reply;
 			dec_rs_pending(mdev);
-			/* drbd_rs_begin_io done when we sent this request */
-			goto submit;
+			/* drbd_rs_begin_io done when we sent this request,
+			 * but accounting still needs to be done. */
+			goto submit_for_resync;
 		}
 		break;
 
@@ -2128,9 +2179,36 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		goto out_free_e;
 	}
 
+	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
+	 * wrt the receiver, but it is not as straightforward as it may seem.
+	 * Various places in the resync start and stop logic assume resync
+	 * requests are processed in order, requeuing this on the worker thread
+	 * introduces a bunch of new code for synchronization between threads.
+	 *
+	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
+	 * "forever", throttling after drbd_rs_begin_io will lock that extent
+	 * for application writes for the same time. For now, just throttle
+	 * here, where the rest of the code expects the receiver to sleep for
+	 * a while, anyways.
+	 */
+
+	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
+	 * this defers syncer requests for some time, before letting at least
+	 * on request through. The resync controller on the receiving side
+	 * will adapt to the incoming rate accordingly.
+	 *
+	 * We cannot throttle here if remote is Primary/SyncTarget:
+	 * we would also throttle its application reads.
+	 * In that case, throttling is done on the SyncTarget only.
+	 */
+	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
+		msleep(100);
 	if (drbd_rs_begin_io(mdev, e->sector))
 		goto out_free_e;
 
+submit_for_resync:
+	atomic_add(size >> 9, &mdev->rs_sect_ev);
+
 submit:
 	inc_unacked(mdev);
 	spin_lock_irq(&mdev->req_lock);