-rw-r--r--  drivers/block/drbd/drbd_int.h      |  1
-rw-r--r--  drivers/block/drbd/drbd_main.c     |  7
-rw-r--r--  drivers/block/drbd/drbd_nl.c       |  3
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 88
-rw-r--r--  drivers/block/drbd/drbd_worker.c   | 29
-rw-r--r--  include/linux/drbd_limits.h        |  4
-rw-r--r--  include/linux/drbd_nl.h            |  1
7 files changed, 116 insertions, 17 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 0fce3f36fc1c..0fedcc0b8dc9 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1513,6 +1513,7 @@ extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
 extern void resync_timer_fn(unsigned long data);
 
 /* drbd_receiver.c */
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
 extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 		const unsigned rw, const int fault_type);
 extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 1ff8418ae0fa..db93eee7e543 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1098,6 +1098,8 @@ int __drbd_set_state(struct drbd_conf *mdev,
 		mdev->ov_left = mdev->rs_total
 			- BM_SECT_TO_BIT(mdev->ov_position);
 		mdev->rs_start = now;
+		mdev->rs_last_events = 0;
+		mdev->rs_last_sect_ev = 0;
 		mdev->ov_last_oos_size = 0;
 		mdev->ov_last_oos_start = 0;
 
@@ -2706,7 +2708,8 @@ static void drbd_set_defaults(struct drbd_conf *mdev)
 		/* .c_plan_ahead = */	DRBD_C_PLAN_AHEAD_DEF,
 		/* .c_delay_target = */	DRBD_C_DELAY_TARGET_DEF,
 		/* .c_fill_target = */	DRBD_C_FILL_TARGET_DEF,
-		/* .c_max_rate = */	DRBD_C_MAX_RATE_DEF
+		/* .c_max_rate = */	DRBD_C_MAX_RATE_DEF,
+		/* .c_min_rate = */	DRBD_C_MIN_RATE_DEF
 	};
 
 	/* Have to use that way, because the layout differs between
@@ -2742,6 +2745,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->packet_seq, 0);
 	atomic_set(&mdev->pp_in_use, 0);
 	atomic_set(&mdev->rs_sect_in, 0);
+	atomic_set(&mdev->rs_sect_ev, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2819,6 +2823,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
 	mdev->rs_total =
 	mdev->rs_failed = 0;
 	mdev->rs_last_events = 0;
+	mdev->rs_last_sect_ev = 0;
 	for (i = 0; i < DRBD_SYNC_MARKS; i++) {
 		mdev->rs_mark_left[i] = 0;
 		mdev->rs_mark_time[i] = 0;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 295b8d593708..6b35d41706e4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1604,7 +1604,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 		sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
 		sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
 		sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
 		sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
+		sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
 	} else
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 346aed98027f..0d9967fef528 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1561,6 +1561,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
 	list_add(&e->w.list, &mdev->sync_ee);
 	spin_unlock_irq(&mdev->req_lock);
 
+	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
 		return TRUE;
 
@@ -2017,17 +2018,66 @@ out_interrupted:
 	return FALSE;
 }
 
+/* We may throttle resync, if the lower device seems to be busy,
+ * and the current sync rate is above c_min_rate.
+ *
+ * To decide whether or not the lower device is busy, we use a scheme similar
+ * to MD RAID's is_mddev_idle(): if the partition stats reveal "significant"
+ * activity (more than 64 sectors) that we cannot account for with our own
+ * resync activity, the device obviously is "busy".
+ *
+ * The current sync rate used here is based only on the most recent two
+ * step marks; this gives a short-time average, so we can react faster.
+ */
+int drbd_rs_should_slow_down(struct drbd_conf *mdev)
+{
+	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
+	unsigned long db, dt, dbdt;
+	int curr_events;
+	int throttle = 0;
+
+	/* feature disabled? */
+	if (mdev->sync_conf.c_min_rate == 0)
+		return 0;
+
+	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+		      (int)part_stat_read(&disk->part0, sectors[1]) -
+		      atomic_read(&mdev->rs_sect_ev);
+	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
+		unsigned long rs_left;
+		int i;
+
+		mdev->rs_last_events = curr_events;
+
+		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
+		 * approx. */
+		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
+		rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+
+		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
+		if (!dt)
+			dt++;
+		db = mdev->rs_mark_left[i] - rs_left;
+		dbdt = Bit2KB(db/dt);
+
+		if (dbdt > mdev->sync_conf.c_min_rate)
+			throttle = 1;
+	}
+	return throttle;
+}
+
+
 static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 {
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	struct drbd_epoch_entry *e;
 	struct digest_info *di = NULL;
+	struct p_block_req *p = (struct p_block_req *)h;
+	const int brps = sizeof(*p)-sizeof(*h);
 	int size, digest_size;
 	unsigned int fault_type;
-	struct p_block_req *p =
-		(struct p_block_req *)h;
-	const int brps = sizeof(*p)-sizeof(*h);
+
 
 	if (drbd_recv(mdev, h->payload, brps) != brps)
 		return FALSE;
@@ -2099,8 +2149,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 	} else if (h->command == P_OV_REPLY) {
 		e->w.cb = w_e_end_ov_reply;
 		dec_rs_pending(mdev);
-		/* drbd_rs_begin_io done when we sent this request */
-		goto submit;
+		/* drbd_rs_begin_io done when we sent this request,
+		 * but accounting still needs to be done. */
+		goto submit_for_resync;
 	}
 	break;
 
@@ -2128,9 +2179,36 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
 		goto out_free_e;
 	}
 
+	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
+	 * wrt the receiver, but it is not as straightforward as it may seem.
+	 * Various places in the resync start and stop logic assume resync
+	 * requests are processed in order; requeuing this on the worker
+	 * thread would introduce a bunch of new code for synchronization
+	 * between threads.
+	 *
+	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
+	 * "forever", while throttling after drbd_rs_begin_io will lock that
+	 * extent against application writes for the same time. For now, just
+	 * throttle here, where the rest of the code expects the receiver to
+	 * sleep for a while anyway.
+	 */
+
+	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
+	 * this defers syncer requests for some time, before letting at least
+	 * one request through. The resync controller on the receiving side
+	 * will adapt to the incoming rate accordingly.
+	 *
+	 * We cannot throttle here if the remote is Primary/SyncTarget:
+	 * we would also throttle its application reads.
+	 * In that case, throttling is done on the SyncTarget only.
+	 */
+	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
+		msleep(100);
 	if (drbd_rs_begin_io(mdev, e->sector))
 		goto out_free_e;
 
+submit_for_resync:
+	atomic_add(size >> 9, &mdev->rs_sect_ev);
+
 submit:
 	inc_unacked(mdev);
 	spin_lock_irq(&mdev->req_lock);
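
To make the throttle decision above concrete: one bitmap bit covers BM_BLOCK_SIZE (4 KiB), so Bit2KB() is a shift by two. The user-space sketch below replays the same comparison; the Bit2KB() definition matches the drbd_int.h of this era, but every sample number is invented for illustration only.

/* User-space sketch of the drbd_rs_should_slow_down() decision.
 * Assumption: one bitmap bit covers 4 KiB, so Bit2KB(x) == x << 2,
 * as in drbd_int.h of this era.  All sample numbers are made up. */
#include <stdio.h>

#define Bit2KB(bits) ((bits) << 2) /* bitmap bits -> KiB */

int main(void)
{
	/* part_stat sectors[0]+sectors[1] minus our own resync submissions: */
	long curr_events    = 900000 - 880000; /* unaccounted disk activity */
	long rs_last_events = 0;               /* first sample              */
	unsigned long c_min_rate = 4096;       /* KiB/s, the new default    */

	/* Only re-evaluate once >64 unaccounted sectors have piled up. */
	if (!rs_last_events || curr_events - rs_last_events > 64) {
		/* Two sync marks span ~6 s; 60000 bits were cleared since. */
		unsigned long dt = 6, db = 60000;
		unsigned long dbdt = Bit2KB(db / dt); /* 40000 KiB/s */

		printf("resync at %lu KiB/s vs c_min_rate %lu KiB/s -> %s\n",
		       dbdt, c_min_rate,
		       dbdt > c_min_rate ? "throttle" : "don't throttle");
	}
	return 0;
}

With these numbers the short-time resync rate (40000 KiB/s) is well above c_min_rate, and the lower device showed unaccounted activity, so the receiver would sleep 100 ms before issuing the next resync request.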
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index f5d779b4d685..99c937acb471 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -215,10 +215,8 @@ void drbd_endio_sec(struct bio *bio, int error)
  */
 void drbd_endio_pri(struct bio *bio, int error)
 {
-	unsigned long flags;
 	struct drbd_request *req = bio->bi_private;
 	struct drbd_conf *mdev = req->mdev;
-	struct bio_and_error m;
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
@@ -244,12 +242,7 @@ void drbd_endio_pri(struct bio *bio, int error)
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(error);
 
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	__req_mod(req, what, &m);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	if (m.bio)
-		complete_master_bio(mdev, &m);
+	req_mod(req, what);
 }
 
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -376,6 +369,9 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
 	if (!get_ldev(mdev))
 		return -EIO;
 
+	if (drbd_rs_should_slow_down(mdev))
+		goto defer;
+
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
@@ -387,6 +383,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
 	list_add(&e->w.list, &mdev->read_ee);
 	spin_unlock_irq(&mdev->req_lock);
 
+	atomic_add(size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
 		return 0;
 
@@ -512,8 +509,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	int max_segment_size;
-	int number, i, rollback_i, size, pe, mx;
+	int number, rollback_i, size, pe, mx;
 	int align, queued, sndbuf;
+	int i = 0;
 
 	if (unlikely(cancel))
 		return 1;
@@ -549,7 +547,14 @@ int w_make_resync_request(struct drbd_conf *mdev,
 		mdev->c_sync_rate = mdev->sync_conf.rate;
 		number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
 	}
-	pe = atomic_read(&mdev->rs_pending_cnt);
+
+	/* Throttle resync on lower level disk activity, which may also be
+	 * caused by application IO on Primary/SyncTarget.
+	 * Keep this after the call to drbd_rs_controller, as that function
+	 * assumes it is called as precisely as possible every SLEEP_TIME,
+	 * and would be confused otherwise. */
+	if (drbd_rs_should_slow_down(mdev))
+		goto requeue;
 
 	mutex_lock(&mdev->data.mutex);
 	if (mdev->data.socket)
@@ -563,6 +568,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
 	mx = number;
 
 	/* Limit the number of pending RS requests to no more than the peer's receive buffer */
+	pe = atomic_read(&mdev->rs_pending_cnt);
 	if ((pe + number) > mx) {
 		number = mx - pe;
 	}
@@ -1492,6 +1498,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
 	mdev->rs_same_csum = 0;
+	mdev->rs_last_events = 0;
+	mdev->rs_last_sect_ev = 0;
 	mdev->rs_total = tw;
 	mdev->rs_start = now;
 	for (i = 0; i < DRBD_SYNC_MARKS; i++) {
@@ -1516,6 +1524,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 	}
 
 	atomic_set(&mdev->rs_sect_in, 0);
+	atomic_set(&mdev->rs_sect_ev, 0);
 	mdev->rs_in_flight = 0;
 	mdev->rs_planed = 0;
 	spin_lock(&mdev->peer_seq_lock);
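
The request-budget line in w_make_resync_request() deserves a worked example: with one resync request per BM_BLOCK_SIZE (4 KiB) bitmap block, number is sized so that issuing number requests every SLEEP_TIME hits exactly c_sync_rate. A standalone sketch follows; SLEEP_TIME is (HZ/10) jiffies, i.e. 100 ms, in the drbd_int.h of this era, and the HZ and rate values are assumptions for the arithmetic only.

/* Worked example of the request-count formula in w_make_resync_request(). */
#include <stdio.h>

#define HZ            250
#define SLEEP_TIME    (HZ / 10)   /* 100 ms between worker iterations       */
#define BM_BLOCK_SIZE 4096        /* bytes covered by one resync request    */

int main(void)
{
	int c_sync_rate = 10240;  /* desired rate in KiB/s (10 MiB/s) */

	/* Requests per SLEEP_TIME so that
	 * number * 4 KiB every 100 ms == c_sync_rate KiB/s. */
	int number = SLEEP_TIME * c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);

	printf("queue %d requests (%d KiB) per 100 ms => %d KiB/s\n",
	       number, number * 4, number * 4 * 10);
	return 0;
}

For 10 MiB/s this yields number = 256, i.e. 1024 KiB per 100 ms tick, which is why the throttle check must come after this computation: skipping a tick via goto requeue simply drops one tick's budget.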
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 06dbba47a8ef..0b24ded6fffd 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -150,5 +150,9 @@
 #define DRBD_C_MAX_RATE_MAX	(4 << 20)
 #define DRBD_C_MAX_RATE_DEF	102400
 
+#define DRBD_C_MIN_RATE_MIN	0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MAX	(4 << 20)
+#define DRBD_C_MIN_RATE_DEF	4096
+
 #undef RANGE
 #endif
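
For scale: the new limits share the kByte/sec unit of the other rate settings, and c-min-rate = 0 is the switch that disables the throttling feature entirely (see the early return in drbd_rs_should_slow_down()). A quick conversion check, purely illustrative:

#include <stdio.h>

#define DRBD_C_MIN_RATE_MIN 0         /* kByte/sec */
#define DRBD_C_MIN_RATE_MAX (4 << 20)
#define DRBD_C_MIN_RATE_DEF 4096

int main(void)
{
	/* 4 << 20 KiB/s == 4 GiB/s; 4096 KiB/s == 4 MiB/s. */
	printf("min: %d KiB/s (feature off)\n", DRBD_C_MIN_RATE_MIN);
	printf("max: %d KiB/s = %d GiB/s\n",
	       DRBD_C_MIN_RATE_MAX, DRBD_C_MIN_RATE_MAX >> 20);
	printf("def: %d KiB/s = %d MiB/s\n",
	       DRBD_C_MIN_RATE_DEF, DRBD_C_MIN_RATE_DEF >> 10);
	return 0;
}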
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index e23683c87ca1..ade91107c9a5 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -92,6 +92,7 @@ NL_PACKET(syncer_conf, 8,
 	NL_INTEGER(	77,	T_MAY_IGNORE,	c_delay_target)
 	NL_INTEGER(	78,	T_MAY_IGNORE,	c_fill_target)
 	NL_INTEGER(	79,	T_MAY_IGNORE,	c_max_rate)
+	NL_INTEGER(	80,	T_MAY_IGNORE,	c_min_rate)
 )
 
 NL_PACKET(invalidate, 9, )