author     Lars Ellenberg <lars.ellenberg@linbit.com>    2013-12-20 05:22:13 -0500
committer  Philipp Reisner <philipp.reisner@linbit.com>  2014-07-10 12:35:13 -0400
commit     ad3fee790088d36ad862e31535b5b99c25adeef4 (patch)
tree       916a5c23cf57283f9f6a5abf8418364c52249ad9
parent     7753a4c17f9e305ed19d8851e1a3154c8c9abaaf (diff)
drbd: improve throttling decisions of background resynchronisation
Background resynchronisation does some "side-stepping", or throttles
itself, if it detects application IO activity and the current resync
rate estimate is above the configured "c-min-rate".
What was not detected: the case where there is no visible application IO
because requests are still blocked on activity log transactions.
Introduce a new atomic_t ap_actlog_cnt to track such blocked requests,
and treat a non-zero count as application IO activity.
This counter is exposed at proc_details level 2 and above.
Also make sure to release the currently locked resync extent
if we side-step due to such voluntary throttling.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
 drivers/block/drbd/drbd_actlog.c   | 29
 drivers/block/drbd/drbd_int.h      |  4
 drivers/block/drbd/drbd_main.c     |  1
 drivers/block/drbd/drbd_proc.c     |  3
 drivers/block/drbd/drbd_receiver.c | 19
 drivers/block/drbd/drbd_req.c      |  4
 drivers/block/drbd/drbd_worker.c   |  9
 7 files changed, 51 insertions(+), 18 deletions(-)
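Before the diff itself, a minimal userspace model of the decision the commit message describes may help. It shows why a request stuck in an activity log transaction must count as application IO: except for the ap_actlog_cnt counter, the c-min-rate setting, and the "> 64 sectors moved" trigger visible in the drbd_receiver.c hunk below, every name and number in this sketch is an illustrative assumption, not DRBD code.

/*
 * Minimal userspace sketch of the throttling decision -- not the
 * kernel implementation.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_device {
        int ap_actlog_cnt;    /* requests blocked on activity log transactions */
        long curr_events;     /* stand-in for the disk's sector statistics */
        long rs_last_events;  /* statistics sampled at the previous step mark */
        long resync_rate;     /* assumed current resync rate estimate, KiB/s */
        long c_min_rate;      /* configured c-min-rate, KiB/s */
};

/* Application IO counts as active if sectors moved since the last sample,
 * OR if requests sit blocked on an activity log transaction -- the case
 * the old heuristic missed. */
static bool app_io_active(const struct fake_device *d)
{
        if (d->ap_actlog_cnt > 0)
                return true;
        return d->curr_events - d->rs_last_events > 64;
}

/* Side-step only if application IO is active AND resync is already
 * faster than the configured floor. */
static bool should_throttle(const struct fake_device *d)
{
        return app_io_active(d) && d->resync_rate > d->c_min_rate;
}

int main(void)
{
        struct fake_device d = {
                .ap_actlog_cnt = 0, .curr_events = 1000,
                .rs_last_events = 1000, .resync_rate = 400, .c_min_rate = 250,
        };

        /* No sectors moved, nothing blocked: resync keeps full speed. */
        printf("idle:          throttle=%d\n", should_throttle(&d));

        /* Still no sectors moved, but one write is stuck in an activity
         * log transaction: with this patch, that counts as activity. */
        d.ap_actlog_cnt = 1;
        printf("blocked on AL: throttle=%d\n", should_throttle(&d));
        return 0;
}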
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index d7e80663187c..6ce5c76d642b 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -991,6 +991,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
 	struct lc_element *e;
 	struct bm_extent *bm_ext;
 	int i;
+	bool throttle = drbd_rs_should_slow_down(device, sector, true);
+
+	/* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
+	 * not yet BME_LOCKED) extent needs to be kicked out explicitly if we
+	 * need to throttle. There is at most one such half-locked extent,
+	 * which is remembered in resync_wenr. */
+
+	if (throttle && device->resync_wenr != enr)
+		return -EAGAIN;
 
 	spin_lock_irq(&device->al_lock);
 	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
@@ -1014,8 +1023,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
 			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
 			clear_bit(BME_NO_WRITES, &bm_ext->flags);
 			device->resync_wenr = LC_FREE;
-			if (lc_put(device->resync, &bm_ext->lce) == 0)
+			if (lc_put(device->resync, &bm_ext->lce) == 0) {
+				bm_ext->flags = 0;
 				device->resync_locked--;
+			}
 			wake_up(&device->al_wait);
 		} else {
 			drbd_alert(device, "LOGIC BUG\n");
@@ -1077,8 +1088,20 @@ proceed:
 	return 0;
 
 try_again:
-	if (bm_ext)
-		device->resync_wenr = enr;
+	if (bm_ext) {
+		if (throttle) {
+			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
+			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
+			clear_bit(BME_NO_WRITES, &bm_ext->flags);
+			device->resync_wenr = LC_FREE;
+			if (lc_put(device->resync, &bm_ext->lce) == 0) {
+				bm_ext->flags = 0;
+				device->resync_locked--;
+			}
+			wake_up(&device->al_wait);
+		} else
+			device->resync_wenr = enr;
+	}
 	spin_unlock_irq(&device->al_lock);
 	return -EAGAIN;
 }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index fa010ea3a4bf..81f4af49b8ac 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -797,6 +797,7 @@ struct drbd_device {
 	unsigned int al_writ_cnt;
 	unsigned int bm_writ_cnt;
 	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
+	atomic_t ap_actlog_cnt;	 /* Requests waiting for activity log */
 	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
 	atomic_t unacked_cnt;	 /* Need to send replies for */
@@ -1454,7 +1455,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_asender(struct drbd_thread *thi);
 extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
-extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+		bool throttle_if_app_is_waiting);
 extern int drbd_submit_peer_request(struct drbd_device *,
 				    struct drbd_peer_request *, const unsigned,
 				    const int);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 58865969c9f4..ad7c0e8843c4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1909,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
 	drbd_set_defaults(device);
 
 	atomic_set(&device->ap_bio_cnt, 0);
+	atomic_set(&device->ap_actlog_cnt, 0);
 	atomic_set(&device->ap_pending_cnt, 0);
 	atomic_set(&device->rs_pending_cnt, 0);
 	atomic_set(&device->unacked_cnt, 0);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 9059d7bf8a36..06e6147c7601 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -335,6 +335,9 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			lc_seq_printf_stats(seq, device->act_log);
 			put_ldev(device);
 		}
+
+		if (proc_details >= 2)
+			seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
 	}
 	rcu_read_unlock();
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7a1078d285dd..0d3cbd8e4b9c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2417,13 +2417,14 @@ out_interrupted:
  * The current sync rate used here uses only the most recent two step marks,
  * to have a short time average so we can react faster.
  */
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+		bool throttle_if_app_is_waiting)
 {
 	struct lc_element *tmp;
-	bool throttle = true;
+	bool throttle = drbd_rs_c_min_rate_throttle(device);
 
-	if (!drbd_rs_c_min_rate_throttle(device))
-		return false;
+	if (!throttle || throttle_if_app_is_waiting)
+		return throttle;
 
 	spin_lock_irq(&device->al_lock);
 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
@@ -2431,7 +2432,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
 			throttle = false;
-		/* Do not slow down if app IO is already waiting for this extent */
+		/* Do not slow down if app IO is already waiting for this extent,
+		 * and our progress is necessary for application IO to complete. */
 	}
 	spin_unlock_irq(&device->al_lock);
 
@@ -2456,7 +2458,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
 	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
 		      (int)part_stat_read(&disk->part0, sectors[1]) -
 		      atomic_read(&device->rs_sect_ev);
-	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
+
+	if (atomic_read(&device->ap_actlog_cnt)
+	    || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
 		unsigned long rs_left;
 		int i;
 
@@ -2646,7 +2650,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
-	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
+	if (device->state.peer != R_PRIMARY
+	    && drbd_rs_should_slow_down(device, sector, false))
 		schedule_timeout_uninterruptible(HZ/10);
 	if (drbd_rs_begin_io(device, sector))
 		goto out_free_e;
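The reworked entry check in drbd_rs_should_slow_down() above is easy to misread, so here is a hedged stand-alone sketch of the two-caller contract; the stub variables are assumptions, only the control flow mirrors the hunk. The resync path passes throttle_if_app_is_waiting=true and accepts the c-min-rate verdict as-is; the receiver path passes false and additionally refuses to throttle when application IO already waits on the extent, since resync progress is what completes that IO.

#include <stdbool.h>
#include <stdio.h>

/* Stub inputs -- assumed stand-ins for the real rate check and the
 * per-extent BME_PRIORITY lookup. */
static bool c_min_rate_says_throttle = true;
static bool app_waits_for_this_extent = true;

static bool rs_should_slow_down(bool throttle_if_app_is_waiting)
{
        bool throttle = c_min_rate_says_throttle;

        /* Short-circuit: if the rate check already says no, or the caller
         * wants to throttle even while application IO waits, skip the
         * per-extent lookup entirely. */
        if (!throttle || throttle_if_app_is_waiting)
                return throttle;

        /* Only callers passing false reach this point. */
        if (app_waits_for_this_extent)
                throttle = false;
        return throttle;
}

int main(void)
{
        printf("resync path (true):    %d\n", rs_should_slow_down(true));  /* 1 */
        printf("receiver path (false): %d\n", rs_should_slow_down(false)); /* 0 */
        return 0;
}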
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3f6a6ed2fd03..74ebef101dc7 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1218,6 +1218,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 	if (rw == WRITE && req->private_bio && req->i.size
 	&& !test_bit(AL_SUSPENDED, &device->flags)) {
 		if (!drbd_al_begin_io_fastpath(device, &req->i)) {
+			atomic_inc(&device->ap_actlog_cnt);
 			drbd_queue_write(device, req);
 			return NULL;
 		}
@@ -1354,6 +1355,7 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
 
 			req->rq_state |= RQ_IN_ACT_LOG;
 			req->in_actlog_jif = jiffies;
+			atomic_dec(&device->ap_actlog_cnt);
 		}
 
 		list_del_init(&req->tl_requests);
@@ -1439,6 +1441,7 @@ skip_fast_path:
 		list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
 			req->rq_state |= RQ_IN_ACT_LOG;
 			req->in_actlog_jif = jiffies;
+			atomic_dec(&device->ap_actlog_cnt);
 			list_del_init(&req->tl_requests);
 			drbd_send_and_submit(device, req);
 		}
@@ -1454,6 +1457,7 @@ skip_fast_path:
 			if (!was_cold) {
 				req->rq_state |= RQ_IN_ACT_LOG;
 				req->in_actlog_jif = jiffies;
+				atomic_dec(&device->ap_actlog_cnt);
 				/* Corresponding extent was hot after all? */
 				drbd_send_and_submit(device, req);
 			} else {
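The drbd_req.c hunks maintain a simple pairing invariant: one atomic_inc() when a write misses the activity-log fastpath and is queued for the submitter, and exactly one atomic_dec() on whichever of the three submit paths later marks the request RQ_IN_ACT_LOG. A reduced, single-threaded sketch of that lifecycle (the plain int and the function names are simplifications, not DRBD code):

#include <stdio.h>

static int ap_actlog_cnt; /* the kernel version is an atomic_t */

/* Increment: the write missed drbd_al_begin_io_fastpath() and was
 * handed to the submitter thread via drbd_queue_write(). */
static void write_blocked_on_activity_log(void)
{
        ap_actlog_cnt++;
}

/* Matching decrement: the request finally got its activity log slot
 * (RQ_IN_ACT_LOG), on whichever submit path that happened. */
static void write_entered_activity_log(void)
{
        ap_actlog_cnt--;
}

int main(void)
{
        write_blocked_on_activity_log();
        printf("blocked on activity log: %d\n", ap_actlog_cnt); /* 1 */
        write_entered_activity_log();
        printf("blocked on activity log: %d\n", ap_actlog_cnt); /* 0 */
        return 0;
}

While the count is non-zero, drbd_rs_c_min_rate_throttle() above treats the device as having application IO activity, even though no data sectors have moved yet.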
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0ff8f4637741..48975a264985 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -395,9 +395,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
 	if (!get_ldev(device))
 		return -EIO;
 
-	if (drbd_rs_should_slow_down(device, sector))
-		goto defer;
-
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
@@ -651,8 +648,7 @@ next_sector:
 
 		sector = BM_BIT_TO_SECT(bit);
 
-		if (drbd_rs_should_slow_down(device, sector) ||
-		    drbd_try_rs_begin_io(device, sector)) {
+		if (drbd_try_rs_begin_io(device, sector)) {
 			device->bm_resync_fo = bit;
 			goto requeue;
 		}
@@ -783,8 +779,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
 
 		size = BM_BLOCK_SIZE;
 
-		if (drbd_rs_should_slow_down(device, sector) ||
-		    drbd_try_rs_begin_io(device, sector)) {
+		if (drbd_try_rs_begin_io(device, sector)) {
 			device->ov_position = sector;
 			goto requeue;
 		}