diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2014-04-28 12:43:19 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-04-30 15:46:54 -0400 |
commit | e82998743385ca861b9ec919eb2ba8177ce72180 (patch) | |
tree | 4cf895d0a9633eff1f11363f98ebe60713a4bcd7 | |
parent | 0e49d7b014c5d591a053d08888a455bd74a88646 (diff) |
drbd: don't let application IO pre-empt resync too often
Before, application IO could pre-empt resync activity
for up to a hard-coded 20 seconds per resync request.
A very busy server could throttle the effective resync bandwidth
down to one request per 20 seconds.
Now, we only let application IO pre-empt resync traffic
while the current resync rate estimate is above c-min-rate.
If you disable the c-min-rate throttle feature (set c-min-rate = 0),
application IO will no longer pre-empt resync traffic at all.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 13 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 3 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 47 |
3 files changed, 34 insertions, 29 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 7e7b0e143655..8dd09a7f23c6 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -1022,8 +1022,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector) | |||
1022 | unsigned int enr = BM_SECT_TO_EXT(sector); | 1022 | unsigned int enr = BM_SECT_TO_EXT(sector); |
1023 | struct bm_extent *bm_ext; | 1023 | struct bm_extent *bm_ext; |
1024 | int i, sig; | 1024 | int i, sig; |
1025 | int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait. | 1025 | bool sa; |
1026 | 200 times -> 20 seconds. */ | ||
1027 | 1026 | ||
1028 | retry: | 1027 | retry: |
1029 | sig = wait_event_interruptible(device->al_wait, | 1028 | sig = wait_event_interruptible(device->al_wait, |
@@ -1034,12 +1033,15 @@ retry: | |||
1034 | if (test_bit(BME_LOCKED, &bm_ext->flags)) | 1033 | if (test_bit(BME_LOCKED, &bm_ext->flags)) |
1035 | return 0; | 1034 | return 0; |
1036 | 1035 | ||
1036 | /* step aside only while we are above c-min-rate; unless disabled. */ | ||
1037 | sa = drbd_rs_c_min_rate_throttle(device); | ||
1038 | |||
1037 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { | 1039 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { |
1038 | sig = wait_event_interruptible(device->al_wait, | 1040 | sig = wait_event_interruptible(device->al_wait, |
1039 | !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || | 1041 | !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || |
1040 | test_bit(BME_PRIORITY, &bm_ext->flags)); | 1042 | (sa && test_bit(BME_PRIORITY, &bm_ext->flags))); |
1041 | 1043 | ||
1042 | if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) { | 1044 | if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) { |
1043 | spin_lock_irq(&device->al_lock); | 1045 | spin_lock_irq(&device->al_lock); |
1044 | if (lc_put(device->resync, &bm_ext->lce) == 0) { | 1046 | if (lc_put(device->resync, &bm_ext->lce) == 0) { |
1045 | bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ | 1047 | bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ |
@@ -1051,9 +1053,6 @@ retry: | |||
1051 | return -EINTR; | 1053 | return -EINTR; |
1052 | if (schedule_timeout_interruptible(HZ/10)) | 1054 | if (schedule_timeout_interruptible(HZ/10)) |
1053 | return -EINTR; | 1055 | return -EINTR; |
1054 | if (sa && --sa == 0) | ||
1055 | drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec." | ||
1056 | "Resync stalled?\n"); | ||
1057 | goto retry; | 1056 | goto retry; |
1058 | } | 1057 | } |
1059 | } | 1058 | } |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 361a2e9cd727..f0cabea5cda2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -1339,7 +1339,8 @@ extern void start_resync_timer_fn(unsigned long data); | |||
1339 | /* drbd_receiver.c */ | 1339 | /* drbd_receiver.c */ |
1340 | extern int drbd_receiver(struct drbd_thread *thi); | 1340 | extern int drbd_receiver(struct drbd_thread *thi); |
1341 | extern int drbd_asender(struct drbd_thread *thi); | 1341 | extern int drbd_asender(struct drbd_thread *thi); |
1342 | extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); | 1342 | extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); |
1343 | extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); | ||
1343 | extern int drbd_submit_peer_request(struct drbd_device *, | 1344 | extern int drbd_submit_peer_request(struct drbd_device *, |
1344 | struct drbd_peer_request *, const unsigned, | 1345 | struct drbd_peer_request *, const unsigned, |
1345 | const int); | 1346 | const int); |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ffbc22eba0b..10d2dcb16bff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -2323,39 +2323,45 @@ out_interrupted: | |||
2323 | * The current sync rate used here uses only the most recent two step marks, | 2323 | * The current sync rate used here uses only the most recent two step marks, |
2324 | * to have a short time average so we can react faster. | 2324 | * to have a short time average so we can react faster. |
2325 | */ | 2325 | */ |
2326 | int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) | 2326 | bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) |
2327 | { | 2327 | { |
2328 | struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; | ||
2329 | unsigned long db, dt, dbdt; | ||
2330 | struct lc_element *tmp; | 2328 | struct lc_element *tmp; |
2331 | int curr_events; | 2329 | bool throttle = true; |
2332 | int throttle = 0; | ||
2333 | unsigned int c_min_rate; | ||
2334 | 2330 | ||
2335 | rcu_read_lock(); | 2331 | if (!drbd_rs_c_min_rate_throttle(device)) |
2336 | c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; | 2332 | return false; |
2337 | rcu_read_unlock(); | ||
2338 | |||
2339 | /* feature disabled? */ | ||
2340 | if (c_min_rate == 0) | ||
2341 | return 0; | ||
2342 | 2333 | ||
2343 | spin_lock_irq(&device->al_lock); | 2334 | spin_lock_irq(&device->al_lock); |
2344 | tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); | 2335 | tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); |
2345 | if (tmp) { | 2336 | if (tmp) { |
2346 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); | 2337 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); |
2347 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) { | 2338 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) |
2348 | spin_unlock_irq(&device->al_lock); | 2339 | throttle = false; |
2349 | return 0; | ||
2350 | } | ||
2351 | /* Do not slow down if app IO is already waiting for this extent */ | 2340 | /* Do not slow down if app IO is already waiting for this extent */ |
2352 | } | 2341 | } |
2353 | spin_unlock_irq(&device->al_lock); | 2342 | spin_unlock_irq(&device->al_lock); |
2354 | 2343 | ||
2344 | return throttle; | ||
2345 | } | ||
2346 | |||
2347 | bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) | ||
2348 | { | ||
2349 | struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; | ||
2350 | unsigned long db, dt, dbdt; | ||
2351 | unsigned int c_min_rate; | ||
2352 | int curr_events; | ||
2353 | |||
2354 | rcu_read_lock(); | ||
2355 | c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; | ||
2356 | rcu_read_unlock(); | ||
2357 | |||
2358 | /* feature disabled? */ | ||
2359 | if (c_min_rate == 0) | ||
2360 | return false; | ||
2361 | |||
2355 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + | 2362 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
2356 | (int)part_stat_read(&disk->part0, sectors[1]) - | 2363 | (int)part_stat_read(&disk->part0, sectors[1]) - |
2357 | atomic_read(&device->rs_sect_ev); | 2364 | atomic_read(&device->rs_sect_ev); |
2358 | |||
2359 | if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { | 2365 | if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { |
2360 | unsigned long rs_left; | 2366 | unsigned long rs_left; |
2361 | int i; | 2367 | int i; |
@@ -2378,12 +2384,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) | |||
2378 | dbdt = Bit2KB(db/dt); | 2384 | dbdt = Bit2KB(db/dt); |
2379 | 2385 | ||
2380 | if (dbdt > c_min_rate) | 2386 | if (dbdt > c_min_rate) |
2381 | throttle = 1; | 2387 | return true; |
2382 | } | 2388 | } |
2383 | return throttle; | 2389 | return false; |
2384 | } | 2390 | } |
2385 | 2391 | ||
2386 | |||
2387 | static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) | 2392 | static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) |
2388 | { | 2393 | { |
2389 | struct drbd_peer_device *peer_device; | 2394 | struct drbd_peer_device *peer_device; |