aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2010-11-07 09:56:29 -0500
committerPhilipp Reisner <philipp.reisner@linbit.com>2011-03-10 05:34:53 -0500
commite3555d8545976703938d1b59e2db509426dbe02c (patch)
treea29b15c4ab07b51bf6e162ae68796c50718778aa
parent59817f4fab6a165ba83ce399464ba38432db8233 (diff)
drbd: Implemented priority inheritance for resync requests
We only issue resync requests if there is no significant application IO going on. Application IO has higher priority than resync IO. If application IO can not be started because the resync process locked a resync_lru entry, start the IO operations necessary to release the lock ASAP. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r--drivers/block/drbd/drbd_actlog.c4
-rw-r--r--drivers/block/drbd/drbd_int.h3
-rw-r--r--drivers/block/drbd/drbd_receiver.c22
-rw-r--r--drivers/block/drbd/drbd_worker.c18
4 files changed, 27 insertions, 20 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 33f6cc537d08..28f85d950781 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -182,6 +182,7 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
182 if (unlikely(tmp != NULL)) { 182 if (unlikely(tmp != NULL)) {
183 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 183 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
184 if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { 184 if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
185 set_bit(BME_PRIORITY, &bm_ext->flags);
185 spin_unlock_irq(&mdev->al_lock); 186 spin_unlock_irq(&mdev->al_lock);
186 return NULL; 187 return NULL;
187 } 188 }
@@ -1297,8 +1298,7 @@ void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
1297 } 1298 }
1298 1299
1299 if (lc_put(mdev->resync, &bm_ext->lce) == 0) { 1300 if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
1300 clear_bit(BME_LOCKED, &bm_ext->flags); 1301 bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
1301 clear_bit(BME_NO_WRITES, &bm_ext->flags);
1302 mdev->resync_locked--; 1302 mdev->resync_locked--;
1303 wake_up(&mdev->al_wait); 1303 wake_up(&mdev->al_wait);
1304 } 1304 }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index da02cce374c9..366873d661b8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1326,6 +1326,7 @@ struct bm_extent {
1326 1326
1327#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ 1327#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */
1328#define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ 1328#define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */
1329#define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */
1329 1330
1330/* drbd_bitmap.c */ 1331/* drbd_bitmap.c */
1331/* 1332/*
@@ -1552,7 +1553,7 @@ extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
1552extern void resync_timer_fn(unsigned long data); 1553extern void resync_timer_fn(unsigned long data);
1553 1554
1554/* drbd_receiver.c */ 1555/* drbd_receiver.c */
1555extern int drbd_rs_should_slow_down(struct drbd_conf *mdev); 1556extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
1556extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1557extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1557 const unsigned rw, const int fault_type); 1558 const unsigned rw, const int fault_type);
1558extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); 1559extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ee9238e59327..0630a2e122d3 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1862,10 +1862,11 @@ out_interrupted:
1862 * The current sync rate used here uses only the most recent two step marks, 1862 * The current sync rate used here uses only the most recent two step marks,
1863 * to have a short time average so we can react faster. 1863 * to have a short time average so we can react faster.
1864 */ 1864 */
1865int drbd_rs_should_slow_down(struct drbd_conf *mdev) 1865int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
1866{ 1866{
1867 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; 1867 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1868 unsigned long db, dt, dbdt; 1868 unsigned long db, dt, dbdt;
1869 struct lc_element *tmp;
1869 int curr_events; 1870 int curr_events;
1870 int throttle = 0; 1871 int throttle = 0;
1871 1872
@@ -1873,9 +1874,22 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev)
1873 if (mdev->sync_conf.c_min_rate == 0) 1874 if (mdev->sync_conf.c_min_rate == 0)
1874 return 0; 1875 return 0;
1875 1876
1877 spin_lock_irq(&mdev->al_lock);
1878 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1879 if (tmp) {
1880 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1881 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1882 spin_unlock_irq(&mdev->al_lock);
1883 return 0;
1884 }
1885 /* Do not slow down if app IO is already waiting for this extent */
1886 }
1887 spin_unlock_irq(&mdev->al_lock);
1888
1876 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + 1889 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1877 (int)part_stat_read(&disk->part0, sectors[1]) - 1890 (int)part_stat_read(&disk->part0, sectors[1]) -
1878 atomic_read(&mdev->rs_sect_ev); 1891 atomic_read(&mdev->rs_sect_ev);
1892
1879 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { 1893 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1880 unsigned long rs_left; 1894 unsigned long rs_left;
1881 int i; 1895 int i;
@@ -2060,9 +2074,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un
2060 * we would also throttle its application reads. 2074 * we would also throttle its application reads.
2061 * In that case, throttling is done on the SyncTarget only. 2075 * In that case, throttling is done on the SyncTarget only.
2062 */ 2076 */
2063 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) 2077 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2064 msleep(100); 2078 schedule_timeout_uninterruptible(HZ/10);
2065 if (drbd_rs_begin_io(mdev, e->sector)) 2079 if (drbd_rs_begin_io(mdev, sector))
2066 goto out_free_e; 2080 goto out_free_e;
2067 2081
2068submit_for_resync: 2082submit_for_resync:
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 982d68432a0f..4008130f2b2c 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -355,7 +355,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
355 if (!get_ldev(mdev)) 355 if (!get_ldev(mdev))
356 return -EIO; 356 return -EIO;
357 357
358 if (drbd_rs_should_slow_down(mdev)) 358 if (drbd_rs_should_slow_down(mdev, sector))
359 goto defer; 359 goto defer;
360 360
361 /* GFP_TRY, because if there is no memory available right now, this may 361 /* GFP_TRY, because if there is no memory available right now, this may
@@ -503,16 +503,6 @@ int drbd_rs_number_requests(struct drbd_conf *mdev)
503 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); 503 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
504 } 504 }
505 505
506 /* Throttle resync on lower level disk activity, which may also be
507 * caused by application IO on Primary/SyncTarget.
508 * Keep this after the call to drbd_rs_controller, as that assumes
509 * to be called as precisely as possible every SLEEP_TIME,
510 * and would be confused otherwise. */
511 if (number && drbd_rs_should_slow_down(mdev)) {
512 mdev->c_sync_rate = 1;
513 number = 0;
514 }
515
516 /* ignore the amount of pending requests, the resync controller should 506 /* ignore the amount of pending requests, the resync controller should
517 * throttle down to incoming reply rate soon enough anyways. */ 507 * throttle down to incoming reply rate soon enough anyways. */
518 return number; 508 return number;
@@ -594,7 +584,8 @@ next_sector:
594 584
595 sector = BM_BIT_TO_SECT(bit); 585 sector = BM_BIT_TO_SECT(bit);
596 586
597 if (drbd_try_rs_begin_io(mdev, sector)) { 587 if (drbd_rs_should_slow_down(mdev, sector) ||
588 drbd_try_rs_begin_io(mdev, sector)) {
598 mdev->bm_resync_fo = bit; 589 mdev->bm_resync_fo = bit;
599 goto requeue; 590 goto requeue;
600 } 591 }
@@ -719,7 +710,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
719 710
720 size = BM_BLOCK_SIZE; 711 size = BM_BLOCK_SIZE;
721 712
722 if (drbd_try_rs_begin_io(mdev, sector)) { 713 if (drbd_rs_should_slow_down(mdev, sector) ||
714 drbd_try_rs_begin_io(mdev, sector)) {
723 mdev->ov_position = sector; 715 mdev->ov_position = sector;
724 goto requeue; 716 goto requeue;
725 } 717 }