aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilipp Reisner <philipp.reisner@linbit.com>2011-11-11 06:31:20 -0500
committerPhilipp Reisner <philipp.reisner@linbit.com>2012-11-08 10:58:10 -0500
commit380207d08e7c4d1b19c0323777278992b4fbf9d6 (patch)
tree3a35f3543810eb2a7d87f15dd62a8fe8e13aad76
parentd10b4ea32bf2b77a3d56a20992cd549978df7b38 (diff)
drbd: Load balancing of read requests
New config option for the disk section "read-balancing", with the values: prefer-local, prefer-remote, round-robin, least-pending, when-congested-remote. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r--drivers/block/drbd/drbd_int.h1
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/block/drbd/drbd_req.c57
-rw-r--r--include/linux/drbd.h8
-rw-r--r--include/linux/drbd_genl.h1
-rw-r--r--include/linux/drbd_limits.h1
6 files changed, 68 insertions, 2 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d397681fb7aa..e2cccb40f5af 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -698,6 +698,7 @@ enum {
698 AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ 698 AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */
699 B_RS_H_DONE, /* Before resync handler done (already executed) */ 699 B_RS_H_DONE, /* Before resync handler done (already executed) */
700 DISCARD_MY_DATA, /* discard_my_data flag per volume */ 700 DISCARD_MY_DATA, /* discard_my_data flag per volume */
701 READ_BALANCE_RR,
701}; 702};
702 703
703struct drbd_bitmap; /* opaque for drbd_conf */ 704struct drbd_bitmap; /* opaque for drbd_conf */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e546dd3fab8a..733b8bd663d5 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
4974 4974
4975 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 4975 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4976 4976
4977 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", 4977 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
4978 (unsigned long long)sector, be32_to_cpu(p->blksize)); 4978 (unsigned long long)sector, be32_to_cpu(p->blksize));
4979 4979
4980 return validate_req_change_req_state(mdev, p->block_id, sector, 4980 return validate_req_change_req_state(mdev, p->block_id, sector,
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index ceb04a94aace..98251e2a7fb7 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
563 if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) 563 if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
564 atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); 564 atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
565 565
566 if (!(req->rq_state & RQ_WRITE) &&
567 mdev->state.disk == D_UP_TO_DATE &&
568 !IS_ERR_OR_NULL(req->private_bio))
569 goto goto_read_retry_local;
570
566 /* if it is still queued, we may not complete it here. 571 /* if it is still queued, we may not complete it here.
567 * it will be canceled soon. */ 572 * it will be canceled soon. */
568 if (!(req->rq_state & RQ_NET_QUEUED)) 573 if (!(req->rq_state & RQ_NET_QUEUED))
@@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
625 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); 630 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
626 631
627 req->rq_state |= RQ_NET_DONE; 632 req->rq_state |= RQ_NET_DONE;
633
634 if (!(req->rq_state & RQ_WRITE) &&
635 mdev->state.disk == D_UP_TO_DATE &&
636 !IS_ERR_OR_NULL(req->private_bio))
637 goto goto_read_retry_local;
638
628 _req_may_be_done_not_susp(req, m); 639 _req_may_be_done_not_susp(req, m);
629 /* else: done by HANDED_OVER_TO_NETWORK */ 640 /* else: done by HANDED_OVER_TO_NETWORK */
630 break; 641 break;
631 642
643 goto_read_retry_local:
644 req->rq_state |= RQ_LOCAL_PENDING;
645 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
646 generic_make_request(req->private_bio);
647 break;
648
632 case FAIL_FROZEN_DISK_IO: 649 case FAIL_FROZEN_DISK_IO:
633 if (!(req->rq_state & RQ_LOCAL_COMPLETED)) 650 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
634 break; 651 break;
@@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
689 dec_ap_pending(mdev); 706 dec_ap_pending(mdev);
690 req->rq_state &= ~RQ_NET_PENDING; 707 req->rq_state &= ~RQ_NET_PENDING;
691 req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); 708 req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
709 if (!IS_ERR_OR_NULL(req->private_bio)) {
710 bio_put(req->private_bio);
711 req->private_bio = NULL;
712 put_ldev(mdev);
713 }
692 _req_may_be_done_not_susp(req, m); 714 _req_may_be_done_not_susp(req, m);
693 break; 715 break;
694 }; 716 };
@@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int
723 return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; 745 return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
724} 746}
725 747
748static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
749{
750 enum drbd_read_balancing rbm;
751 struct backing_dev_info *bdi;
752
753 if (mdev->state.pdsk < D_UP_TO_DATE)
754 return false;
755
756 rcu_read_lock();
757 rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing;
758 rcu_read_unlock();
759
760 switch (rbm) {
761 case RB_CONGESTED_REMOTE:
762 bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
763 return bdi_read_congested(bdi);
764 case RB_LEAST_PENDING:
765 return atomic_read(&mdev->local_cnt) >
766 atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
767 case RB_ROUND_ROBIN:
768 return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
769 case RB_PREFER_REMOTE:
770 return true;
771 case RB_PREFER_LOCAL:
772 default:
773 return false;
774 }
775}
776
726/* 777/*
727 * complete_conflicting_writes - wait for any conflicting write requests 778 * complete_conflicting_writes - wait for any conflicting write requests
728 * 779 *
@@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
790 bio_put(req->private_bio); 841 bio_put(req->private_bio);
791 req->private_bio = NULL; 842 req->private_bio = NULL;
792 put_ldev(mdev); 843 put_ldev(mdev);
844 } else if (remote_due_to_read_balancing(mdev)) {
845 /* Keep the private bio in case we need it
846 for a local retry */
847 local = 0;
793 } 848 }
794 } 849 }
795 remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; 850 remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
@@ -1017,7 +1072,7 @@ fail_free_complete:
1017 if (req->rq_state & RQ_IN_ACT_LOG) 1072 if (req->rq_state & RQ_IN_ACT_LOG)
1018 drbd_al_complete_io(mdev, &req->i); 1073 drbd_al_complete_io(mdev, &req->i);
1019fail_and_free_req: 1074fail_and_free_req:
1020 if (local) { 1075 if (!IS_ERR_OR_NULL(req->private_bio)) {
1021 bio_put(req->private_bio); 1076 bio_put(req->private_bio);
1022 req->private_bio = NULL; 1077 req->private_bio = NULL;
1023 put_ldev(mdev); 1078 put_ldev(mdev);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1e9f754b66ac..157ba3d74dc7 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -102,6 +102,14 @@ enum drbd_on_congestion {
102 OC_DISCONNECT, 102 OC_DISCONNECT,
103}; 103};
104 104
105enum drbd_read_balancing {
106 RB_PREFER_LOCAL,
107 RB_PREFER_REMOTE,
108 RB_ROUND_ROBIN,
109 RB_LEAST_PENDING,
110 RB_CONGESTED_REMOTE,
111};
112
105/* KEEP the order, do not delete or insert. Only append. */ 113/* KEEP the order, do not delete or insert. Only append. */
106enum drbd_ret_code { 114enum drbd_ret_code {
107 ERR_CODE_BASE = 100, 115 ERR_CODE_BASE = 100,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 2e6cefefe5e5..826008f297fe 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
129 __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) 129 __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF)
130 __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) 130 __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF)
131 __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) 131 __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF)
132 __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF)
132) 133)
133 134
134GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, 135GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 6d0a24331ed2..17ef66a5c114 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -161,6 +161,7 @@
161#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT 161#define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
162#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR 162#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
163#define DRBD_ON_CONGESTION_DEF OC_BLOCK 163#define DRBD_ON_CONGESTION_DEF OC_BLOCK
164#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
164 165
165#define DRBD_MAX_BIO_BVECS_MIN 0 166#define DRBD_MAX_BIO_BVECS_MIN 0
166#define DRBD_MAX_BIO_BVECS_MAX 128 167#define DRBD_MAX_BIO_BVECS_MAX 128