diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2011-11-11 06:31:20 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-08 10:58:10 -0500 |
commit | 380207d08e7c4d1b19c0323777278992b4fbf9d6 (patch) | |
tree | 3a35f3543810eb2a7d87f15dd62a8fe8e13aad76 | |
parent | d10b4ea32bf2b77a3d56a20992cd549978df7b38 (diff) |
drbd: Load balancing of read requests
New config option for the disk section "read-balancing", with
the values: prefer-local, prefer-remote, round-robin, when-congested-remote.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 57 | ||||
-rw-r--r-- | include/linux/drbd.h | 8 | ||||
-rw-r--r-- | include/linux/drbd_genl.h | 1 | ||||
-rw-r--r-- | include/linux/drbd_limits.h | 1 |
6 files changed, 68 insertions, 2 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d397681fb7aa..e2cccb40f5af 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -698,6 +698,7 @@ enum { | |||
698 | AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ | 698 | AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ |
699 | B_RS_H_DONE, /* Before resync handler done (already executed) */ | 699 | B_RS_H_DONE, /* Before resync handler done (already executed) */ |
700 | DISCARD_MY_DATA, /* discard_my_data flag per volume */ | 700 | DISCARD_MY_DATA, /* discard_my_data flag per volume */ |
701 | READ_BALANCE_RR, | ||
701 | }; | 702 | }; |
702 | 703 | ||
703 | struct drbd_bitmap; /* opaque for drbd_conf */ | 704 | struct drbd_bitmap; /* opaque for drbd_conf */ |
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e546dd3fab8a..733b8bd663d5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) | |||
4974 | 4974 | ||
4975 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); | 4975 | update_peer_seq(mdev, be32_to_cpu(p->seq_num)); |
4976 | 4976 | ||
4977 | dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", | 4977 | dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", |
4978 | (unsigned long long)sector, be32_to_cpu(p->blksize)); | 4978 | (unsigned long long)sector, be32_to_cpu(p->blksize)); |
4979 | 4979 | ||
4980 | return validate_req_change_req_state(mdev, p->block_id, sector, | 4980 | return validate_req_change_req_state(mdev, p->block_id, sector, |
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ceb04a94aace..98251e2a7fb7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
563 | if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) | 563 | if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) |
564 | atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); | 564 | atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); |
565 | 565 | ||
566 | if (!(req->rq_state & RQ_WRITE) && | ||
567 | mdev->state.disk == D_UP_TO_DATE && | ||
568 | !IS_ERR_OR_NULL(req->private_bio)) | ||
569 | goto goto_read_retry_local; | ||
570 | |||
566 | /* if it is still queued, we may not complete it here. | 571 | /* if it is still queued, we may not complete it here. |
567 | * it will be canceled soon. */ | 572 | * it will be canceled soon. */ |
568 | if (!(req->rq_state & RQ_NET_QUEUED)) | 573 | if (!(req->rq_state & RQ_NET_QUEUED)) |
@@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
625 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); | 630 | req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); |
626 | 631 | ||
627 | req->rq_state |= RQ_NET_DONE; | 632 | req->rq_state |= RQ_NET_DONE; |
633 | |||
634 | if (!(req->rq_state & RQ_WRITE) && | ||
635 | mdev->state.disk == D_UP_TO_DATE && | ||
636 | !IS_ERR_OR_NULL(req->private_bio)) | ||
637 | goto goto_read_retry_local; | ||
638 | |||
628 | _req_may_be_done_not_susp(req, m); | 639 | _req_may_be_done_not_susp(req, m); |
629 | /* else: done by HANDED_OVER_TO_NETWORK */ | 640 | /* else: done by HANDED_OVER_TO_NETWORK */ |
630 | break; | 641 | break; |
631 | 642 | ||
643 | goto_read_retry_local: | ||
644 | req->rq_state |= RQ_LOCAL_PENDING; | ||
645 | req->private_bio->bi_bdev = mdev->ldev->backing_bdev; | ||
646 | generic_make_request(req->private_bio); | ||
647 | break; | ||
648 | |||
632 | case FAIL_FROZEN_DISK_IO: | 649 | case FAIL_FROZEN_DISK_IO: |
633 | if (!(req->rq_state & RQ_LOCAL_COMPLETED)) | 650 | if (!(req->rq_state & RQ_LOCAL_COMPLETED)) |
634 | break; | 651 | break; |
@@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
689 | dec_ap_pending(mdev); | 706 | dec_ap_pending(mdev); |
690 | req->rq_state &= ~RQ_NET_PENDING; | 707 | req->rq_state &= ~RQ_NET_PENDING; |
691 | req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); | 708 | req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); |
709 | if (!IS_ERR_OR_NULL(req->private_bio)) { | ||
710 | bio_put(req->private_bio); | ||
711 | req->private_bio = NULL; | ||
712 | put_ldev(mdev); | ||
713 | } | ||
692 | _req_may_be_done_not_susp(req, m); | 714 | _req_may_be_done_not_susp(req, m); |
693 | break; | 715 | break; |
694 | }; | 716 | }; |
@@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int | |||
723 | return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; | 745 | return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; |
724 | } | 746 | } |
725 | 747 | ||
748 | static bool remote_due_to_read_balancing(struct drbd_conf *mdev) | ||
749 | { | ||
750 | enum drbd_read_balancing rbm; | ||
751 | struct backing_dev_info *bdi; | ||
752 | |||
753 | if (mdev->state.pdsk < D_UP_TO_DATE) | ||
754 | return false; | ||
755 | |||
756 | rcu_read_lock(); | ||
757 | rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; | ||
758 | rcu_read_unlock(); | ||
759 | |||
760 | switch (rbm) { | ||
761 | case RB_CONGESTED_REMOTE: | ||
762 | bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; | ||
763 | return bdi_read_congested(bdi); | ||
764 | case RB_LEAST_PENDING: | ||
765 | return atomic_read(&mdev->local_cnt) > | ||
766 | atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); | ||
767 | case RB_ROUND_ROBIN: | ||
768 | return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); | ||
769 | case RB_PREFER_REMOTE: | ||
770 | return true; | ||
771 | case RB_PREFER_LOCAL: | ||
772 | default: | ||
773 | return false; | ||
774 | } | ||
775 | } | ||
776 | |||
726 | /* | 777 | /* |
727 | * complete_conflicting_writes - wait for any conflicting write requests | 778 | * complete_conflicting_writes - wait for any conflicting write requests |
728 | * | 779 | * |
@@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s | |||
790 | bio_put(req->private_bio); | 841 | bio_put(req->private_bio); |
791 | req->private_bio = NULL; | 842 | req->private_bio = NULL; |
792 | put_ldev(mdev); | 843 | put_ldev(mdev); |
844 | } else if (remote_due_to_read_balancing(mdev)) { | ||
845 | /* Keep the private bio in case we need it | ||
846 | for a local retry */ | ||
847 | local = 0; | ||
793 | } | 848 | } |
794 | } | 849 | } |
795 | remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; | 850 | remote = !local && mdev->state.pdsk >= D_UP_TO_DATE; |
@@ -1017,7 +1072,7 @@ fail_free_complete: | |||
1017 | if (req->rq_state & RQ_IN_ACT_LOG) | 1072 | if (req->rq_state & RQ_IN_ACT_LOG) |
1018 | drbd_al_complete_io(mdev, &req->i); | 1073 | drbd_al_complete_io(mdev, &req->i); |
1019 | fail_and_free_req: | 1074 | fail_and_free_req: |
1020 | if (local) { | 1075 | if (!IS_ERR_OR_NULL(req->private_bio)) { |
1021 | bio_put(req->private_bio); | 1076 | bio_put(req->private_bio); |
1022 | req->private_bio = NULL; | 1077 | req->private_bio = NULL; |
1023 | put_ldev(mdev); | 1078 | put_ldev(mdev); |
diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 1e9f754b66ac..157ba3d74dc7 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h | |||
@@ -102,6 +102,14 @@ enum drbd_on_congestion { | |||
102 | OC_DISCONNECT, | 102 | OC_DISCONNECT, |
103 | }; | 103 | }; |
104 | 104 | ||
105 | enum drbd_read_balancing { | ||
106 | RB_PREFER_LOCAL, | ||
107 | RB_PREFER_REMOTE, | ||
108 | RB_ROUND_ROBIN, | ||
109 | RB_LEAST_PENDING, | ||
110 | RB_CONGESTED_REMOTE, | ||
111 | }; | ||
112 | |||
105 | /* KEEP the order, do not delete or insert. Only append. */ | 113 | /* KEEP the order, do not delete or insert. Only append. */ |
106 | enum drbd_ret_code { | 114 | enum drbd_ret_code { |
107 | ERR_CODE_BASE = 100, | 115 | ERR_CODE_BASE = 100, |
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2e6cefefe5e5..826008f297fe 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h | |||
@@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, | |||
129 | __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) | 129 | __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) |
130 | __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) | 130 | __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) |
131 | __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) | 131 | __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) |
132 | __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) | ||
132 | ) | 133 | ) |
133 | 134 | ||
134 | GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, | 135 | GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, |
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 6d0a24331ed2..17ef66a5c114 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h | |||
@@ -161,6 +161,7 @@ | |||
161 | #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT | 161 | #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT |
162 | #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR | 162 | #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR |
163 | #define DRBD_ON_CONGESTION_DEF OC_BLOCK | 163 | #define DRBD_ON_CONGESTION_DEF OC_BLOCK |
164 | #define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL | ||
164 | 165 | ||
165 | #define DRBD_MAX_BIO_BVECS_MIN 0 | 166 | #define DRBD_MAX_BIO_BVECS_MIN 0 |
166 | #define DRBD_MAX_BIO_BVECS_MAX 128 | 167 | #define DRBD_MAX_BIO_BVECS_MAX 128 |