diff options
author | David Dillow <dillowda@ornl.gov> | 2010-11-26 15:34:46 -0500 |
---|---|---|
committer | David Dillow <dillowda@ornl.gov> | 2011-01-10 15:44:51 -0500 |
commit | 9af762719e8f8fa282de02997dced593030eb238 (patch) | |
tree | 707ebec104ed79499935fe6367357121fbb6906e | |
parent | e9684678221441f886b4d7c74f8770bb0981737a (diff) |
IB/srp: consolidate hot-path variables into cache lines
Put the variables that are accessed together in the hot path into common
cache lines, and separate the read-write (RW) ones from the read-only (RO) ones to avoid false dirtying.
We keep a local copy of the lkey and rkey in the target to avoid
traversing pointers (and associated cache lines) to find them.
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: David Dillow <dillowda@ornl.gov>
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.h | 31 |
2 files changed, 26 insertions, 17 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8691fc83f70b..4b62105ed1e8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c | |||
@@ -768,7 +768,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, | |||
768 | struct srp_direct_buf *buf = (void *) cmd->add_data; | 768 | struct srp_direct_buf *buf = (void *) cmd->add_data; |
769 | 769 | ||
770 | buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); | 770 | buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); |
771 | buf->key = cpu_to_be32(dev->mr->rkey); | 771 | buf->key = cpu_to_be32(target->rkey); |
772 | buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); | 772 | buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); |
773 | } else if (srp_map_fmr(target, scat, count, req, | 773 | } else if (srp_map_fmr(target, scat, count, req, |
774 | (void *) cmd->add_data)) { | 774 | (void *) cmd->add_data)) { |
@@ -793,7 +793,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, | |||
793 | buf->desc_list[i].va = | 793 | buf->desc_list[i].va = |
794 | cpu_to_be64(ib_sg_dma_address(ibdev, sg)); | 794 | cpu_to_be64(ib_sg_dma_address(ibdev, sg)); |
795 | buf->desc_list[i].key = | 795 | buf->desc_list[i].key = |
796 | cpu_to_be32(dev->mr->rkey); | 796 | cpu_to_be32(target->rkey); |
797 | buf->desc_list[i].len = cpu_to_be32(dma_len); | 797 | buf->desc_list[i].len = cpu_to_be32(dma_len); |
798 | datalen += dma_len; | 798 | datalen += dma_len; |
799 | } | 799 | } |
@@ -806,7 +806,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, | |||
806 | buf->table_desc.va = | 806 | buf->table_desc.va = |
807 | cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf); | 807 | cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf); |
808 | buf->table_desc.key = | 808 | buf->table_desc.key = |
809 | cpu_to_be32(target->srp_host->srp_dev->mr->rkey); | 809 | cpu_to_be32(target->rkey); |
810 | buf->table_desc.len = | 810 | buf->table_desc.len = |
811 | cpu_to_be32(count * sizeof (struct srp_direct_buf)); | 811 | cpu_to_be32(count * sizeof (struct srp_direct_buf)); |
812 | 812 | ||
@@ -883,7 +883,7 @@ static int srp_post_send(struct srp_target_port *target, | |||
883 | 883 | ||
884 | list.addr = iu->dma; | 884 | list.addr = iu->dma; |
885 | list.length = len; | 885 | list.length = len; |
886 | list.lkey = target->srp_host->srp_dev->mr->lkey; | 886 | list.lkey = target->lkey; |
887 | 887 | ||
888 | wr.next = NULL; | 888 | wr.next = NULL; |
889 | wr.wr_id = (uintptr_t) iu; | 889 | wr.wr_id = (uintptr_t) iu; |
@@ -902,7 +902,7 @@ static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu) | |||
902 | 902 | ||
903 | list.addr = iu->dma; | 903 | list.addr = iu->dma; |
904 | list.length = iu->size; | 904 | list.length = iu->size; |
905 | list.lkey = target->srp_host->srp_dev->mr->lkey; | 905 | list.lkey = target->lkey; |
906 | 906 | ||
907 | wr.next = NULL; | 907 | wr.next = NULL; |
908 | wr.wr_id = (uintptr_t) iu; | 908 | wr.wr_id = (uintptr_t) iu; |
@@ -1955,6 +1955,8 @@ static ssize_t srp_create_target(struct device *dev, | |||
1955 | target->io_class = SRP_REV16A_IB_IO_CLASS; | 1955 | target->io_class = SRP_REV16A_IB_IO_CLASS; |
1956 | target->scsi_host = target_host; | 1956 | target->scsi_host = target_host; |
1957 | target->srp_host = host; | 1957 | target->srp_host = host; |
1958 | target->lkey = host->srp_dev->mr->lkey; | ||
1959 | target->rkey = host->srp_dev->mr->rkey; | ||
1958 | 1960 | ||
1959 | spin_lock_init(&target->lock); | 1961 | spin_lock_init(&target->lock); |
1960 | INIT_LIST_HEAD(&target->free_tx); | 1962 | INIT_LIST_HEAD(&target->free_tx); |
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index acb435d3c1e3..9dc6fc3fd894 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h | |||
@@ -117,6 +117,24 @@ struct srp_request { | |||
117 | }; | 117 | }; |
118 | 118 | ||
119 | struct srp_target_port { | 119 | struct srp_target_port { |
120 | /* These are RW in the hot path, and commonly used together */ | ||
121 | struct list_head free_tx; | ||
122 | struct list_head free_reqs; | ||
123 | spinlock_t lock; | ||
124 | s32 req_lim; | ||
125 | |||
126 | /* These are read-only in the hot path */ | ||
127 | struct ib_cq *send_cq ____cacheline_aligned_in_smp; | ||
128 | struct ib_cq *recv_cq; | ||
129 | struct ib_qp *qp; | ||
130 | u32 lkey; | ||
131 | u32 rkey; | ||
132 | enum srp_target_state state; | ||
133 | |||
134 | /* Everything above this point is used in the hot path of | ||
135 | * command processing. Try to keep them packed into cachelines. | ||
136 | */ | ||
137 | |||
120 | __be64 id_ext; | 138 | __be64 id_ext; |
121 | __be64 ioc_guid; | 139 | __be64 ioc_guid; |
122 | __be64 service_id; | 140 | __be64 service_id; |
@@ -133,23 +151,13 @@ struct srp_target_port { | |||
133 | int path_query_id; | 151 | int path_query_id; |
134 | 152 | ||
135 | struct ib_cm_id *cm_id; | 153 | struct ib_cm_id *cm_id; |
136 | struct ib_cq *recv_cq; | ||
137 | struct ib_cq *send_cq; | ||
138 | struct ib_qp *qp; | ||
139 | 154 | ||
140 | int max_ti_iu_len; | 155 | int max_ti_iu_len; |
141 | s32 req_lim; | ||
142 | 156 | ||
143 | int zero_req_lim; | 157 | int zero_req_lim; |
144 | 158 | ||
145 | struct srp_iu *rx_ring[SRP_RQ_SIZE]; | ||
146 | |||
147 | spinlock_t lock; | ||
148 | |||
149 | struct list_head free_tx; | ||
150 | struct srp_iu *tx_ring[SRP_SQ_SIZE]; | 159 | struct srp_iu *tx_ring[SRP_SQ_SIZE]; |
151 | 160 | struct srp_iu *rx_ring[SRP_RQ_SIZE]; | |
152 | struct list_head free_reqs; | ||
153 | struct srp_request req_ring[SRP_CMD_SQ_SIZE]; | 161 | struct srp_request req_ring[SRP_CMD_SQ_SIZE]; |
154 | 162 | ||
155 | struct work_struct work; | 163 | struct work_struct work; |
@@ -157,7 +165,6 @@ struct srp_target_port { | |||
157 | struct list_head list; | 165 | struct list_head list; |
158 | struct completion done; | 166 | struct completion done; |
159 | int status; | 167 | int status; |
160 | enum srp_target_state state; | ||
161 | int qp_in_error; | 168 | int qp_in_error; |
162 | 169 | ||
163 | struct completion tsk_mgmt_done; | 170 | struct completion tsk_mgmt_done; |