about summary refs log tree commit diff stats
path: root/drivers/infiniband/ulp/iser
diff options
context:
space:
mode:
authorSagi Grimberg <sagig@mellanox.com>2015-10-13 12:12:58 -0400
committerDoug Ledford <dledford@redhat.com>2015-10-28 12:26:06 -0400
commitdd0107a08996c0ab8cac2b98ddbed5313e118e81 (patch)
tree1248e196a4cfc38d3c06b736171249e395e19fe7 /drivers/infiniband/ulp/iser
parent6c760b3dd576329e776b353f2eaefbe2034361b9 (diff)
IB/iser: set block queue_virt_boundary
The block layer can reliably guarantee that SG lists won't contain gaps (page unaligned) if a driver set the queue virt_boundary. With this setting the block layer will: - refuse merges if bios are not aligned to the virtual boundary - split bios/requests that are not aligned to the virtual boundary - or, bounce buffer SG_IOs that are not aligned to the virtual boundary Since iser is working in 4K page size, set the virt_boundary to 4K pages. With this setting, we can now safely remove the bounce buffering logic in iser. Signed-off-by: Sagi Grimberg <sagig@mellanox.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/ulp/iser')
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c12
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h7
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c51
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c274
4 files changed, 18 insertions, 326 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 3d0bdb87a653..2ea0a14125e8 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -762,9 +762,7 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
762 stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ 762 stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
763 stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; 763 stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
764 stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; 764 stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
765 stats->custom_length = 1; 765 stats->custom_length = 0;
766 strcpy(stats->custom[0].desc, "fmr_unalign_cnt");
767 stats->custom[0].value = conn->fmr_unalign_cnt;
768} 766}
769 767
770static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, 768static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
@@ -969,6 +967,13 @@ static umode_t iser_attr_is_visible(int param_type, int param)
969 return 0; 967 return 0;
970} 968}
971 969
970static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
971{
972 blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
973
974 return 0;
975}
976
972static struct scsi_host_template iscsi_iser_sht = { 977static struct scsi_host_template iscsi_iser_sht = {
973 .module = THIS_MODULE, 978 .module = THIS_MODULE,
974 .name = "iSCSI Initiator over iSER", 979 .name = "iSCSI Initiator over iSER",
@@ -982,6 +987,7 @@ static struct scsi_host_template iscsi_iser_sht = {
982 .eh_target_reset_handler = iscsi_eh_recover_target, 987 .eh_target_reset_handler = iscsi_eh_recover_target,
983 .target_alloc = iscsi_target_alloc, 988 .target_alloc = iscsi_target_alloc,
984 .use_clustering = DISABLE_CLUSTERING, 989 .use_clustering = DISABLE_CLUSTERING,
990 .slave_alloc = iscsi_iser_slave_alloc,
985 .proc_name = "iscsi_iser", 991 .proc_name = "iscsi_iser",
986 .this_id = -1, 992 .this_id = -1,
987 .track_queue_depth = 1, 993 .track_queue_depth = 1,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index a5edd6ede692..d8bbad9eb59b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -227,18 +227,13 @@ enum iser_data_dir {
227 * @size: num entries of this sg 227 * @size: num entries of this sg
228 * @data_len: total beffer byte len 228 * @data_len: total beffer byte len
229 * @dma_nents: returned by dma_map_sg 229 * @dma_nents: returned by dma_map_sg
230 * @orig_sg: pointer to the original sg list (in case
231 * we used a copy)
232 * @orig_size: num entris of orig sg list
233 */ 230 */
234struct iser_data_buf { 231struct iser_data_buf {
235 struct scatterlist *sg; 232 struct scatterlist *sg;
236 unsigned int size; 233 unsigned int size;
237 unsigned long data_len; 234 unsigned long data_len;
238 unsigned int dma_nents; 235 unsigned int dma_nents;
239 struct scatterlist *orig_sg; 236};
240 unsigned int orig_size;
241 };
242 237
243/* fwd declarations */ 238/* fwd declarations */
244struct iser_device; 239struct iser_device;
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index d511879d8cdf..ffd00c420729 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -661,48 +661,14 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
661 661
662void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) 662void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
663{ 663{
664 int is_rdma_data_aligned = 1;
665 int is_rdma_prot_aligned = 1;
666 int prot_count = scsi_prot_sg_count(iser_task->sc); 664 int prot_count = scsi_prot_sg_count(iser_task->sc);
667 665
668 /* if we were reading, copy back to unaligned sglist,
669 * anyway dma_unmap and free the copy
670 */
671 if (iser_task->data[ISER_DIR_IN].orig_sg) {
672 is_rdma_data_aligned = 0;
673 iser_finalize_rdma_unaligned_sg(iser_task,
674 &iser_task->data[ISER_DIR_IN],
675 ISER_DIR_IN);
676 }
677
678 if (iser_task->data[ISER_DIR_OUT].orig_sg) {
679 is_rdma_data_aligned = 0;
680 iser_finalize_rdma_unaligned_sg(iser_task,
681 &iser_task->data[ISER_DIR_OUT],
682 ISER_DIR_OUT);
683 }
684
685 if (iser_task->prot[ISER_DIR_IN].orig_sg) {
686 is_rdma_prot_aligned = 0;
687 iser_finalize_rdma_unaligned_sg(iser_task,
688 &iser_task->prot[ISER_DIR_IN],
689 ISER_DIR_IN);
690 }
691
692 if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
693 is_rdma_prot_aligned = 0;
694 iser_finalize_rdma_unaligned_sg(iser_task,
695 &iser_task->prot[ISER_DIR_OUT],
696 ISER_DIR_OUT);
697 }
698
699 if (iser_task->dir[ISER_DIR_IN]) { 666 if (iser_task->dir[ISER_DIR_IN]) {
700 iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); 667 iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
701 if (is_rdma_data_aligned) 668 iser_dma_unmap_task_data(iser_task,
702 iser_dma_unmap_task_data(iser_task, 669 &iser_task->data[ISER_DIR_IN],
703 &iser_task->data[ISER_DIR_IN], 670 DMA_FROM_DEVICE);
704 DMA_FROM_DEVICE); 671 if (prot_count)
705 if (prot_count && is_rdma_prot_aligned)
706 iser_dma_unmap_task_data(iser_task, 672 iser_dma_unmap_task_data(iser_task,
707 &iser_task->prot[ISER_DIR_IN], 673 &iser_task->prot[ISER_DIR_IN],
708 DMA_FROM_DEVICE); 674 DMA_FROM_DEVICE);
@@ -710,11 +676,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
710 676
711 if (iser_task->dir[ISER_DIR_OUT]) { 677 if (iser_task->dir[ISER_DIR_OUT]) {
712 iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); 678 iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
713 if (is_rdma_data_aligned) 679 iser_dma_unmap_task_data(iser_task,
714 iser_dma_unmap_task_data(iser_task, 680 &iser_task->data[ISER_DIR_OUT],
715 &iser_task->data[ISER_DIR_OUT], 681 DMA_TO_DEVICE);
716 DMA_TO_DEVICE); 682 if (prot_count)
717 if (prot_count && is_rdma_prot_aligned)
718 iser_dma_unmap_task_data(iser_task, 683 iser_dma_unmap_task_data(iser_task,
719 &iser_task->prot[ISER_DIR_OUT], 684 &iser_task->prot[ISER_DIR_OUT],
720 DMA_TO_DEVICE); 685 DMA_TO_DEVICE);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 4c46d67d37a1..3e0452c4248f 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -88,113 +88,6 @@ int iser_assign_reg_ops(struct iser_device *device)
88 return 0; 88 return 0;
89} 89}
90 90
91static void
92iser_free_bounce_sg(struct iser_data_buf *data)
93{
94 struct scatterlist *sg;
95 int count;
96
97 for_each_sg(data->sg, sg, data->size, count)
98 __free_page(sg_page(sg));
99
100 kfree(data->sg);
101
102 data->sg = data->orig_sg;
103 data->size = data->orig_size;
104 data->orig_sg = NULL;
105 data->orig_size = 0;
106}
107
108static int
109iser_alloc_bounce_sg(struct iser_data_buf *data)
110{
111 struct scatterlist *sg;
112 struct page *page;
113 unsigned long length = data->data_len;
114 int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
115
116 sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
117 if (!sg)
118 goto err;
119
120 sg_init_table(sg, nents);
121 while (length) {
122 u32 page_len = min_t(u32, length, PAGE_SIZE);
123
124 page = alloc_page(GFP_ATOMIC);
125 if (!page)
126 goto err;
127
128 sg_set_page(&sg[i], page, page_len, 0);
129 length -= page_len;
130 i++;
131 }
132
133 data->orig_sg = data->sg;
134 data->orig_size = data->size;
135 data->sg = sg;
136 data->size = nents;
137
138 return 0;
139
140err:
141 for (; i > 0; i--)
142 __free_page(sg_page(&sg[i - 1]));
143 kfree(sg);
144
145 return -ENOMEM;
146}
147
148static void
149iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
150{
151 struct scatterlist *osg, *bsg = data->sg;
152 void *oaddr, *baddr;
153 unsigned int left = data->data_len;
154 unsigned int bsg_off = 0;
155 int i;
156
157 for_each_sg(data->orig_sg, osg, data->orig_size, i) {
158 unsigned int copy_len, osg_off = 0;
159
160 oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
161 copy_len = min(left, osg->length);
162 while (copy_len) {
163 unsigned int len = min(copy_len, bsg->length - bsg_off);
164
165 baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
166 if (to_buffer)
167 memcpy(baddr + bsg_off, oaddr + osg_off, len);
168 else
169 memcpy(oaddr + osg_off, baddr + bsg_off, len);
170
171 kunmap_atomic(baddr - bsg->offset);
172 osg_off += len;
173 bsg_off += len;
174 copy_len -= len;
175
176 if (bsg_off >= bsg->length) {
177 bsg = sg_next(bsg);
178 bsg_off = 0;
179 }
180 }
181 kunmap_atomic(oaddr - osg->offset);
182 left -= osg_off;
183 }
184}
185
186static inline void
187iser_copy_from_bounce(struct iser_data_buf *data)
188{
189 iser_copy_bounce(data, false);
190}
191
192static inline void
193iser_copy_to_bounce(struct iser_data_buf *data)
194{
195 iser_copy_bounce(data, true);
196}
197
198struct iser_fr_desc * 91struct iser_fr_desc *
199iser_reg_desc_get_fr(struct ib_conn *ib_conn) 92iser_reg_desc_get_fr(struct ib_conn *ib_conn)
200{ 93{
@@ -238,62 +131,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
238{ 131{
239} 132}
240 133
241/**
242 * iser_start_rdma_unaligned_sg
243 */
244static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
245 struct iser_data_buf *data,
246 enum iser_data_dir cmd_dir)
247{
248 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
249 int rc;
250
251 rc = iser_alloc_bounce_sg(data);
252 if (rc) {
253 iser_err("Failed to allocate bounce for data len %lu\n",
254 data->data_len);
255 return rc;
256 }
257
258 if (cmd_dir == ISER_DIR_OUT)
259 iser_copy_to_bounce(data);
260
261 data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
262 (cmd_dir == ISER_DIR_OUT) ?
263 DMA_TO_DEVICE : DMA_FROM_DEVICE);
264 if (!data->dma_nents) {
265 iser_err("Got dma_nents %d, something went wrong...\n",
266 data->dma_nents);
267 rc = -ENOMEM;
268 goto err;
269 }
270
271 return 0;
272err:
273 iser_free_bounce_sg(data);
274 return rc;
275}
276
277/**
278 * iser_finalize_rdma_unaligned_sg
279 */
280
281void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
282 struct iser_data_buf *data,
283 enum iser_data_dir cmd_dir)
284{
285 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
286
287 ib_dma_unmap_sg(dev, data->sg, data->size,
288 (cmd_dir == ISER_DIR_OUT) ?
289 DMA_TO_DEVICE : DMA_FROM_DEVICE);
290
291 if (cmd_dir == ISER_DIR_IN)
292 iser_copy_from_bounce(data);
293
294 iser_free_bounce_sg(data);
295}
296
297#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) 134#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
298 135
299/** 136/**
@@ -355,64 +192,6 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
355 return cur_page; 192 return cur_page;
356} 193}
357 194
358
359/**
360 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
361 * for RDMA sub-list of a scatter-gather list of memory buffers, and returns
362 * the number of entries which are aligned correctly. Supports the case where
363 * consecutive SG elements are actually fragments of the same physcial page.
364 */
365static int iser_data_buf_aligned_len(struct iser_data_buf *data,
366 struct ib_device *ibdev,
367 unsigned sg_tablesize)
368{
369 struct scatterlist *sg, *sgl, *next_sg = NULL;
370 u64 start_addr, end_addr;
371 int i, ret_len, start_check = 0;
372
373 if (data->dma_nents == 1)
374 return 1;
375
376 sgl = data->sg;
377 start_addr = ib_sg_dma_address(ibdev, sgl);
378
379 if (unlikely(sgl[0].offset &&
380 data->data_len >= sg_tablesize * PAGE_SIZE)) {
381 iser_dbg("can't register length %lx with offset %x "
382 "fall to bounce buffer\n", data->data_len,
383 sgl[0].offset);
384 return 0;
385 }
386
387 for_each_sg(sgl, sg, data->dma_nents, i) {
388 if (start_check && !IS_4K_ALIGNED(start_addr))
389 break;
390
391 next_sg = sg_next(sg);
392 if (!next_sg)
393 break;
394
395 end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
396 start_addr = ib_sg_dma_address(ibdev, next_sg);
397
398 if (end_addr == start_addr) {
399 start_check = 0;
400 continue;
401 } else
402 start_check = 1;
403
404 if (!IS_4K_ALIGNED(end_addr))
405 break;
406 }
407 ret_len = (next_sg) ? i : i+1;
408
409 if (unlikely(ret_len != data->dma_nents))
410 iser_warn("rdma alignment violation (%d/%d aligned)\n",
411 ret_len, data->dma_nents);
412
413 return ret_len;
414}
415
416static void iser_data_buf_dump(struct iser_data_buf *data, 195static void iser_data_buf_dump(struct iser_data_buf *data,
417 struct ib_device *ibdev) 196 struct ib_device *ibdev)
418{ 197{
@@ -483,31 +262,6 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
483 return 0; 262 return 0;
484} 263}
485 264
486static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
487 struct iser_data_buf *mem,
488 enum iser_data_dir cmd_dir)
489{
490 struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
491 struct iser_device *device = iser_task->iser_conn->ib_conn.device;
492
493 iscsi_conn->fmr_unalign_cnt++;
494
495 if (iser_debug_level > 0)
496 iser_data_buf_dump(mem, device->ib_device);
497
498 /* unmap the command data before accessing it */
499 iser_dma_unmap_task_data(iser_task, mem,
500 (cmd_dir == ISER_DIR_OUT) ?
501 DMA_TO_DEVICE : DMA_FROM_DEVICE);
502
503 /* allocate copy buf, if we are writing, copy the */
504 /* unaligned scatterlist, dma map the copy */
505 if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
506 return -ENOMEM;
507
508 return 0;
509}
510
511/** 265/**
512 * iser_reg_page_vec - Register physical memory 266 * iser_reg_page_vec - Register physical memory
513 * 267 *
@@ -780,26 +534,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
780} 534}
781 535
782static int 536static int
783iser_handle_unaligned_buf(struct iscsi_iser_task *task,
784 struct iser_data_buf *mem,
785 enum iser_data_dir dir)
786{
787 struct iser_conn *iser_conn = task->iser_conn;
788 struct iser_device *device = iser_conn->ib_conn.device;
789 int err, aligned_len;
790
791 aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
792 iser_conn->scsi_sg_tablesize);
793 if (aligned_len != mem->dma_nents) {
794 err = fall_to_bounce_buf(task, mem, dir);
795 if (err)
796 return err;
797 }
798
799 return 0;
800}
801
802static int
803iser_reg_prot_sg(struct iscsi_iser_task *task, 537iser_reg_prot_sg(struct iscsi_iser_task *task,
804 struct iser_data_buf *mem, 538 struct iser_data_buf *mem,
805 struct iser_fr_desc *desc, 539 struct iser_fr_desc *desc,
@@ -841,10 +575,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
841 bool use_dma_key; 575 bool use_dma_key;
842 int err; 576 int err;
843 577
844 err = iser_handle_unaligned_buf(task, mem, dir);
845 if (unlikely(err))
846 return err;
847
848 use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && 578 use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
849 scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); 579 scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
850 580
@@ -867,10 +597,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
867 597
868 if (scsi_prot_sg_count(task->sc)) { 598 if (scsi_prot_sg_count(task->sc)) {
869 mem = &task->prot[dir]; 599 mem = &task->prot[dir];
870 err = iser_handle_unaligned_buf(task, mem, dir);
871 if (unlikely(err))
872 goto err_reg;
873
874 err = iser_reg_prot_sg(task, mem, desc, 600 err = iser_reg_prot_sg(task, mem, desc,
875 use_dma_key, prot_reg); 601 use_dma_key, prot_reg);
876 if (unlikely(err)) 602 if (unlikely(err))