author     James Smart <jsmart2021@gmail.com>                2019-08-14 19:57:09 -0400
committer  Martin K. Petersen <martin.petersen@oracle.com>   2019-08-19 22:41:12 -0400
commit     d79c9e9d4b3d9330ee38f392a7c98e0fc494f7f8 (patch)
tree       088e0dc43e6f8415f9bd529e49a3e072c89631d9
parent     e62245d923caebc02582b12ce861c3d780b4106f (diff)
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.
Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI to contain the exchange's Scatter/Gather List. This caps the number of SGL elements that can be in the SGL. There are no extensions to extend the list out of the 2 pages.

The G7 hardware adds an SGE type that allows the SGL to be vectored to a different scatter/gather list segment. And that segment can contain an SGE to go to another segment and so on. The initial segment must still be pre-registered for the XRI, but it can be a much smaller amount (256 bytes) as it can now be dynamically grown. This much smaller allocation can handle the SG list for most normal I/O, and the dynamic aspect allows it to support many MBs if needed.

The implementation creates a pool which contains "segments" and which is initially sized to hold the initial small segment per XRI. If an I/O requires additional segments, they are allocated from the pool. If the pool has no more segments, the pool is grown based on what is now needed. After the I/O completes, the additional segments are returned to the pool for use by other I/Os. Once allocated, the additional segments are not released, under the assumption of "if needed once, it will be needed again".

Pools are kept on a per-hardware-queue basis, which is typically 1:1 per CPU, but may be shared by multiple CPUs.

The switch to the smaller initial allocation significantly reduces the memory footprint of the driver (which only grows if large I/Os are issued). Based on the several thousand XRIs for the adapter, the 8KB->256B reduction can conserve 32 MB or more.

It has been observed with per-CPU resource pools that a resource allocated on CPU A may be put back on CPU B. While the get routines are distributed evenly, only a limited subset of CPUs may be handling the put routines. This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because all the resources are being put on a limited subset of CPUs.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
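For reference, a minimal userspace sketch of the grow-on-demand, per-hardware-queue segment pool described above. The struct names, the 256-byte chunk size, and the pthread locking are illustrative stand-ins, not the driver's actual structures; the real implementation lives in lpfc_get_sgl_per_hdwq()/lpfc_put_sgl_per_hdwq() in the lpfc_sli.c hunk below.

/*
 * Sketch only: models the get/put pattern (reuse an idle chunk, grow the
 * pool when empty, return chunks on I/O completion, never shrink).
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct seg {                        /* stand-in for one 256B SGL chunk */
        struct seg *next;
        char data[256];
};

struct hdwq_pool {                  /* one pool per hardware queue */
        pthread_mutex_t lock;
        struct seg *free_list;      /* idle chunks, kept once allocated */
};

struct io {                         /* stand-in for one in-flight I/O */
        struct seg *extra;          /* chunks borrowed for this I/O */
};

/* Get one chunk: reuse an idle one, or grow the pool on demand. */
static struct seg *pool_get(struct hdwq_pool *p, struct io *io)
{
        struct seg *s;

        pthread_mutex_lock(&p->lock);
        s = p->free_list;
        if (s)
                p->free_list = s->next;
        pthread_mutex_unlock(&p->lock);

        if (!s) {
                s = calloc(1, sizeof(*s));  /* pool grows; it never shrinks */
                if (!s)
                        return NULL;
        }
        s->next = io->extra;                /* track the chunk against this I/O */
        io->extra = s;
        return s;
}

/* Put back: on I/O completion, return all borrowed chunks to the pool. */
static void pool_put(struct hdwq_pool *p, struct io *io)
{
        pthread_mutex_lock(&p->lock);
        while (io->extra) {
                struct seg *s = io->extra;

                io->extra = s->next;
                s->next = p->free_list;
                p->free_list = s;
        }
        pthread_mutex_unlock(&p->lock);
}

int main(void)
{
        struct hdwq_pool pool = { .lock = PTHREAD_MUTEX_INITIALIZER };
        struct io io = { 0 };

        /* A large I/O borrows two extra segments, then completes. */
        pool_get(&pool, &io);
        pool_get(&pool, &io);
        pool_put(&pool, &io);

        printf("pool retains %s after completion\n",
               pool.free_list && pool.free_list->next ? "2 chunks" : "fewer chunks");
        return 0;
}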
-rw-r--r--  drivers/scsi/lpfc/lpfc.h       |   5
-rw-r--r--  drivers/scsi/lpfc/lpfc_hw4.h   |  20
-rw-r--r--  drivers/scsi/lpfc/lpfc_init.c  | 313
-rw-r--r--  drivers/scsi/lpfc/lpfc_mem.c   |  44
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvme.c  |  94
-rw-r--r--  drivers/scsi/lpfc/lpfc_scsi.c  | 355
-rw-r--r--  drivers/scsi/lpfc/lpfc_sli.c   | 292
-rw-r--r--  drivers/scsi/lpfc/lpfc_sli.h   |  11
-rw-r--r--  drivers/scsi/lpfc/lpfc_sli4.h  |  18
9 files changed, 904 insertions(+), 248 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 2c3bb8a966e5..b738639fdf3f 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -51,6 +51,8 @@ struct lpfc_sli2_slim;
51 cmnd for menlo needs nearly twice as for firmware 51 cmnd for menlo needs nearly twice as for firmware
52 downloads using bsg */ 52 downloads using bsg */
53 53
54#define LPFC_DEFAULT_XPSGL_SIZE 256
55#define LPFC_MAX_SG_TABLESIZE 0xffff
54#define LPFC_MIN_SG_SLI4_BUF_SZ 0x800 /* based on LPFC_DEFAULT_SG_SEG_CNT */ 56#define LPFC_MIN_SG_SLI4_BUF_SZ 0x800 /* based on LPFC_DEFAULT_SG_SEG_CNT */
55#define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */ 57#define LPFC_MAX_BG_SLI4_SEG_CNT_DIF 128 /* sg element count for BlockGuard */
56#define LPFC_MAX_SG_SEG_CNT_DIF 512 /* sg element count per scsi cmnd */ 58#define LPFC_MAX_SG_SEG_CNT_DIF 512 /* sg element count per scsi cmnd */
@@ -799,6 +801,7 @@ struct lpfc_hba {
799 /* HBA Config Parameters */ 801 /* HBA Config Parameters */
800 uint32_t cfg_ack0; 802 uint32_t cfg_ack0;
801 uint32_t cfg_xri_rebalancing; 803 uint32_t cfg_xri_rebalancing;
804 uint32_t cfg_xpsgl;
802 uint32_t cfg_enable_npiv; 805 uint32_t cfg_enable_npiv;
803 uint32_t cfg_enable_rrq; 806 uint32_t cfg_enable_rrq;
804 uint32_t cfg_topology; 807 uint32_t cfg_topology;
@@ -904,6 +907,7 @@ struct lpfc_hba {
904 wait_queue_head_t work_waitq; 907 wait_queue_head_t work_waitq;
905 struct task_struct *worker_thread; 908 struct task_struct *worker_thread;
906 unsigned long data_flags; 909 unsigned long data_flags;
910 uint32_t border_sge_num;
907 911
908 uint32_t hbq_in_use; /* HBQs in use flag */ 912 uint32_t hbq_in_use; /* HBQs in use flag */
909 uint32_t hbq_count; /* Count of configured HBQs */ 913 uint32_t hbq_count; /* Count of configured HBQs */
@@ -986,6 +990,7 @@ struct lpfc_hba {
986 struct dma_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */ 990 struct dma_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */
987 struct dma_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ 991 struct dma_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */
988 struct dma_pool *txrdy_payload_pool; 992 struct dma_pool *txrdy_payload_pool;
993 struct dma_pool *lpfc_cmd_rsp_buf_pool;
989 struct lpfc_dma_pool lpfc_mbuf_safety_pool; 994 struct lpfc_dma_pool lpfc_mbuf_safety_pool;
990 995
991 mempool_t *mbox_mem_pool; 996 mempool_t *mbox_mem_pool;
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index d89480b9eade..e198de8eda32 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -2050,6 +2050,23 @@ struct sli4_sge { /* SLI-4 */
2050 uint32_t sge_len; 2050 uint32_t sge_len;
2051}; 2051};
2052 2052
2053struct sli4_hybrid_sgl {
2054 struct list_head list_node;
2055 struct sli4_sge *dma_sgl;
2056 dma_addr_t dma_phys_sgl;
2057};
2058
2059struct fcp_cmd_rsp_buf {
2060 struct list_head list_node;
2061
2062 /* for storing cmd/rsp dma alloc'ed virt_addr */
2063 struct fcp_cmnd *fcp_cmnd;
2064 struct fcp_rsp *fcp_rsp;
2065
2066 /* for storing this cmd/rsp's dma mapped phys addr from per CPU pool */
2067 dma_addr_t fcp_cmd_rsp_dma_handle;
2068};
2069
2053struct sli4_sge_diseed { /* SLI-4 */ 2070struct sli4_sge_diseed { /* SLI-4 */
2054 uint32_t ref_tag; 2071 uint32_t ref_tag;
2055 uint32_t ref_tag_tran; 2072 uint32_t ref_tag_tran;
@@ -3449,6 +3466,9 @@ struct lpfc_sli4_parameters {
3449#define cfg_xib_SHIFT 4 3466#define cfg_xib_SHIFT 4
3450#define cfg_xib_MASK 0x00000001 3467#define cfg_xib_MASK 0x00000001
3451#define cfg_xib_WORD word19 3468#define cfg_xib_WORD word19
3469#define cfg_xpsgl_SHIFT 6
3470#define cfg_xpsgl_MASK 0x00000001
3471#define cfg_xpsgl_WORD word19
3452#define cfg_eqdr_SHIFT 8 3472#define cfg_eqdr_SHIFT 8
3453#define cfg_eqdr_MASK 0x00000001 3473#define cfg_eqdr_MASK 0x00000001
3454#define cfg_eqdr_WORD word19 3474#define cfg_eqdr_WORD word19
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 6b39bfb73104..fa3741f12089 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3650,6 +3650,9 @@ lpfc_io_free(struct lpfc_hba *phba)
3650 qp->put_io_bufs--; 3650 qp->put_io_bufs--;
3651 dma_pool_free(phba->lpfc_sg_dma_buf_pool, 3651 dma_pool_free(phba->lpfc_sg_dma_buf_pool,
3652 lpfc_ncmd->data, lpfc_ncmd->dma_handle); 3652 lpfc_ncmd->data, lpfc_ncmd->dma_handle);
3653 if (phba->cfg_xpsgl && !phba->nvmet_support)
3654 lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
3655 lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
3653 kfree(lpfc_ncmd); 3656 kfree(lpfc_ncmd);
3654 qp->total_io_bufs--; 3657 qp->total_io_bufs--;
3655 } 3658 }
@@ -3663,6 +3666,9 @@ lpfc_io_free(struct lpfc_hba *phba)
3663 qp->get_io_bufs--; 3666 qp->get_io_bufs--;
3664 dma_pool_free(phba->lpfc_sg_dma_buf_pool, 3667 dma_pool_free(phba->lpfc_sg_dma_buf_pool,
3665 lpfc_ncmd->data, lpfc_ncmd->dma_handle); 3668 lpfc_ncmd->data, lpfc_ncmd->dma_handle);
3669 if (phba->cfg_xpsgl && !phba->nvmet_support)
3670 lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
3671 lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
3666 kfree(lpfc_ncmd); 3672 kfree(lpfc_ncmd);
3667 qp->total_io_bufs--; 3673 qp->total_io_bufs--;
3668 } 3674 }
@@ -4138,22 +4144,30 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc)
4138 break; 4144 break;
4139 } 4145 }
4140 4146
4141 /* 4147 if (phba->cfg_xpsgl && !phba->nvmet_support) {
4142 * 4K Page alignment is CRITICAL to BlockGuard, double check 4148 INIT_LIST_HEAD(&lpfc_ncmd->dma_sgl_xtra_list);
4143 * to be sure. 4149 } else {
4144 */ 4150 /*
4145 if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) && 4151 * 4K Page alignment is CRITICAL to BlockGuard, double
4146 (((unsigned long)(lpfc_ncmd->data) & 4152 * check to be sure.
4147 (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) { 4153 */
4148 lpfc_printf_log(phba, KERN_ERR, LOG_FCP, 4154 if ((phba->sli3_options & LPFC_SLI3_BG_ENABLED) &&
4149 "3369 Memory alignment err: addr=%lx\n", 4155 (((unsigned long)(lpfc_ncmd->data) &
4150 (unsigned long)lpfc_ncmd->data); 4156 (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
4151 dma_pool_free(phba->lpfc_sg_dma_buf_pool, 4157 lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
4152 lpfc_ncmd->data, lpfc_ncmd->dma_handle); 4158 "3369 Memory alignment err: "
4153 kfree(lpfc_ncmd); 4159 "addr=%lx\n",
4154 break; 4160 (unsigned long)lpfc_ncmd->data);
4161 dma_pool_free(phba->lpfc_sg_dma_buf_pool,
4162 lpfc_ncmd->data,
4163 lpfc_ncmd->dma_handle);
4164 kfree(lpfc_ncmd);
4165 break;
4166 }
4155 } 4167 }
4156 4168
4169 INIT_LIST_HEAD(&lpfc_ncmd->dma_cmd_rsp_list);
4170
4157 lxri = lpfc_sli4_next_xritag(phba); 4171 lxri = lpfc_sli4_next_xritag(phba);
4158 if (lxri == NO_XRI) { 4172 if (lxri == NO_XRI) {
4159 dma_pool_free(phba->lpfc_sg_dma_buf_pool, 4173 dma_pool_free(phba->lpfc_sg_dma_buf_pool,
@@ -4330,7 +4344,11 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
4330 4344
4331 shost->dma_boundary = 4345 shost->dma_boundary =
4332 phba->sli4_hba.pc_sli4_params.sge_supp_len-1; 4346 phba->sli4_hba.pc_sli4_params.sge_supp_len-1;
4333 shost->sg_tablesize = phba->cfg_scsi_seg_cnt; 4347
4348 if (phba->cfg_xpsgl && !phba->nvmet_support)
4349 shost->sg_tablesize = LPFC_MAX_SG_TABLESIZE;
4350 else
4351 shost->sg_tablesize = phba->cfg_scsi_seg_cnt;
4334 } else 4352 } else
4335 /* SLI-3 has a limited number of hardware queues (3), 4353 /* SLI-3 has a limited number of hardware queues (3),
4336 * thus there is only one for FCP processing. 4354 * thus there is only one for FCP processing.
@@ -6348,6 +6366,24 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
6348 if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ)) 6366 if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ))
6349 return -ENOMEM; 6367 return -ENOMEM;
6350 6368
6369 phba->lpfc_sg_dma_buf_pool =
6370 dma_pool_create("lpfc_sg_dma_buf_pool",
6371 &phba->pcidev->dev, phba->cfg_sg_dma_buf_size,
6372 BPL_ALIGN_SZ, 0);
6373
6374 if (!phba->lpfc_sg_dma_buf_pool)
6375 goto fail_free_mem;
6376
6377 phba->lpfc_cmd_rsp_buf_pool =
6378 dma_pool_create("lpfc_cmd_rsp_buf_pool",
6379 &phba->pcidev->dev,
6380 sizeof(struct fcp_cmnd) +
6381 sizeof(struct fcp_rsp),
6382 BPL_ALIGN_SZ, 0);
6383
6384 if (!phba->lpfc_cmd_rsp_buf_pool)
6385 goto fail_free_dma_buf_pool;
6386
6351 /* 6387 /*
6352 * Enable sr-iov virtual functions if supported and configured 6388 * Enable sr-iov virtual functions if supported and configured
6353 * through the module parameter. 6389 * through the module parameter.
@@ -6366,6 +6402,13 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
6366 } 6402 }
6367 6403
6368 return 0; 6404 return 0;
6405
6406fail_free_dma_buf_pool:
6407 dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
6408 phba->lpfc_sg_dma_buf_pool = NULL;
6409fail_free_mem:
6410 lpfc_mem_free(phba);
6411 return -ENOMEM;
6369} 6412}
6370 6413
6371/** 6414/**
@@ -6465,102 +6508,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
6465 * The WQ create will allocate the ring. 6508 * The WQ create will allocate the ring.
6466 */ 6509 */
6467 6510
6468 /*
6469 * 1 for cmd, 1 for rsp, NVME adds an extra one
6470 * for boundary conditions in its max_sgl_segment template.
6471 */
6472 extra = 2;
6473 if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
6474 extra++;
6475
6476 /*
6477 * It doesn't matter what family our adapter is in, we are
6478 * limited to 2 Pages, 512 SGEs, for our SGL.
6479 * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
6480 */
6481 max_buf_size = (2 * SLI4_PAGE_SIZE);
6482
6483 /*
6484 * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size
6485 * used to create the sg_dma_buf_pool must be calculated.
6486 */
6487 if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
6488 /*
6489 * The scsi_buf for a T10-DIF I/O holds the FCP cmnd,
6490 * the FCP rsp, and a SGE. Sice we have no control
6491 * over how many protection segments the SCSI Layer
6492 * will hand us (ie: there could be one for every block
6493 * in the IO), just allocate enough SGEs to accomidate
6494 * our max amount and we need to limit lpfc_sg_seg_cnt
6495 * to minimize the risk of running out.
6496 */
6497 phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
6498 sizeof(struct fcp_rsp) + max_buf_size;
6499
6500 /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
6501 phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
6502
6503 /*
6504 * If supporting DIF, reduce the seg count for scsi to
6505 * allow room for the DIF sges.
6506 */
6507 if (phba->cfg_enable_bg &&
6508 phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF)
6509 phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF;
6510 else
6511 phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
6512
6513 } else {
6514 /*
6515 * The scsi_buf for a regular I/O holds the FCP cmnd,
6516 * the FCP rsp, a SGE for each, and a SGE for up to
6517 * cfg_sg_seg_cnt data segments.
6518 */
6519 phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
6520 sizeof(struct fcp_rsp) +
6521 ((phba->cfg_sg_seg_cnt + extra) *
6522 sizeof(struct sli4_sge));
6523
6524 /* Total SGEs for scsi_sg_list */
6525 phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra;
6526 phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
6527
6528 /*
6529 * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only
6530 * need to post 1 page for the SGL.
6531 */
6532 }
6533
6534 /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */
6535 if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
6536 if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
6537 lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
6538 "6300 Reducing NVME sg segment "
6539 "cnt to %d\n",
6540 LPFC_MAX_NVME_SEG_CNT);
6541 phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
6542 } else
6543 phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
6544 }
6545
6546 /* Initialize the host templates with the updated values. */
6547 lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
6548 lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
6549 lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt;
6550
6551 if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
6552 phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
6553 else
6554 phba->cfg_sg_dma_buf_size =
6555 SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
6556
6557 lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
6558 "9087 sg_seg_cnt:%d dmabuf_size:%d "
6559 "total:%d scsi:%d nvme:%d\n",
6560 phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
6561 phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt,
6562 phba->cfg_nvme_seg_cnt);
6563
6564 /* Initialize buffer queue management fields */ 6511 /* Initialize buffer queue management fields */
6565 INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list); 6512 INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list);
6566 phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc; 6513 phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc;
@@ -6781,6 +6728,131 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
6781 } 6728 }
6782 } 6729 }
6783 6730
6731 /*
6732 * 1 for cmd, 1 for rsp, NVME adds an extra one
6733 * for boundary conditions in its max_sgl_segment template.
6734 */
6735 extra = 2;
6736 if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
6737 extra++;
6738
6739 /*
6740 * It doesn't matter what family our adapter is in, we are
6741 * limited to 2 Pages, 512 SGEs, for our SGL.
6742 * There are going to be 2 reserved SGEs: 1 FCP cmnd + 1 FCP rsp
6743 */
6744 max_buf_size = (2 * SLI4_PAGE_SIZE);
6745
6746 /*
6747 * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size
6748 * used to create the sg_dma_buf_pool must be calculated.
6749 */
6750 if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) {
6751 /* Both cfg_enable_bg and cfg_external_dif code paths */
6752
6753 /*
6754 * The scsi_buf for a T10-DIF I/O holds the FCP cmnd,
6755 * the FCP rsp, and a SGE. Sice we have no control
6756 * over how many protection segments the SCSI Layer
6757 * will hand us (ie: there could be one for every block
6758 * in the IO), just allocate enough SGEs to accomidate
6759 * our max amount and we need to limit lpfc_sg_seg_cnt
6760 * to minimize the risk of running out.
6761 */
6762 phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
6763 sizeof(struct fcp_rsp) + max_buf_size;
6764
6765 /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
6766 phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
6767
6768 /*
6769 * If supporting DIF, reduce the seg count for scsi to
6770 * allow room for the DIF sges.
6771 */
6772 if (phba->cfg_enable_bg &&
6773 phba->cfg_sg_seg_cnt > LPFC_MAX_BG_SLI4_SEG_CNT_DIF)
6774 phba->cfg_scsi_seg_cnt = LPFC_MAX_BG_SLI4_SEG_CNT_DIF;
6775 else
6776 phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
6777
6778 } else {
6779 /*
6780 * The scsi_buf for a regular I/O holds the FCP cmnd,
6781 * the FCP rsp, a SGE for each, and a SGE for up to
6782 * cfg_sg_seg_cnt data segments.
6783 */
6784 phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
6785 sizeof(struct fcp_rsp) +
6786 ((phba->cfg_sg_seg_cnt + extra) *
6787 sizeof(struct sli4_sge));
6788
6789 /* Total SGEs for scsi_sg_list */
6790 phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + extra;
6791 phba->cfg_scsi_seg_cnt = phba->cfg_sg_seg_cnt;
6792
6793 /*
6794 * NOTE: if (phba->cfg_sg_seg_cnt + extra) <= 256 we only
6795 * need to post 1 page for the SGL.
6796 */
6797 }
6798
6799 if (phba->cfg_xpsgl && !phba->nvmet_support)
6800 phba->cfg_sg_dma_buf_size = LPFC_DEFAULT_XPSGL_SIZE;
6801 else if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
6802 phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
6803 else
6804 phba->cfg_sg_dma_buf_size =
6805 SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
6806
6807 phba->border_sge_num = phba->cfg_sg_dma_buf_size /
6808 sizeof(struct sli4_sge);
6809
6810 /* Limit to LPFC_MAX_NVME_SEG_CNT for NVME. */
6811 if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
6812 if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) {
6813 lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT,
6814 "6300 Reducing NVME sg segment "
6815 "cnt to %d\n",
6816 LPFC_MAX_NVME_SEG_CNT);
6817 phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT;
6818 } else
6819 phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt;
6820 }
6821
6822 /* Initialize the host templates with the updated values. */
6823 lpfc_vport_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
6824 lpfc_template.sg_tablesize = phba->cfg_scsi_seg_cnt;
6825 lpfc_template_no_hr.sg_tablesize = phba->cfg_scsi_seg_cnt;
6826
6827 lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
6828 "9087 sg_seg_cnt:%d dmabuf_size:%d "
6829 "total:%d scsi:%d nvme:%d\n",
6830 phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
6831 phba->cfg_total_seg_cnt, phba->cfg_scsi_seg_cnt,
6832 phba->cfg_nvme_seg_cnt);
6833
6834 if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
6835 i = phba->cfg_sg_dma_buf_size;
6836 else
6837 i = SLI4_PAGE_SIZE;
6838
6839 phba->lpfc_sg_dma_buf_pool =
6840 dma_pool_create("lpfc_sg_dma_buf_pool",
6841 &phba->pcidev->dev,
6842 phba->cfg_sg_dma_buf_size,
6843 i, 0);
6844 if (!phba->lpfc_sg_dma_buf_pool)
6845 goto out_free_bsmbx;
6846
6847 phba->lpfc_cmd_rsp_buf_pool =
6848 dma_pool_create("lpfc_cmd_rsp_buf_pool",
6849 &phba->pcidev->dev,
6850 sizeof(struct fcp_cmnd) +
6851 sizeof(struct fcp_rsp),
6852 i, 0);
6853 if (!phba->lpfc_cmd_rsp_buf_pool)
6854 goto out_free_sg_dma_buf;
6855
6784 mempool_free(mboxq, phba->mbox_mem_pool); 6856 mempool_free(mboxq, phba->mbox_mem_pool);
6785 6857
6786 /* Verify OAS is supported */ 6858 /* Verify OAS is supported */
@@ -6792,12 +6864,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
6792 /* Verify all the SLI4 queues */ 6864 /* Verify all the SLI4 queues */
6793 rc = lpfc_sli4_queue_verify(phba); 6865 rc = lpfc_sli4_queue_verify(phba);
6794 if (rc) 6866 if (rc)
6795 goto out_free_bsmbx; 6867 goto out_free_cmd_rsp_buf;
6796 6868
6797 /* Create driver internal CQE event pool */ 6869 /* Create driver internal CQE event pool */
6798 rc = lpfc_sli4_cq_event_pool_create(phba); 6870 rc = lpfc_sli4_cq_event_pool_create(phba);
6799 if (rc) 6871 if (rc)
6800 goto out_free_bsmbx; 6872 goto out_free_cmd_rsp_buf;
6801 6873
6802 /* Initialize sgl lists per host */ 6874 /* Initialize sgl lists per host */
6803 lpfc_init_sgl_list(phba); 6875 lpfc_init_sgl_list(phba);
@@ -6888,6 +6960,12 @@ out_free_active_sgl:
6888 lpfc_free_active_sgl(phba); 6960 lpfc_free_active_sgl(phba);
6889out_destroy_cq_event_pool: 6961out_destroy_cq_event_pool:
6890 lpfc_sli4_cq_event_pool_destroy(phba); 6962 lpfc_sli4_cq_event_pool_destroy(phba);
6963out_free_cmd_rsp_buf:
6964 dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
6965 phba->lpfc_cmd_rsp_buf_pool = NULL;
6966out_free_sg_dma_buf:
6967 dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
6968 phba->lpfc_sg_dma_buf_pool = NULL;
6891out_free_bsmbx: 6969out_free_bsmbx:
6892 lpfc_destroy_bootstrap_mbox(phba); 6970 lpfc_destroy_bootstrap_mbox(phba);
6893out_free_mem: 6971out_free_mem:
@@ -8814,6 +8892,9 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
8814 spin_lock_init(&qp->abts_nvme_buf_list_lock); 8892 spin_lock_init(&qp->abts_nvme_buf_list_lock);
8815 INIT_LIST_HEAD(&qp->lpfc_abts_nvme_buf_list); 8893 INIT_LIST_HEAD(&qp->lpfc_abts_nvme_buf_list);
8816 qp->abts_nvme_io_bufs = 0; 8894 qp->abts_nvme_io_bufs = 0;
8895 INIT_LIST_HEAD(&qp->sgl_list);
8896 INIT_LIST_HEAD(&qp->cmd_rsp_buf_list);
8897 spin_lock_init(&qp->hdwq_lock);
8817 } 8898 }
8818 } 8899 }
8819 8900
@@ -9188,6 +9269,9 @@ lpfc_sli4_release_hdwq(struct lpfc_hba *phba)
9188 hdwq[idx].nvme_cq = NULL; 9269 hdwq[idx].nvme_cq = NULL;
9189 hdwq[idx].fcp_wq = NULL; 9270 hdwq[idx].fcp_wq = NULL;
9190 hdwq[idx].nvme_wq = NULL; 9271 hdwq[idx].nvme_wq = NULL;
9272 if (phba->cfg_xpsgl && !phba->nvmet_support)
9273 lpfc_free_sgl_per_hdwq(phba, &hdwq[idx]);
9274 lpfc_free_cmd_rsp_buf_per_hdwq(phba, &hdwq[idx]);
9191 } 9275 }
9192 /* Loop thru all IRQ vectors */ 9276 /* Loop thru all IRQ vectors */
9193 for (idx = 0; idx < phba->cfg_irq_chann; idx++) { 9277 for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
@@ -11647,6 +11731,9 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
11647 phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters); 11731 phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters);
11648 phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters); 11732 phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters);
11649 11733
11734 /* Check for Extended Pre-Registered SGL support */
11735 phba->cfg_xpsgl = bf_get(cfg_xpsgl, mbx_sli4_parameters);
11736
11650 /* Check for firmware nvme support */ 11737 /* Check for firmware nvme support */
11651 rc = (bf_get(cfg_nvme, mbx_sli4_parameters) && 11738 rc = (bf_get(cfg_nvme, mbx_sli4_parameters) &&
11652 bf_get(cfg_xib, mbx_sli4_parameters)); 11739 bf_get(cfg_xib, mbx_sli4_parameters));
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 9bdb4a0a9f24..ae09bb863497 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -72,8 +72,8 @@ lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) {
72 * lpfc_mem_alloc - create and allocate all PCI and memory pools 72 * lpfc_mem_alloc - create and allocate all PCI and memory pools
73 * @phba: HBA to allocate pools for 73 * @phba: HBA to allocate pools for
74 * 74 *
75 * Description: Creates and allocates PCI pools lpfc_sg_dma_buf_pool, 75 * Description: Creates and allocates PCI pools lpfc_mbuf_pool,
76 * lpfc_mbuf_pool, lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools 76 * lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools
77 * for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask. 77 * for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask.
78 * 78 *
79 * Notes: Not interrupt-safe. Must be called with no locks held. If any 79 * Notes: Not interrupt-safe. Must be called with no locks held. If any
@@ -89,36 +89,12 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align)
89 struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; 89 struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
90 int i; 90 int i;
91 91
92 if (phba->sli_rev == LPFC_SLI_REV4) {
93 /* Calculate alignment */
94 if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
95 i = phba->cfg_sg_dma_buf_size;
96 else
97 i = SLI4_PAGE_SIZE;
98
99 phba->lpfc_sg_dma_buf_pool =
100 dma_pool_create("lpfc_sg_dma_buf_pool",
101 &phba->pcidev->dev,
102 phba->cfg_sg_dma_buf_size,
103 i, 0);
104 if (!phba->lpfc_sg_dma_buf_pool)
105 goto fail;
106
107 } else {
108 phba->lpfc_sg_dma_buf_pool =
109 dma_pool_create("lpfc_sg_dma_buf_pool",
110 &phba->pcidev->dev, phba->cfg_sg_dma_buf_size,
111 align, 0);
112
113 if (!phba->lpfc_sg_dma_buf_pool)
114 goto fail;
115 }
116 92
117 phba->lpfc_mbuf_pool = dma_pool_create("lpfc_mbuf_pool", &phba->pcidev->dev, 93 phba->lpfc_mbuf_pool = dma_pool_create("lpfc_mbuf_pool", &phba->pcidev->dev,
118 LPFC_BPL_SIZE, 94 LPFC_BPL_SIZE,
119 align, 0); 95 align, 0);
120 if (!phba->lpfc_mbuf_pool) 96 if (!phba->lpfc_mbuf_pool)
121 goto fail_free_dma_buf_pool; 97 goto fail;
122 98
123 pool->elements = kmalloc_array(LPFC_MBUF_POOL_SIZE, 99 pool->elements = kmalloc_array(LPFC_MBUF_POOL_SIZE,
124 sizeof(struct lpfc_dmabuf), 100 sizeof(struct lpfc_dmabuf),
@@ -208,9 +184,6 @@ fail_free_drb_pool:
208 fail_free_lpfc_mbuf_pool: 184 fail_free_lpfc_mbuf_pool:
209 dma_pool_destroy(phba->lpfc_mbuf_pool); 185 dma_pool_destroy(phba->lpfc_mbuf_pool);
210 phba->lpfc_mbuf_pool = NULL; 186 phba->lpfc_mbuf_pool = NULL;
211 fail_free_dma_buf_pool:
212 dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
213 phba->lpfc_sg_dma_buf_pool = NULL;
214 fail: 187 fail:
215 return -ENOMEM; 188 return -ENOMEM;
216} 189}
@@ -287,10 +260,6 @@ lpfc_mem_free(struct lpfc_hba *phba)
287 dma_pool_destroy(phba->lpfc_mbuf_pool); 260 dma_pool_destroy(phba->lpfc_mbuf_pool);
288 phba->lpfc_mbuf_pool = NULL; 261 phba->lpfc_mbuf_pool = NULL;
289 262
290 /* Free DMA buffer memory pool */
291 dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
292 phba->lpfc_sg_dma_buf_pool = NULL;
293
294 /* Free Device Data memory pool */ 263 /* Free Device Data memory pool */
295 if (phba->device_data_mem_pool) { 264 if (phba->device_data_mem_pool) {
296 /* Ensure all objects have been returned to the pool */ 265 /* Ensure all objects have been returned to the pool */
@@ -363,6 +332,13 @@ lpfc_mem_free_all(struct lpfc_hba *phba)
363 /* Free and destroy all the allocated memory pools */ 332 /* Free and destroy all the allocated memory pools */
364 lpfc_mem_free(phba); 333 lpfc_mem_free(phba);
365 334
335 /* Free DMA buffer memory pool */
336 dma_pool_destroy(phba->lpfc_sg_dma_buf_pool);
337 phba->lpfc_sg_dma_buf_pool = NULL;
338
339 dma_pool_destroy(phba->lpfc_cmd_rsp_buf_pool);
340 phba->lpfc_cmd_rsp_buf_pool = NULL;
341
366 /* Free the iocb lookup array */ 342 /* Free the iocb lookup array */
367 kfree(psli->iocbq_lookup); 343 kfree(psli->iocbq_lookup);
368 psli->iocbq_lookup = NULL; 344 psli->iocbq_lookup = NULL;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index b599ddc40c6b..5e48318eb7a9 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1306,14 +1306,16 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
1306 struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; 1306 struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
1307 union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe; 1307 union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe;
1308 struct sli4_sge *sgl = lpfc_ncmd->dma_sgl; 1308 struct sli4_sge *sgl = lpfc_ncmd->dma_sgl;
1309 struct sli4_hybrid_sgl *sgl_xtra = NULL;
1309 struct scatterlist *data_sg; 1310 struct scatterlist *data_sg;
1310 struct sli4_sge *first_data_sgl; 1311 struct sli4_sge *first_data_sgl;
1311 struct ulp_bde64 *bde; 1312 struct ulp_bde64 *bde;
1312 dma_addr_t physaddr; 1313 dma_addr_t physaddr = 0;
1313 uint32_t num_bde = 0; 1314 uint32_t num_bde = 0;
1314 uint32_t dma_len; 1315 uint32_t dma_len = 0;
1315 uint32_t dma_offset = 0; 1316 uint32_t dma_offset = 0;
1316 int nseg, i; 1317 int nseg, i, j;
1318 bool lsp_just_set = false;
1317 1319
1318 /* Fix up the command and response DMA stuff. */ 1320 /* Fix up the command and response DMA stuff. */
1319 lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd); 1321 lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd);
@@ -1350,6 +1352,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
1350 */ 1352 */
1351 nseg = nCmd->sg_cnt; 1353 nseg = nCmd->sg_cnt;
1352 data_sg = nCmd->first_sgl; 1354 data_sg = nCmd->first_sgl;
1355
1356 /* for tracking the segment boundaries */
1357 j = 2;
1353 for (i = 0; i < nseg; i++) { 1358 for (i = 0; i < nseg; i++) {
1354 if (data_sg == NULL) { 1359 if (data_sg == NULL) {
1355 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 1360 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1358,23 +1363,76 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
1358 lpfc_ncmd->seg_cnt = 0; 1363 lpfc_ncmd->seg_cnt = 0;
1359 return 1; 1364 return 1;
1360 } 1365 }
1361 physaddr = data_sg->dma_address; 1366
1362 dma_len = data_sg->length; 1367 sgl->word2 = 0;
1363 sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr)); 1368 if ((num_bde + 1) == nseg) {
1364 sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
1365 sgl->word2 = le32_to_cpu(sgl->word2);
1366 if ((num_bde + 1) == nseg)
1367 bf_set(lpfc_sli4_sge_last, sgl, 1); 1369 bf_set(lpfc_sli4_sge_last, sgl, 1);
1368 else 1370 bf_set(lpfc_sli4_sge_type, sgl,
1371 LPFC_SGE_TYPE_DATA);
1372 } else {
1369 bf_set(lpfc_sli4_sge_last, sgl, 0); 1373 bf_set(lpfc_sli4_sge_last, sgl, 0);
1370 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset); 1374
1371 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); 1375 /* expand the segment */
1372 sgl->word2 = cpu_to_le32(sgl->word2); 1376 if (!lsp_just_set &&
1373 sgl->sge_len = cpu_to_le32(dma_len); 1377 !((j + 1) % phba->border_sge_num) &&
1374 1378 ((nseg - 1) != i)) {
1375 dma_offset += dma_len; 1379 /* set LSP type */
1376 data_sg = sg_next(data_sg); 1380 bf_set(lpfc_sli4_sge_type, sgl,
1377 sgl++; 1381 LPFC_SGE_TYPE_LSP);
1382
1383 sgl_xtra = lpfc_get_sgl_per_hdwq(
1384 phba, lpfc_ncmd);
1385
1386 if (unlikely(!sgl_xtra)) {
1387 lpfc_ncmd->seg_cnt = 0;
1388 return 1;
1389 }
1390 sgl->addr_lo = cpu_to_le32(putPaddrLow(
1391 sgl_xtra->dma_phys_sgl));
1392 sgl->addr_hi = cpu_to_le32(putPaddrHigh(
1393 sgl_xtra->dma_phys_sgl));
1394
1395 } else {
1396 bf_set(lpfc_sli4_sge_type, sgl,
1397 LPFC_SGE_TYPE_DATA);
1398 }
1399 }
1400
1401 if (!(bf_get(lpfc_sli4_sge_type, sgl) &
1402 LPFC_SGE_TYPE_LSP)) {
1403 if ((nseg - 1) == i)
1404 bf_set(lpfc_sli4_sge_last, sgl, 1);
1405
1406 physaddr = data_sg->dma_address;
1407 dma_len = data_sg->length;
1408 sgl->addr_lo = cpu_to_le32(
1409 putPaddrLow(physaddr));
1410 sgl->addr_hi = cpu_to_le32(
1411 putPaddrHigh(physaddr));
1412
1413 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
1414 sgl->word2 = cpu_to_le32(sgl->word2);
1415 sgl->sge_len = cpu_to_le32(dma_len);
1416
1417 dma_offset += dma_len;
1418 data_sg = sg_next(data_sg);
1419
1420 sgl++;
1421
1422 lsp_just_set = false;
1423 } else {
1424 sgl->word2 = cpu_to_le32(sgl->word2);
1425
1426 sgl->sge_len = cpu_to_le32(
1427 phba->cfg_sg_dma_buf_size);
1428
1429 sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
1430 i = i - 1;
1431
1432 lsp_just_set = true;
1433 }
1434
1435 j++;
1378 } 1436 }
1379 if (phba->cfg_enable_pbde) { 1437 if (phba->cfg_enable_pbde) {
1380 /* Use PBDE support for first SGL only, offset == 0 */ 1438 /* Use PBDE support for first SGL only, offset == 0 */
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 95ba5000d0ec..fb7df209c0aa 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -685,8 +685,9 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
685 IOCB_t *iocb; 685 IOCB_t *iocb;
686 dma_addr_t pdma_phys_fcp_rsp; 686 dma_addr_t pdma_phys_fcp_rsp;
687 dma_addr_t pdma_phys_fcp_cmd; 687 dma_addr_t pdma_phys_fcp_cmd;
688 uint32_t sgl_size, cpu, idx; 688 uint32_t cpu, idx;
689 int tag; 689 int tag;
690 struct fcp_cmd_rsp_buf *tmp = NULL;
690 691
691 cpu = raw_smp_processor_id(); 692 cpu = raw_smp_processor_id();
692 if (cmnd && phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) { 693 if (cmnd && phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
@@ -704,9 +705,6 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
704 return NULL; 705 return NULL;
705 } 706 }
706 707
707 sgl_size = phba->cfg_sg_dma_buf_size -
708 (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
709
710 /* Setup key fields in buffer that may have been changed 708 /* Setup key fields in buffer that may have been changed
711 * if other protocols used this buffer. 709 * if other protocols used this buffer.
712 */ 710 */
@@ -721,9 +719,12 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
721#ifdef CONFIG_SCSI_LPFC_DEBUG_FS 719#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
722 lpfc_cmd->prot_data_type = 0; 720 lpfc_cmd->prot_data_type = 0;
723#endif 721#endif
724 lpfc_cmd->fcp_cmnd = (lpfc_cmd->data + sgl_size); 722 tmp = lpfc_get_cmd_rsp_buf_per_hdwq(phba, lpfc_cmd);
725 lpfc_cmd->fcp_rsp = (struct fcp_rsp *)((uint8_t *)lpfc_cmd->fcp_cmnd + 723 if (!tmp)
726 sizeof(struct fcp_cmnd)); 724 return NULL;
725
726 lpfc_cmd->fcp_cmnd = tmp->fcp_cmnd;
727 lpfc_cmd->fcp_rsp = tmp->fcp_rsp;
727 728
728 /* 729 /*
729 * The first two SGEs are the FCP_CMD and FCP_RSP. 730 * The first two SGEs are the FCP_CMD and FCP_RSP.
@@ -731,7 +732,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
731 * first two and leave the rest for queuecommand. 732 * first two and leave the rest for queuecommand.
732 */ 733 */
733 sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl; 734 sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
734 pdma_phys_fcp_cmd = (lpfc_cmd->dma_handle + sgl_size); 735 pdma_phys_fcp_cmd = tmp->fcp_cmd_rsp_dma_handle;
735 sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd)); 736 sgl->addr_hi = cpu_to_le32(putPaddrHigh(pdma_phys_fcp_cmd));
736 sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd)); 737 sgl->addr_lo = cpu_to_le32(putPaddrLow(pdma_phys_fcp_cmd));
737 sgl->word2 = le32_to_cpu(sgl->word2); 738 sgl->word2 = le32_to_cpu(sgl->word2);
@@ -1990,7 +1991,8 @@ out:
1990 **/ 1991 **/
1991static int 1992static int
1992lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, 1993lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
1993 struct sli4_sge *sgl, int datasegcnt) 1994 struct sli4_sge *sgl, int datasegcnt,
1995 struct lpfc_io_buf *lpfc_cmd)
1994{ 1996{
1995 struct scatterlist *sgde = NULL; /* s/g data entry */ 1997 struct scatterlist *sgde = NULL; /* s/g data entry */
1996 struct sli4_sge_diseed *diseed = NULL; 1998 struct sli4_sge_diseed *diseed = NULL;
@@ -2004,6 +2006,9 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2004 uint32_t checking = 1; 2006 uint32_t checking = 1;
2005 uint32_t dma_len; 2007 uint32_t dma_len;
2006 uint32_t dma_offset = 0; 2008 uint32_t dma_offset = 0;
2009 struct sli4_hybrid_sgl *sgl_xtra = NULL;
2010 int j;
2011 bool lsp_just_set = false;
2007 2012
2008 status = lpfc_sc_to_bg_opcodes(phba, sc, &txop, &rxop); 2013 status = lpfc_sc_to_bg_opcodes(phba, sc, &txop, &rxop);
2009 if (status) 2014 if (status)
@@ -2063,23 +2068,64 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2063 sgl++; 2068 sgl++;
2064 2069
2065 /* assumption: caller has already run dma_map_sg on command data */ 2070 /* assumption: caller has already run dma_map_sg on command data */
2066 scsi_for_each_sg(sc, sgde, datasegcnt, i) { 2071 sgde = scsi_sglist(sc);
2067 physaddr = sg_dma_address(sgde); 2072 j = 3;
2068 dma_len = sg_dma_len(sgde); 2073 for (i = 0; i < datasegcnt; i++) {
2069 sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr)); 2074 /* clear it */
2070 sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr)); 2075 sgl->word2 = 0;
2071 if ((i + 1) == datasegcnt)
2072 bf_set(lpfc_sli4_sge_last, sgl, 1);
2073 else
2074 bf_set(lpfc_sli4_sge_last, sgl, 0);
2075 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
2076 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
2077 2076
2078 sgl->sge_len = cpu_to_le32(dma_len); 2077 /* do we need to expand the segment */
2079 dma_offset += dma_len; 2078 if (!lsp_just_set && !((j + 1) % phba->border_sge_num) &&
2079 ((datasegcnt - 1) != i)) {
2080 /* set LSP type */
2081 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_LSP);
2082
2083 sgl_xtra = lpfc_get_sgl_per_hdwq(phba, lpfc_cmd);
2084
2085 if (unlikely(!sgl_xtra)) {
2086 lpfc_cmd->seg_cnt = 0;
2087 return 0;
2088 }
2089 sgl->addr_lo = cpu_to_le32(putPaddrLow(
2090 sgl_xtra->dma_phys_sgl));
2091 sgl->addr_hi = cpu_to_le32(putPaddrHigh(
2092 sgl_xtra->dma_phys_sgl));
2093
2094 } else {
2095 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
2096 }
2097
2098 if (!(bf_get(lpfc_sli4_sge_type, sgl) & LPFC_SGE_TYPE_LSP)) {
2099 if ((datasegcnt - 1) == i)
2100 bf_set(lpfc_sli4_sge_last, sgl, 1);
2101 physaddr = sg_dma_address(sgde);
2102 dma_len = sg_dma_len(sgde);
2103 sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
2104 sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
2105
2106 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
2107 sgl->word2 = cpu_to_le32(sgl->word2);
2108 sgl->sge_len = cpu_to_le32(dma_len);
2109
2110 dma_offset += dma_len;
2111 sgde = sg_next(sgde);
2112
2113 sgl++;
2114 num_sge++;
2115 lsp_just_set = false;
2116
2117 } else {
2118 sgl->word2 = cpu_to_le32(sgl->word2);
2119 sgl->sge_len = cpu_to_le32(phba->cfg_sg_dma_buf_size);
2120
2121 sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
2122 i = i - 1;
2123
2124 lsp_just_set = true;
2125 }
2126
2127 j++;
2080 2128
2081 sgl++;
2082 num_sge++;
2083 } 2129 }
2084 2130
2085out: 2131out:
@@ -2125,7 +2171,8 @@ out:
2125 **/ 2171 **/
2126static int 2172static int
2127lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, 2173lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2128 struct sli4_sge *sgl, int datacnt, int protcnt) 2174 struct sli4_sge *sgl, int datacnt, int protcnt,
2175 struct lpfc_io_buf *lpfc_cmd)
2129{ 2176{
2130 struct scatterlist *sgde = NULL; /* s/g data entry */ 2177 struct scatterlist *sgde = NULL; /* s/g data entry */
2131 struct scatterlist *sgpe = NULL; /* s/g prot entry */ 2178 struct scatterlist *sgpe = NULL; /* s/g prot entry */
@@ -2147,7 +2194,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2147#endif 2194#endif
2148 uint32_t checking = 1; 2195 uint32_t checking = 1;
2149 uint32_t dma_offset = 0; 2196 uint32_t dma_offset = 0;
2150 int num_sge = 0; 2197 int num_sge = 0, j = 2;
2198 struct sli4_hybrid_sgl *sgl_xtra = NULL;
2151 2199
2152 sgpe = scsi_prot_sglist(sc); 2200 sgpe = scsi_prot_sglist(sc);
2153 sgde = scsi_sglist(sc); 2201 sgde = scsi_sglist(sc);
@@ -2180,9 +2228,37 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2180 split_offset = 0; 2228 split_offset = 0;
2181 do { 2229 do {
2182 /* Check to see if we ran out of space */ 2230 /* Check to see if we ran out of space */
2183 if (num_sge >= (phba->cfg_total_seg_cnt - 2)) 2231 if ((num_sge >= (phba->cfg_total_seg_cnt - 2)) &&
2232 !(phba->cfg_xpsgl))
2184 return num_sge + 3; 2233 return num_sge + 3;
2185 2234
2235 /* DISEED and DIF have to be together */
2236 if (!((j + 1) % phba->border_sge_num) ||
2237 !((j + 2) % phba->border_sge_num) ||
2238 !((j + 3) % phba->border_sge_num)) {
2239 sgl->word2 = 0;
2240
2241 /* set LSP type */
2242 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_LSP);
2243
2244 sgl_xtra = lpfc_get_sgl_per_hdwq(phba, lpfc_cmd);
2245
2246 if (unlikely(!sgl_xtra)) {
2247 goto out;
2248 } else {
2249 sgl->addr_lo = cpu_to_le32(putPaddrLow(
2250 sgl_xtra->dma_phys_sgl));
2251 sgl->addr_hi = cpu_to_le32(putPaddrHigh(
2252 sgl_xtra->dma_phys_sgl));
2253 }
2254
2255 sgl->word2 = cpu_to_le32(sgl->word2);
2256 sgl->sge_len = cpu_to_le32(phba->cfg_sg_dma_buf_size);
2257
2258 sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
2259 j = 0;
2260 }
2261
2186 /* setup DISEED with what we have */ 2262 /* setup DISEED with what we have */
2187 diseed = (struct sli4_sge_diseed *) sgl; 2263 diseed = (struct sli4_sge_diseed *) sgl;
2188 memset(diseed, 0, sizeof(struct sli4_sge_diseed)); 2264 memset(diseed, 0, sizeof(struct sli4_sge_diseed));
@@ -2229,7 +2305,9 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2229 2305
2230 /* advance sgl and increment bde count */ 2306 /* advance sgl and increment bde count */
2231 num_sge++; 2307 num_sge++;
2308
2232 sgl++; 2309 sgl++;
2310 j++;
2233 2311
2234 /* setup the first BDE that points to protection buffer */ 2312 /* setup the first BDE that points to protection buffer */
2235 protphysaddr = sg_dma_address(sgpe) + protgroup_offset; 2313 protphysaddr = sg_dma_address(sgpe) + protgroup_offset;
@@ -2244,6 +2322,7 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2244 sgl->addr_hi = le32_to_cpu(putPaddrHigh(protphysaddr)); 2322 sgl->addr_hi = le32_to_cpu(putPaddrHigh(protphysaddr));
2245 sgl->addr_lo = le32_to_cpu(putPaddrLow(protphysaddr)); 2323 sgl->addr_lo = le32_to_cpu(putPaddrLow(protphysaddr));
2246 sgl->word2 = cpu_to_le32(sgl->word2); 2324 sgl->word2 = cpu_to_le32(sgl->word2);
2325 sgl->sge_len = 0;
2247 2326
2248 protgrp_blks = protgroup_len / 8; 2327 protgrp_blks = protgroup_len / 8;
2249 protgrp_bytes = protgrp_blks * blksize; 2328 protgrp_bytes = protgrp_blks * blksize;
@@ -2264,9 +2343,14 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2264 /* setup SGE's for data blocks associated with DIF data */ 2343 /* setup SGE's for data blocks associated with DIF data */
2265 pgdone = 0; 2344 pgdone = 0;
2266 subtotal = 0; /* total bytes processed for current prot grp */ 2345 subtotal = 0; /* total bytes processed for current prot grp */
2346
2347 sgl++;
2348 j++;
2349
2267 while (!pgdone) { 2350 while (!pgdone) {
2268 /* Check to see if we ran out of space */ 2351 /* Check to see if we ran out of space */
2269 if (num_sge >= phba->cfg_total_seg_cnt) 2352 if ((num_sge >= phba->cfg_total_seg_cnt) &&
2353 !phba->cfg_xpsgl)
2270 return num_sge + 1; 2354 return num_sge + 1;
2271 2355
2272 if (!sgde) { 2356 if (!sgde) {
@@ -2275,60 +2359,101 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
2275 __func__); 2359 __func__);
2276 return 0; 2360 return 0;
2277 } 2361 }
2278 sgl++;
2279 dataphysaddr = sg_dma_address(sgde) + split_offset;
2280 2362
2281 remainder = sg_dma_len(sgde) - split_offset; 2363 if (!((j + 1) % phba->border_sge_num)) {
2364 sgl->word2 = 0;
2282 2365
2283 if ((subtotal + remainder) <= protgrp_bytes) { 2366 /* set LSP type */
2284 /* we can use this whole buffer */ 2367 bf_set(lpfc_sli4_sge_type, sgl,
2285 dma_len = remainder; 2368 LPFC_SGE_TYPE_LSP);
2286 split_offset = 0;
2287 2369
2288 if ((subtotal + remainder) == protgrp_bytes) 2370 sgl_xtra = lpfc_get_sgl_per_hdwq(phba,
2289 pgdone = 1; 2371 lpfc_cmd);
2372
2373 if (unlikely(!sgl_xtra)) {
2374 goto out;
2375 } else {
2376 sgl->addr_lo = cpu_to_le32(
2377 putPaddrLow(sgl_xtra->dma_phys_sgl));
2378 sgl->addr_hi = cpu_to_le32(
2379 putPaddrHigh(sgl_xtra->dma_phys_sgl));
2380 }
2381
2382 sgl->word2 = cpu_to_le32(sgl->word2);
2383 sgl->sge_len = cpu_to_le32(
2384 phba->cfg_sg_dma_buf_size);
2385
2386 sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
2290 } else { 2387 } else {
2291 /* must split this buffer with next prot grp */ 2388 dataphysaddr = sg_dma_address(sgde) +
2292 dma_len = protgrp_bytes - subtotal; 2389 split_offset;
2293 split_offset += dma_len;
2294 }
2295 2390
2296 subtotal += dma_len; 2391 remainder = sg_dma_len(sgde) - split_offset;
2297 2392
2298 sgl->addr_lo = cpu_to_le32(putPaddrLow(dataphysaddr)); 2393 if ((subtotal + remainder) <= protgrp_bytes) {
2299 sgl->addr_hi = cpu_to_le32(putPaddrHigh(dataphysaddr)); 2394 /* we can use this whole buffer */
2300 bf_set(lpfc_sli4_sge_last, sgl, 0); 2395 dma_len = remainder;
2301 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset); 2396 split_offset = 0;
2302 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
2303 2397
2304 sgl->sge_len = cpu_to_le32(dma_len); 2398 if ((subtotal + remainder) ==
2305 dma_offset += dma_len; 2399 protgrp_bytes)
2400 pgdone = 1;
2401 } else {
2402 /* must split this buffer with next
2403 * prot grp
2404 */
2405 dma_len = protgrp_bytes - subtotal;
2406 split_offset += dma_len;
2407 }
2306 2408
2307 num_sge++; 2409 subtotal += dma_len;
2308 curr_data++;
2309 2410
2310 if (split_offset) 2411 sgl->word2 = 0;
2311 break; 2412 sgl->addr_lo = cpu_to_le32(putPaddrLow(
2413 dataphysaddr));
2414 sgl->addr_hi = cpu_to_le32(putPaddrHigh(
2415 dataphysaddr));
2416 bf_set(lpfc_sli4_sge_last, sgl, 0);
2417 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
2418 bf_set(lpfc_sli4_sge_type, sgl,
2419 LPFC_SGE_TYPE_DATA);
2312 2420
2313 /* Move to the next s/g segment if possible */ 2421 sgl->sge_len = cpu_to_le32(dma_len);
2314 sgde = sg_next(sgde); 2422 dma_offset += dma_len;
2423
2424 num_sge++;
2425 curr_data++;
2426
2427 if (split_offset) {
2428 sgl++;
2429 j++;
2430 break;
2431 }
2432
2433 /* Move to the next s/g segment if possible */
2434 sgde = sg_next(sgde);
2435
2436 sgl++;
2437 }
2438
2439 j++;
2315 } 2440 }
2316 2441
2317 if (protgroup_offset) { 2442 if (protgroup_offset) {
2318 /* update the reference tag */ 2443 /* update the reference tag */
2319 reftag += protgrp_blks; 2444 reftag += protgrp_blks;
2320 sgl++;
2321 continue; 2445 continue;
2322 } 2446 }
2323 2447
2324 /* are we done ? */ 2448 /* are we done ? */
2325 if (curr_prot == protcnt) { 2449 if (curr_prot == protcnt) {
2450 /* mark the last SGL */
2451 sgl--;
2326 bf_set(lpfc_sli4_sge_last, sgl, 1); 2452 bf_set(lpfc_sli4_sge_last, sgl, 1);
2327 alldone = 1; 2453 alldone = 1;
2328 } else if (curr_prot < protcnt) { 2454 } else if (curr_prot < protcnt) {
2329 /* advance to next prot buffer */ 2455 /* advance to next prot buffer */
2330 sgpe = sg_next(sgpe); 2456 sgpe = sg_next(sgpe);
2331 sgl++;
2332 2457
2333 /* update the reference tag */ 2458 /* update the reference tag */
2334 reftag += protgrp_blks; 2459 reftag += protgrp_blks;
@@ -2995,8 +3120,10 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
2995 uint32_t num_bde = 0; 3120 uint32_t num_bde = 0;
2996 uint32_t dma_len; 3121 uint32_t dma_len;
2997 uint32_t dma_offset = 0; 3122 uint32_t dma_offset = 0;
2998 int nseg; 3123 int nseg, i, j;
2999 struct ulp_bde64 *bde; 3124 struct ulp_bde64 *bde;
3125 bool lsp_just_set = false;
3126 struct sli4_hybrid_sgl *sgl_xtra = NULL;
3000 3127
3001 /* 3128 /*
3002 * There are three possibilities here - use scatter-gather segment, use 3129 * There are three possibilities here - use scatter-gather segment, use
@@ -3023,7 +3150,8 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
3023 sgl += 1; 3150 sgl += 1;
3024 first_data_sgl = sgl; 3151 first_data_sgl = sgl;
3025 lpfc_cmd->seg_cnt = nseg; 3152 lpfc_cmd->seg_cnt = nseg;
3026 if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) { 3153 if (!phba->cfg_xpsgl &&
3154 lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
3027 lpfc_printf_log(phba, KERN_ERR, LOG_BG, "9074 BLKGRD:" 3155 lpfc_printf_log(phba, KERN_ERR, LOG_BG, "9074 BLKGRD:"
3028 " %s: Too many sg segments from " 3156 " %s: Too many sg segments from "
3029 "dma_map_sg. Config %d, seg_cnt %d\n", 3157 "dma_map_sg. Config %d, seg_cnt %d\n",
@@ -3044,22 +3172,80 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
3044 * the IOCB. If it can't then the BDEs get added to a BPL as it 3172 * the IOCB. If it can't then the BDEs get added to a BPL as it
3045 * does for SLI-2 mode. 3173 * does for SLI-2 mode.
3046 */ 3174 */
3047 scsi_for_each_sg(scsi_cmnd, sgel, nseg, num_bde) { 3175
3048 physaddr = sg_dma_address(sgel); 3176 /* for tracking segment boundaries */
3049 dma_len = sg_dma_len(sgel); 3177 sgel = scsi_sglist(scsi_cmnd);
3050 sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr)); 3178 j = 2;
3051 sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr)); 3179 for (i = 0; i < nseg; i++) {
3052 sgl->word2 = le32_to_cpu(sgl->word2); 3180 sgl->word2 = 0;
3053 if ((num_bde + 1) == nseg) 3181 if ((num_bde + 1) == nseg) {
3054 bf_set(lpfc_sli4_sge_last, sgl, 1); 3182 bf_set(lpfc_sli4_sge_last, sgl, 1);
3055 else 3183 bf_set(lpfc_sli4_sge_type, sgl,
3184 LPFC_SGE_TYPE_DATA);
3185 } else {
3056 bf_set(lpfc_sli4_sge_last, sgl, 0); 3186 bf_set(lpfc_sli4_sge_last, sgl, 0);
3057 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset); 3187
3058 bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); 3188 /* do we need to expand the segment */
3059 sgl->word2 = cpu_to_le32(sgl->word2); 3189 if (!lsp_just_set &&
3060 sgl->sge_len = cpu_to_le32(dma_len); 3190 !((j + 1) % phba->border_sge_num) &&
3061 dma_offset += dma_len; 3191 ((nseg - 1) != i)) {
3062 sgl++; 3192 /* set LSP type */
3193 bf_set(lpfc_sli4_sge_type, sgl,
3194 LPFC_SGE_TYPE_LSP);
3195
3196 sgl_xtra = lpfc_get_sgl_per_hdwq(
3197 phba, lpfc_cmd);
3198
3199 if (unlikely(!sgl_xtra)) {
3200 lpfc_cmd->seg_cnt = 0;
3201 scsi_dma_unmap(scsi_cmnd);
3202 return 1;
3203 }
3204 sgl->addr_lo = cpu_to_le32(putPaddrLow(
3205 sgl_xtra->dma_phys_sgl));
3206 sgl->addr_hi = cpu_to_le32(putPaddrHigh(
3207 sgl_xtra->dma_phys_sgl));
3208
3209 } else {
3210 bf_set(lpfc_sli4_sge_type, sgl,
3211 LPFC_SGE_TYPE_DATA);
3212 }
3213 }
3214
3215 if (!(bf_get(lpfc_sli4_sge_type, sgl) &
3216 LPFC_SGE_TYPE_LSP)) {
3217 if ((nseg - 1) == i)
3218 bf_set(lpfc_sli4_sge_last, sgl, 1);
3219
3220 physaddr = sg_dma_address(sgel);
3221 dma_len = sg_dma_len(sgel);
3222 sgl->addr_lo = cpu_to_le32(putPaddrLow(
3223 physaddr));
3224 sgl->addr_hi = cpu_to_le32(putPaddrHigh(
3225 physaddr));
3226
3227 bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
3228 sgl->word2 = cpu_to_le32(sgl->word2);
3229 sgl->sge_len = cpu_to_le32(dma_len);
3230
3231 dma_offset += dma_len;
3232 sgel = sg_next(sgel);
3233
3234 sgl++;
3235 lsp_just_set = false;
3236
3237 } else {
3238 sgl->word2 = cpu_to_le32(sgl->word2);
3239 sgl->sge_len = cpu_to_le32(
3240 phba->cfg_sg_dma_buf_size);
3241
3242 sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
3243 i = i - 1;
3244
3245 lsp_just_set = true;
3246 }
3247
3248 j++;
3063 } 3249 }
3064 /* 3250 /*
3065 * Setup the first Payload BDE. For FCoE we just key off 3251 * Setup the first Payload BDE. For FCoE we just key off
@@ -3175,7 +3361,8 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
3175 lpfc_cmd->seg_cnt = datasegcnt; 3361 lpfc_cmd->seg_cnt = datasegcnt;
3176 3362
3177 /* First check if data segment count from SCSI Layer is good */ 3363 /* First check if data segment count from SCSI Layer is good */
3178 if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) { 3364 if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt &&
3365 !phba->cfg_xpsgl) {
3179 WARN_ON_ONCE(lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt); 3366 WARN_ON_ONCE(lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt);
3180 ret = 2; 3367 ret = 2;
3181 goto err; 3368 goto err;
@@ -3186,13 +3373,15 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
3186 switch (prot_group_type) { 3373 switch (prot_group_type) {
3187 case LPFC_PG_TYPE_NO_DIF: 3374 case LPFC_PG_TYPE_NO_DIF:
3188 /* Here we need to add a DISEED to the count */ 3375 /* Here we need to add a DISEED to the count */
3189 if ((lpfc_cmd->seg_cnt + 1) > phba->cfg_total_seg_cnt) { 3376 if (((lpfc_cmd->seg_cnt + 1) >
3377 phba->cfg_total_seg_cnt) &&
3378 !phba->cfg_xpsgl) {
3190 ret = 2; 3379 ret = 2;
3191 goto err; 3380 goto err;
3192 } 3381 }
3193 3382
3194 num_sge = lpfc_bg_setup_sgl(phba, scsi_cmnd, sgl, 3383 num_sge = lpfc_bg_setup_sgl(phba, scsi_cmnd, sgl,
3195 datasegcnt); 3384 datasegcnt, lpfc_cmd);
3196 3385
3197 /* we should have 2 or more entries in buffer list */ 3386 /* we should have 2 or more entries in buffer list */
3198 if (num_sge < 2) { 3387 if (num_sge < 2) {
@@ -3220,18 +3409,20 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba,
3220 * There is a minimun of 3 SGEs used for every 3409 * There is a minimun of 3 SGEs used for every
3221 * protection data segment. 3410 * protection data segment.
3222 */ 3411 */
3223 if ((lpfc_cmd->prot_seg_cnt * 3) > 3412 if (((lpfc_cmd->prot_seg_cnt * 3) >
3224 (phba->cfg_total_seg_cnt - 2)) { 3413 (phba->cfg_total_seg_cnt - 2)) &&
3414 !phba->cfg_xpsgl) {
3225 ret = 2; 3415 ret = 2;
3226 goto err; 3416 goto err;
3227 } 3417 }
3228 3418
3229 num_sge = lpfc_bg_setup_sgl_prot(phba, scsi_cmnd, sgl, 3419 num_sge = lpfc_bg_setup_sgl_prot(phba, scsi_cmnd, sgl,
3230 datasegcnt, protsegcnt); 3420 datasegcnt, protsegcnt, lpfc_cmd);
3231 3421
3232 /* we should have 3 or more entries in buffer list */ 3422 /* we should have 3 or more entries in buffer list */
3233 if ((num_sge < 3) || 3423 if (num_sge < 3 ||
3234 (num_sge > phba->cfg_total_seg_cnt)) { 3424 (num_sge > phba->cfg_total_seg_cnt &&
3425 !phba->cfg_xpsgl)) {
3235 ret = 2; 3426 ret = 2;
3236 goto err; 3427 goto err;
3237 } 3428 }
@@ -5913,7 +6104,7 @@ struct scsi_host_template lpfc_template_no_hr = {
5913 .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, 6104 .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT,
5914 .cmd_per_lun = LPFC_CMD_PER_LUN, 6105 .cmd_per_lun = LPFC_CMD_PER_LUN,
5915 .shost_attrs = lpfc_hba_attrs, 6106 .shost_attrs = lpfc_hba_attrs,
5916 .max_sectors = 0xFFFF, 6107 .max_sectors = 0xFFFFFFFF,
5917 .vendor_id = LPFC_NL_VENDOR_ID, 6108 .vendor_id = LPFC_NL_VENDOR_ID,
5918 .change_queue_depth = scsi_change_queue_depth, 6109 .change_queue_depth = scsi_change_queue_depth,
5919 .track_queue_depth = 1, 6110 .track_queue_depth = 1,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 940dd82a265b..f4beb9104d3a 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -20233,6 +20233,13 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
20233 spin_unlock_irqrestore(&qp->io_buf_list_put_lock, 20233 spin_unlock_irqrestore(&qp->io_buf_list_put_lock,
20234 iflag); 20234 iflag);
20235 } 20235 }
20236
20237 if (phba->cfg_xpsgl && !phba->nvmet_support &&
20238 !list_empty(&lpfc_ncmd->dma_sgl_xtra_list))
20239 lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd);
20240
20241 if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list))
20242 lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd);
20236} 20243}
20237 20244
20238/** 20245/**
@@ -20447,3 +20454,288 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
 
 	return lpfc_cmd;
 }
+
+/**
+ * lpfc_get_sgl_per_hdwq - Get one SGL chunk from hdwq's pool
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure to append the SGL chunk
+ *
+ * This routine gets one SGL chunk buffer from hdwq's SGL chunk pool,
+ * and will allocate an SGL chunk if the pool is empty.
+ *
+ * Return codes:
+ * NULL - Error
+ * Pointer to sli4_hybrid_sgl - Success
+ **/
+struct sli4_hybrid_sgl *
+lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf)
+{
+	struct sli4_hybrid_sgl *list_entry = NULL;
+	struct sli4_hybrid_sgl *tmp = NULL;
+	struct sli4_hybrid_sgl *allocated_sgl = NULL;
+	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+	struct list_head *buf_list = &hdwq->sgl_list;
+
+	spin_lock_irq(&hdwq->hdwq_lock);
+
+	if (likely(!list_empty(buf_list))) {
+		/* break off 1 chunk from the sgl_list */
+		list_for_each_entry_safe(list_entry, tmp,
+					 buf_list, list_node) {
+			list_move_tail(&list_entry->list_node,
+				       &lpfc_buf->dma_sgl_xtra_list);
+			break;
+		}
+	} else {
+		/* allocate more */
+		spin_unlock_irq(&hdwq->hdwq_lock);
+		tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC,
+				   cpu_to_node(smp_processor_id()));
+		if (!tmp) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+					"8353 error kmalloc memory for HDWQ "
+					"%d %s\n",
+					lpfc_buf->hdwq_no, __func__);
+			return NULL;
+		}
+
+		tmp->dma_sgl = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool,
+					      GFP_ATOMIC, &tmp->dma_phys_sgl);
+		if (!tmp->dma_sgl) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+					"8354 error pool_alloc memory for HDWQ "
+					"%d %s\n",
+					lpfc_buf->hdwq_no, __func__);
+			kfree(tmp);
+			return NULL;
+		}
+
+		spin_lock_irq(&hdwq->hdwq_lock);
+		list_add_tail(&tmp->list_node, &lpfc_buf->dma_sgl_xtra_list);
+	}
+
+	allocated_sgl = list_last_entry(&lpfc_buf->dma_sgl_xtra_list,
+					struct sli4_hybrid_sgl,
+					list_node);
+
+	spin_unlock_irq(&hdwq->hdwq_lock);
+
+	return allocated_sgl;
+}
+
+/**
+ * lpfc_put_sgl_per_hdwq - Put one SGL chunk into hdwq pool
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure with the SGL chunk
+ *
+ * This routine puts one SGL chunk buffer into hdwq's SGL chunk pool.
+ *
+ * Return codes:
+ * 0 - Success
+ * -EINVAL - Error
+ **/
+int
+lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf)
+{
+	int rc = 0;
+	struct sli4_hybrid_sgl *list_entry = NULL;
+	struct sli4_hybrid_sgl *tmp = NULL;
+	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+	struct list_head *buf_list = &hdwq->sgl_list;
+
+	spin_lock_irq(&hdwq->hdwq_lock);
+
+	if (likely(!list_empty(&lpfc_buf->dma_sgl_xtra_list))) {
+		list_for_each_entry_safe(list_entry, tmp,
+					 &lpfc_buf->dma_sgl_xtra_list,
+					 list_node) {
+			list_move_tail(&list_entry->list_node,
+				       buf_list);
+		}
+	} else {
+		rc = -EINVAL;
+	}
+
+	spin_unlock_irq(&hdwq->hdwq_lock);
+	return rc;
+}
+
+/**
+ * lpfc_free_sgl_per_hdwq - Free all SGL chunks of hdwq pool
+ * @phba: phba object
+ * @hdwq: hdwq to cleanup sgl buff resources on
+ *
+ * This routine frees all SGL chunks of hdwq SGL chunk pool.
+ *
+ * Return codes:
+ * None
+ **/
+void
+lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba,
+		       struct lpfc_sli4_hdw_queue *hdwq)
+{
+	struct list_head *buf_list = &hdwq->sgl_list;
+	struct sli4_hybrid_sgl *list_entry = NULL;
+	struct sli4_hybrid_sgl *tmp = NULL;
+
+	spin_lock_irq(&hdwq->hdwq_lock);
+
+	/* Free sgl pool */
+	list_for_each_entry_safe(list_entry, tmp,
+				 buf_list, list_node) {
+		dma_pool_free(phba->lpfc_sg_dma_buf_pool,
+			      list_entry->dma_sgl,
+			      list_entry->dma_phys_sgl);
+		list_del(&list_entry->list_node);
+		kfree(list_entry);
+	}
+
+	spin_unlock_irq(&hdwq->hdwq_lock);
+}
+
+/**
+ * lpfc_get_cmd_rsp_buf_per_hdwq - Get one CMD/RSP buffer from hdwq
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure to attach the CMD/RSP buffer
+ *
+ * This routine gets one CMD/RSP buffer from hdwq's CMD/RSP pool,
+ * and will allocate a CMD/RSP buffer if the pool is empty.
+ *
+ * Return codes:
+ * NULL - Error
+ * Pointer to fcp_cmd_rsp_buf - Success
+ **/
+struct fcp_cmd_rsp_buf *
+lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+			      struct lpfc_io_buf *lpfc_buf)
+{
+	struct fcp_cmd_rsp_buf *list_entry = NULL;
+	struct fcp_cmd_rsp_buf *tmp = NULL;
+	struct fcp_cmd_rsp_buf *allocated_buf = NULL;
+	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+	struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
+
+	spin_lock_irq(&hdwq->hdwq_lock);
+
+	if (likely(!list_empty(buf_list))) {
+		/* break off 1 chunk from the list */
+		list_for_each_entry_safe(list_entry, tmp,
+					 buf_list,
+					 list_node) {
+			list_move_tail(&list_entry->list_node,
+				       &lpfc_buf->dma_cmd_rsp_list);
+			break;
+		}
+	} else {
+		/* allocate more */
+		spin_unlock_irq(&hdwq->hdwq_lock);
+		tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC,
+				   cpu_to_node(smp_processor_id()));
+		if (!tmp) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+					"8355 error kmalloc memory for HDWQ "
+					"%d %s\n",
+					lpfc_buf->hdwq_no, __func__);
+			return NULL;
+		}
+
+		tmp->fcp_cmnd = dma_pool_alloc(phba->lpfc_cmd_rsp_buf_pool,
+					       GFP_ATOMIC,
+					       &tmp->fcp_cmd_rsp_dma_handle);
+
+		if (!tmp->fcp_cmnd) {
+			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+					"8356 error pool_alloc memory for HDWQ "
+					"%d %s\n",
+					lpfc_buf->hdwq_no, __func__);
+			kfree(tmp);
+			return NULL;
+		}
+
+		tmp->fcp_rsp = (struct fcp_rsp *)((uint8_t *)tmp->fcp_cmnd +
+				sizeof(struct fcp_cmnd));
+
+		spin_lock_irq(&hdwq->hdwq_lock);
+		list_add_tail(&tmp->list_node, &lpfc_buf->dma_cmd_rsp_list);
+	}
+
+	allocated_buf = list_last_entry(&lpfc_buf->dma_cmd_rsp_list,
+					struct fcp_cmd_rsp_buf,
+					list_node);
+
+	spin_unlock_irq(&hdwq->hdwq_lock);
+
+	return allocated_buf;
+}
+
+/**
+ * lpfc_put_cmd_rsp_buf_per_hdwq - Put one CMD/RSP buffer into hdwq pool
+ * @phba: The HBA for which this call is being executed.
+ * @lpfc_buf: IO buf structure with the CMD/RSP buf
+ *
+ * This routine puts one CMD/RSP buffer into executing CPU's CMD/RSP pool.
+ *
+ * Return codes:
+ * 0 - Success
+ * -EINVAL - Error
+ **/
+int
+lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+			      struct lpfc_io_buf *lpfc_buf)
+{
+	int rc = 0;
+	struct fcp_cmd_rsp_buf *list_entry = NULL;
+	struct fcp_cmd_rsp_buf *tmp = NULL;
+	struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq;
+	struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
+
+	spin_lock_irq(&hdwq->hdwq_lock);
+
+	if (likely(!list_empty(&lpfc_buf->dma_cmd_rsp_list))) {
+		list_for_each_entry_safe(list_entry, tmp,
+					 &lpfc_buf->dma_cmd_rsp_list,
+					 list_node) {
+			list_move_tail(&list_entry->list_node,
+				       buf_list);
+		}
+	} else {
+		rc = -EINVAL;
+	}
+
+	spin_unlock_irq(&hdwq->hdwq_lock);
+	return rc;
+}
+
+/**
+ * lpfc_free_cmd_rsp_buf_per_hdwq - Free all CMD/RSP chunks of hdwq pool
+ * @phba: phba object
+ * @hdwq: hdwq to cleanup cmd rsp buff resources on
+ *
+ * This routine frees all CMD/RSP buffers of hdwq's CMD/RSP buf pool.
+ *
+ * Return codes:
+ * None
+ **/
+void
+lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+			       struct lpfc_sli4_hdw_queue *hdwq)
+{
+	struct list_head *buf_list = &hdwq->cmd_rsp_buf_list;
+	struct fcp_cmd_rsp_buf *list_entry = NULL;
+	struct fcp_cmd_rsp_buf *tmp = NULL;
+
+	spin_lock_irq(&hdwq->hdwq_lock);
+
+	/* Free cmd_rsp buf pool */
+	list_for_each_entry_safe(list_entry, tmp,
+				 buf_list,
+				 list_node) {
+		dma_pool_free(phba->lpfc_cmd_rsp_buf_pool,
+			      list_entry->fcp_cmnd,
+			      list_entry->fcp_cmd_rsp_dma_handle);
+		list_del(&list_entry->list_node);
+		kfree(list_entry);
+	}
+
+	spin_unlock_irq(&hdwq->hdwq_lock);
+}
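
The six routines added above share one grow-on-demand pool pattern: take the per-hardware-queue lock, reuse a pooled chunk if one is available, otherwise drop the lock, allocate a fresh chunk, and park it on the requesting I/O buffer; on release the chunk goes back to the pool instead of being freed. For readers who want the pattern in isolation, here is a minimal user-space analogue in plain C (a pthread mutex and malloc stand in for the hdwq spinlock and the DMA pools; none of these names exist in the driver):

/* Build for illustration only:  cc -pthread -o pool_demo pool_demo.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* One spare chunk sitting in a per-queue pool. */
struct chunk {
	void *buf;		/* stands in for the DMA-able segment */
	struct chunk *next;
};

/* Per-queue pool: a lock plus a singly linked free list. */
struct queue_pool {
	pthread_mutex_t lock;
	struct chunk *free_list;
};

/* Get a chunk: reuse a pooled one if available, otherwise grow the pool
 * by allocating a new chunk on demand (the "allocate more" branch). */
static struct chunk *pool_get(struct queue_pool *qp, size_t chunk_size)
{
	struct chunk *c;

	pthread_mutex_lock(&qp->lock);
	c = qp->free_list;
	if (c) {
		qp->free_list = c->next;
		pthread_mutex_unlock(&qp->lock);
		return c;
	}
	pthread_mutex_unlock(&qp->lock);

	/* Pool empty: allocate outside the lock, as the driver does. */
	c = malloc(sizeof(*c));
	if (!c)
		return NULL;
	c->buf = malloc(chunk_size);
	if (!c->buf) {
		free(c);
		return NULL;
	}
	return c;
}

/* Put a chunk back: it is retained for reuse, not freed. */
static void pool_put(struct queue_pool *qp, struct chunk *c)
{
	pthread_mutex_lock(&qp->lock);
	c->next = qp->free_list;
	qp->free_list = c;
	pthread_mutex_unlock(&qp->lock);
}

/* Free everything, e.g. at teardown time. */
static void pool_destroy(struct queue_pool *qp)
{
	struct chunk *c, *next;

	pthread_mutex_lock(&qp->lock);
	for (c = qp->free_list; c; c = next) {
		next = c->next;
		free(c->buf);
		free(c);
	}
	qp->free_list = NULL;
	pthread_mutex_unlock(&qp->lock);
}

int main(void)
{
	struct queue_pool qp = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct chunk *a = pool_get(&qp, 256);	/* pool empty: grows */
	struct chunk *b;

	pool_put(&qp, a);			/* chunk kept for reuse */
	b = pool_get(&qp, 256);			/* same chunk comes back */
	printf("chunk reused: %s\n", a == b ? "yes" : "no");
	pool_put(&qp, b);
	pool_destroy(&qp);
	return 0;
}

The design point to notice is that pool_put() never frees: once a queue has needed an extra chunk it keeps it, trading a small amount of memory for avoiding allocation on the hot path the next time.
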
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index 467b8270f7fd..37fbcb46387e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -365,9 +365,18 @@ struct lpfc_io_buf {
 	/* Common fields */
 	struct list_head list;
 	void *data;
+
 	dma_addr_t dma_handle;
 	dma_addr_t dma_phys_sgl;
-	struct sli4_sge *dma_sgl;
+
+	struct sli4_sge *dma_sgl; /* initial segment chunk */
+
+	/* linked list of extra sli4_hybrid_sge */
+	struct list_head dma_sgl_xtra_list;
+
+	/* list head for fcp_cmd_rsp buf */
+	struct list_head dma_cmd_rsp_list;
+
 	struct lpfc_iocbq cur_iocbq;
 	struct lpfc_sli4_hdw_queue *hdwq;
 	uint16_t hdwq_no;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 3aeca387b22a..3ec9cf4c6427 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -680,6 +680,13 @@ struct lpfc_sli4_hdw_queue {
 	uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT];
 	uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT];
 #endif
+
+	/* Per HDWQ pool resources */
+	struct list_head sgl_list;
+	struct list_head cmd_rsp_buf_list;
+
+	/* Lock for syncing Per HDWQ pool resources */
+	spinlock_t hdwq_lock;
 };
 
 #ifdef LPFC_HDWQ_LOCK_STAT
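
The two list heads and the lock added to struct lpfc_sli4_hdw_queue above anchor the per-queue pools managed by the lpfc_sli.c routines earlier in this patch. They must be initialized before first use; that setup lives in lpfc_init.c, which is not part of this excerpt. A minimal sketch of what such setup has to do, with a hypothetical helper name:

/* Hypothetical helper, for illustration only: the real initialization
 * is performed in lpfc_init.c as part of this patch. */
static void lpfc_hdwq_pool_init_sketch(struct lpfc_sli4_hdw_queue *hdwq)
{
	spin_lock_init(&hdwq->hdwq_lock);		/* guards both lists */
	INIT_LIST_HEAD(&hdwq->sgl_list);		/* spare SGL chunks */
	INIT_LIST_HEAD(&hdwq->cmd_rsp_buf_list);	/* spare cmd/rsp bufs */
}
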
@@ -1089,6 +1096,17 @@ int lpfc_sli4_post_status_check(struct lpfc_hba *);
 uint8_t lpfc_sli_config_mbox_subsys_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 uint8_t lpfc_sli_config_mbox_opcode_get(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_ras_dma_free(struct lpfc_hba *phba);
+struct sli4_hybrid_sgl *lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba,
+					      struct lpfc_io_buf *buf);
+struct fcp_cmd_rsp_buf *lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+						      struct lpfc_io_buf *buf);
+int lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *buf);
+int lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+				  struct lpfc_io_buf *buf);
+void lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba,
+			    struct lpfc_sli4_hdw_queue *hdwq);
+void lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba,
+				    struct lpfc_sli4_hdw_queue *hdwq);
 static inline void *lpfc_sli4_qe(struct lpfc_queue *q, uint16_t idx)
 {
 	return q->q_pgs[idx / q->entry_cnt_per_pg] +