author		Doug Ledford <dledford@redhat.com>	2015-12-15 14:10:44 -0500
committer	Doug Ledford <dledford@redhat.com>	2015-12-15 14:10:44 -0500
commit		c6333f9f9f7646e311248a09e8ed96126a97aba8
tree		1a86124c5b43fa858f1d8d7beb99bcd29d6d6ab7
parent		9f9499ae8e6415cefc4fe0a96ad0e27864353c89
parent		cfeb91b375ad5f52665e00d374a4e403d2e6063e

Merge branch 'rdma-cq.2' of git://git.infradead.org/users/hch/rdma into 4.5/rdma-cq

Signed-off-by: Doug Ledford <dledford@redhat.com>

Conflicts:
	drivers/infiniband/ulp/srp/ib_srp.c - conflicts with changes in
	ib_srp.c introduced during the 4.4-rc updates
-rw-r--r--  Documentation/kernel-per-CPU-kthreads.txt | 2
-rw-r--r--  block/Makefile | 2
-rw-r--r--  drivers/infiniband/Kconfig | 1
-rw-r--r--  drivers/infiniband/core/Makefile | 2
-rw-r--r--  drivers/infiniband/core/cq.c | 209
-rw-r--r--  drivers/infiniband/core/device.c | 15
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 102
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 255
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 21
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 270
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 173
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h | 7
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 427
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h | 48
-rw-r--r--  drivers/scsi/Kconfig | 1
-rw-r--r--  drivers/scsi/be2iscsi/Kconfig | 1
-rw-r--r--  drivers/scsi/be2iscsi/be.h | 4
-rw-r--r--  drivers/scsi/be2iscsi/be_iscsi.c | 4
-rw-r--r--  drivers/scsi/be2iscsi/be_main.c | 20
-rw-r--r--  drivers/scsi/ipr.c | 25
-rw-r--r--  drivers/scsi/ipr.h | 4
-rw-r--r--  include/linux/blk-iopoll.h | 46
-rw-r--r--  include/linux/interrupt.h | 2
-rw-r--r--  include/linux/irq_poll.h | 25
-rw-r--r--  include/rdma/ib_verbs.h | 38
-rw-r--r--  include/trace/events/irq.h | 2
-rw-r--r--  lib/Kconfig | 5
-rw-r--r--  lib/Makefile | 1
-rw-r--r--  lib/irq_poll.c (renamed from block/blk-iopoll.c) | 108
-rw-r--r--  tools/lib/traceevent/event-parse.c | 2
-rw-r--r--  tools/perf/util/trace-event-parse.c | 2
32 files changed, 886 insertions(+), 940 deletions(-)
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index f4cbfe0ba108..edec3a3e648d 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following:
90 from being initiated from tasks that might run on the CPU to 90 from being initiated from tasks that might run on the CPU to
91 be de-jittered. (It is OK to force this CPU offline and then 91 be de-jittered. (It is OK to force this CPU offline and then
92 bring it back online before you start your application.) 92 bring it back online before you start your application.)
93BLOCK_IOPOLL_SOFTIRQ: Do all of the following: 93IRQ_POLL_SOFTIRQ: Do all of the following:
941. Force block-device interrupts onto some other CPU. 941. Force block-device interrupts onto some other CPU.
952. Initiate any block I/O and block-I/O polling on other CPUs. 952. Initiate any block I/O and block-I/O polling on other CPUs.
963. Once your application has started, prevent CPU-hotplug operations 963. Once your application has started, prevent CPU-hotplug operations
diff --git a/block/Makefile b/block/Makefile
index 00ecc97629db..e8504748c7cb 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
5obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 5obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8 blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ 8 blk-lib.o blk-mq.o blk-mq-tag.o \
9 blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ 9 blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ 10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
11 partitions/ 11 partitions/
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index aa26f3c3416b..282ec0b664fe 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
5 depends on NET 5 depends on NET
6 depends on INET 6 depends on INET
7 depends on m || IPV6 != m 7 depends on m || IPV6 != m
8 select IRQ_POLL
8 ---help--- 9 ---help---
9 Core support for InfiniBand (IB). Make sure to also select 10 Core support for InfiniBand (IB). Make sure to also select
10 any protocols you wish to use as well as drivers for your 11 any protocols you wish to use as well as drivers for your
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d43a8994ac5c..ae48d874012f 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ 8obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
9 $(user_access-y) 9 $(user_access-y)
10 10
11ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ 11ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
12 device.o fmr_pool.o cache.o netlink.o \ 12 device.o fmr_pool.o cache.o netlink.o \
13 roce_gid_mgmt.o 13 roce_gid_mgmt.o
14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 14ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644
index 000000000000..a754fc727de5
--- /dev/null
+++ b/drivers/infiniband/core/cq.c
@@ -0,0 +1,209 @@
1/*
2 * Copyright (c) 2015 HGST, a Western Digital Company.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 */
13#include <linux/module.h>
14#include <linux/err.h>
15#include <linux/slab.h>
16#include <rdma/ib_verbs.h>
17
18/* # of WCs to poll for with a single call to ib_poll_cq */
19#define IB_POLL_BATCH 16
20
21/* # of WCs to iterate over before yielding */
22#define IB_POLL_BUDGET_IRQ 256
23#define IB_POLL_BUDGET_WORKQUEUE 65536
24
25#define IB_POLL_FLAGS \
26 (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
27
28static int __ib_process_cq(struct ib_cq *cq, int budget)
29{
30 int i, n, completed = 0;
31
32 while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
33 for (i = 0; i < n; i++) {
34 struct ib_wc *wc = &cq->wc[i];
35
36 if (wc->wr_cqe)
37 wc->wr_cqe->done(cq, wc);
38 else
39 WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
40 }
41
42 completed += n;
43
44 if (n != IB_POLL_BATCH ||
45 (budget != -1 && completed >= budget))
46 break;
47 }
48
49 return completed;
50}
51
52/**
53 * ib_process_direct_cq - process a CQ in caller context
54 * @cq: CQ to process
55 * @budget: number of CQEs to poll for
56 *
57 * This function is used to process all outstanding CQ entries on a
58 * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
59 * context and does not ask for completion interrupts from the HCA.
60 *
61 * Note: for compatibility reasons -1 can be passed in %budget for unlimited
62 * polling. Do not use this feature in new code, it will be removed soon.
63 */
64int ib_process_cq_direct(struct ib_cq *cq, int budget)
65{
66 WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
67
68 return __ib_process_cq(cq, budget);
69}
70EXPORT_SYMBOL(ib_process_cq_direct);
71
72static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
73{
74 WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
75}
76
77static int ib_poll_handler(struct irq_poll *iop, int budget)
78{
79 struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
80 int completed;
81
82 completed = __ib_process_cq(cq, budget);
83 if (completed < budget) {
84 irq_poll_complete(&cq->iop);
85 if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
86 irq_poll_sched(&cq->iop);
87 }
88
89 return completed;
90}
91
92static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
93{
94 irq_poll_sched(&cq->iop);
95}
96
97static void ib_cq_poll_work(struct work_struct *work)
98{
99 struct ib_cq *cq = container_of(work, struct ib_cq, work);
100 int completed;
101
102 completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
103 if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
104 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
105 queue_work(ib_comp_wq, &cq->work);
106}
107
108static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
109{
110 queue_work(ib_comp_wq, &cq->work);
111}
112
113/**
114 * ib_alloc_cq - allocate a completion queue
115 * @dev: device to allocate the CQ for
116 * @private: driver private data, accessible from cq->cq_context
117 * @nr_cqe: number of CQEs to allocate
118 * @comp_vector: HCA completion vectors for this CQ
119 * @poll_ctx: context to poll the CQ from.
120 *
121 * This is the proper interface to allocate a CQ for in-kernel users. A
122 * CQ allocated with this interface will automatically be polled from the
 123 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
124 * to use this CQ abstraction.
125 */
126struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
127 int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
128{
129 struct ib_cq_init_attr cq_attr = {
130 .cqe = nr_cqe,
131 .comp_vector = comp_vector,
132 };
133 struct ib_cq *cq;
134 int ret = -ENOMEM;
135
136 cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
137 if (IS_ERR(cq))
138 return cq;
139
140 cq->device = dev;
141 cq->uobject = NULL;
142 cq->event_handler = NULL;
143 cq->cq_context = private;
144 cq->poll_ctx = poll_ctx;
145 atomic_set(&cq->usecnt, 0);
146
147 cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
148 if (!cq->wc)
149 goto out_destroy_cq;
150
151 switch (cq->poll_ctx) {
152 case IB_POLL_DIRECT:
153 cq->comp_handler = ib_cq_completion_direct;
154 break;
155 case IB_POLL_SOFTIRQ:
156 cq->comp_handler = ib_cq_completion_softirq;
157
158 irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
159 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
160 break;
161 case IB_POLL_WORKQUEUE:
162 cq->comp_handler = ib_cq_completion_workqueue;
163 INIT_WORK(&cq->work, ib_cq_poll_work);
164 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
165 break;
166 default:
167 ret = -EINVAL;
168 goto out_free_wc;
169 }
170
171 return cq;
172
173out_free_wc:
174 kfree(cq->wc);
175out_destroy_cq:
176 cq->device->destroy_cq(cq);
177 return ERR_PTR(ret);
178}
179EXPORT_SYMBOL(ib_alloc_cq);
180
181/**
182 * ib_free_cq - free a completion queue
183 * @cq: completion queue to free.
184 */
185void ib_free_cq(struct ib_cq *cq)
186{
187 int ret;
188
189 if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
190 return;
191
192 switch (cq->poll_ctx) {
193 case IB_POLL_DIRECT:
194 break;
195 case IB_POLL_SOFTIRQ:
196 irq_poll_disable(&cq->iop);
197 break;
198 case IB_POLL_WORKQUEUE:
199 flush_work(&cq->work);
200 break;
201 default:
202 WARN_ON_ONCE(1);
203 }
204
205 kfree(cq->wc);
206 ret = cq->device->destroy_cq(cq);
207 WARN_ON_ONCE(ret);
208}
209EXPORT_SYMBOL(ib_free_cq);
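
For orientation, a minimal sketch (not part of the patch) of how an in-kernel ULP would consume the CQ API added in this file. Only ib_alloc_cq(), ib_free_cq() and the poll contexts come from the code above; the hypothetical my_setup_cq()/my_ctx names and the sizing are illustrative assumptions.

/* Sketch only: allocate a CQ that is polled from workqueue context. */
static struct ib_cq *my_setup_cq(struct ib_device *dev, void *my_ctx,
				 int nr_cqe)
{
	struct ib_cq *cq;

	cq = ib_alloc_cq(dev, my_ctx, nr_cqe, 0 /* comp_vector */,
			 IB_POLL_WORKQUEUE);
	if (IS_ERR(cq))
		return cq;

	/*
	 * Post send/recv work requests whose wr_cqe points at an embedded
	 * struct ib_cqe; the core invokes cqe->done() from ib_comp_wq.
	 * Once the QP is torn down and all completions are reaped, the
	 * ULP calls ib_free_cq(cq).
	 */
	return cq;
}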
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 179e8134d57f..6421d2317b6f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct ib_client_data {
58 bool going_down; 58 bool going_down;
59}; 59};
60 60
61struct workqueue_struct *ib_comp_wq;
61struct workqueue_struct *ib_wq; 62struct workqueue_struct *ib_wq;
62EXPORT_SYMBOL_GPL(ib_wq); 63EXPORT_SYMBOL_GPL(ib_wq);
63 64
@@ -954,10 +955,18 @@ static int __init ib_core_init(void)
954 if (!ib_wq) 955 if (!ib_wq)
955 return -ENOMEM; 956 return -ENOMEM;
956 957
958 ib_comp_wq = alloc_workqueue("ib-comp-wq",
959 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
960 WQ_UNBOUND_MAX_ACTIVE);
961 if (!ib_comp_wq) {
962 ret = -ENOMEM;
963 goto err;
964 }
965
957 ret = class_register(&ib_class); 966 ret = class_register(&ib_class);
958 if (ret) { 967 if (ret) {
959 printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); 968 printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
960 goto err; 969 goto err_comp;
961 } 970 }
962 971
963 ret = ibnl_init(); 972 ret = ibnl_init();
@@ -972,7 +981,8 @@ static int __init ib_core_init(void)
972 981
973err_sysfs: 982err_sysfs:
974 class_unregister(&ib_class); 983 class_unregister(&ib_class);
975 984err_comp:
985 destroy_workqueue(ib_comp_wq);
976err: 986err:
977 destroy_workqueue(ib_wq); 987 destroy_workqueue(ib_wq);
978 return ret; 988 return ret;
@@ -983,6 +993,7 @@ static void __exit ib_core_cleanup(void)
983 ib_cache_cleanup(); 993 ib_cache_cleanup();
984 ibnl_cleanup(); 994 ibnl_cleanup();
985 class_unregister(&ib_class); 995 class_unregister(&ib_class);
996 destroy_workqueue(ib_comp_wq);
986 /* Make sure that any pending umem accounting work is done. */ 997 /* Make sure that any pending umem accounting work is done. */
987 destroy_workqueue(ib_wq); 998 destroy_workqueue(ib_wq);
988} 999}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3ae9726efb98..9b014f153442 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
70#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff 70#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
71 71
72static struct ib_send_wr ipoib_cm_rx_drain_wr = { 72static struct ib_send_wr ipoib_cm_rx_drain_wr = {
73 .wr_id = IPOIB_CM_RX_DRAIN_WRID,
74 .opcode = IB_WR_SEND, 73 .opcode = IB_WR_SEND,
75}; 74};
76 75
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
223 * error" WC will be immediately generated for each WR we post. 222 * error" WC will be immediately generated for each WR we post.
224 */ 223 */
225 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); 224 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
225 ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
226 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) 226 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
227 ipoib_warn(priv, "failed to post drain wr\n"); 227 ipoib_warn(priv, "failed to post drain wr\n");
228 228
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 8a5998e6a407..2411680cdde6 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -151,16 +151,12 @@
151 - ISER_MAX_RX_MISC_PDUS) / \ 151 - ISER_MAX_RX_MISC_PDUS) / \
152 (1 + ISER_INFLIGHT_DATAOUTS)) 152 (1 + ISER_INFLIGHT_DATAOUTS))
153 153
154#define ISER_WC_BATCH_COUNT 16
155#define ISER_SIGNAL_CMD_COUNT 32 154#define ISER_SIGNAL_CMD_COUNT 32
156 155
157#define ISER_VER 0x10 156#define ISER_VER 0x10
158#define ISER_WSV 0x08 157#define ISER_WSV 0x08
159#define ISER_RSV 0x04 158#define ISER_RSV 0x04
160 159
161#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
162#define ISER_BEACON_WRID 0xfffffffffffffffeULL
163
164/** 160/**
165 * struct iser_hdr - iSER header 161 * struct iser_hdr - iSER header
166 * 162 *
@@ -269,7 +265,7 @@ enum iser_desc_type {
269#define ISER_MAX_WRS 7 265#define ISER_MAX_WRS 7
270 266
271/** 267/**
272 * struct iser_tx_desc - iSER TX descriptor (for send wr_id) 268 * struct iser_tx_desc - iSER TX descriptor
273 * 269 *
274 * @iser_header: iser header 270 * @iser_header: iser header
275 * @iscsi_header: iscsi header 271 * @iscsi_header: iscsi header
@@ -293,6 +289,7 @@ struct iser_tx_desc {
293 u64 dma_addr; 289 u64 dma_addr;
294 struct ib_sge tx_sg[2]; 290 struct ib_sge tx_sg[2];
295 int num_sge; 291 int num_sge;
292 struct ib_cqe cqe;
296 bool mapped; 293 bool mapped;
297 u8 wr_idx; 294 u8 wr_idx;
298 union iser_wr { 295 union iser_wr {
@@ -306,9 +303,10 @@ struct iser_tx_desc {
306}; 303};
307 304
308#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ 305#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
309 sizeof(u64) + sizeof(struct ib_sge))) 306 sizeof(u64) + sizeof(struct ib_sge) + \
307 sizeof(struct ib_cqe)))
310/** 308/**
311 * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) 309 * struct iser_rx_desc - iSER RX descriptor
312 * 310 *
313 * @iser_header: iser header 311 * @iser_header: iser header
314 * @iscsi_header: iscsi header 312 * @iscsi_header: iscsi header
@@ -323,7 +321,27 @@ struct iser_rx_desc {
323 char data[ISER_RECV_DATA_SEG_LEN]; 321 char data[ISER_RECV_DATA_SEG_LEN];
324 u64 dma_addr; 322 u64 dma_addr;
325 struct ib_sge rx_sg; 323 struct ib_sge rx_sg;
324 struct ib_cqe cqe;
326 char pad[ISER_RX_PAD_SIZE]; 325 char pad[ISER_RX_PAD_SIZE];
326} __packed;
327
328/**
329 * struct iser_login_desc - iSER login descriptor
330 *
331 * @req: pointer to login request buffer
332 * @resp: pointer to login response buffer
333 * @req_dma: DMA address of login request buffer
334 * @rsp_dma: DMA address of login response buffer
335 * @sge: IB sge for login post recv
336 * @cqe: completion handler
337 */
338struct iser_login_desc {
339 void *req;
340 void *rsp;
341 u64 req_dma;
342 u64 rsp_dma;
343 struct ib_sge sge;
344 struct ib_cqe cqe;
327} __attribute__((packed)); 345} __attribute__((packed));
328 346
329struct iser_conn; 347struct iser_conn;
@@ -333,18 +351,12 @@ struct iscsi_iser_task;
333/** 351/**
334 * struct iser_comp - iSER completion context 352 * struct iser_comp - iSER completion context
335 * 353 *
336 * @device: pointer to device handle
337 * @cq: completion queue 354 * @cq: completion queue
338 * @wcs: work completion array
339 * @tasklet: Tasklet handle
340 * @active_qps: Number of active QPs attached 355 * @active_qps: Number of active QPs attached
341 * to completion context 356 * to completion context
342 */ 357 */
343struct iser_comp { 358struct iser_comp {
344 struct iser_device *device;
345 struct ib_cq *cq; 359 struct ib_cq *cq;
346 struct ib_wc wcs[ISER_WC_BATCH_COUNT];
347 struct tasklet_struct tasklet;
348 int active_qps; 360 int active_qps;
349}; 361};
350 362
@@ -475,10 +487,11 @@ struct iser_fr_pool {
475 * @rx_wr: receive work request for batch posts 487 * @rx_wr: receive work request for batch posts
476 * @device: reference to iser device 488 * @device: reference to iser device
477 * @comp: iser completion context 489 * @comp: iser completion context
478 * @pi_support: Indicate device T10-PI support
479 * @beacon: beacon send wr to signal all flush errors were drained
480 * @flush_comp: completes when all connection completions consumed
481 * @fr_pool: connection fast registration poool 490 * @fr_pool: connection fast registration poool
491 * @pi_support: Indicate device T10-PI support
492 * @last: last send wr to signal all flush errors were drained
493 * @last_cqe: cqe handler for last wr
494 * @last_comp: completes when all connection completions consumed
482 */ 495 */
483struct ib_conn { 496struct ib_conn {
484 struct rdma_cm_id *cma_id; 497 struct rdma_cm_id *cma_id;
@@ -488,10 +501,12 @@ struct ib_conn {
488 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; 501 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
489 struct iser_device *device; 502 struct iser_device *device;
490 struct iser_comp *comp; 503 struct iser_comp *comp;
491 bool pi_support;
492 struct ib_send_wr beacon;
493 struct completion flush_comp;
494 struct iser_fr_pool fr_pool; 504 struct iser_fr_pool fr_pool;
505 bool pi_support;
506 struct ib_send_wr last;
507 struct ib_cqe last_cqe;
508 struct ib_cqe reg_cqe;
509 struct completion last_comp;
495}; 510};
496 511
497/** 512/**
@@ -514,11 +529,7 @@ struct ib_conn {
514 * @up_completion: connection establishment completed 529 * @up_completion: connection establishment completed
515 * (state is ISER_CONN_UP) 530 * (state is ISER_CONN_UP)
516 * @conn_list: entry in ig conn list 531 * @conn_list: entry in ig conn list
517 * @login_buf: login data buffer (stores login parameters) 532 * @login_desc: login descriptor
518 * @login_req_buf: login request buffer
519 * @login_req_dma: login request buffer dma address
520 * @login_resp_buf: login response buffer
521 * @login_resp_dma: login response buffer dma address
522 * @rx_desc_head: head of rx_descs cyclic buffer 533 * @rx_desc_head: head of rx_descs cyclic buffer
523 * @rx_descs: rx buffers array (cyclic buffer) 534 * @rx_descs: rx buffers array (cyclic buffer)
524 * @num_rx_descs: number of rx descriptors 535 * @num_rx_descs: number of rx descriptors
@@ -541,10 +552,7 @@ struct iser_conn {
541 struct completion ib_completion; 552 struct completion ib_completion;
542 struct completion up_completion; 553 struct completion up_completion;
543 struct list_head conn_list; 554 struct list_head conn_list;
544 555 struct iser_login_desc login_desc;
545 char *login_buf;
546 char *login_req_buf, *login_resp_buf;
547 u64 login_req_dma, login_resp_dma;
548 unsigned int rx_desc_head; 556 unsigned int rx_desc_head;
549 struct iser_rx_desc *rx_descs; 557 struct iser_rx_desc *rx_descs;
550 u32 num_rx_descs; 558 u32 num_rx_descs;
@@ -633,12 +641,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
633 641
634void iser_release_work(struct work_struct *work); 642void iser_release_work(struct work_struct *work);
635 643
636void iser_rcv_completion(struct iser_rx_desc *desc, 644void iser_err_comp(struct ib_wc *wc, const char *type);
637 unsigned long dto_xfer_len, 645void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
638 struct ib_conn *ib_conn); 646void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
639 647void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
640void iser_snd_completion(struct iser_tx_desc *desc, 648void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
641 struct ib_conn *ib_conn); 649void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
650void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
651void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
642 652
643void iser_task_rdma_init(struct iscsi_iser_task *task); 653void iser_task_rdma_init(struct iscsi_iser_task *task);
644 654
@@ -719,4 +729,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
719 return cur_wr; 729 return cur_wr;
720} 730}
721 731
732static inline struct iser_conn *
733to_iser_conn(struct ib_conn *ib_conn)
734{
735 return container_of(ib_conn, struct iser_conn, ib_conn);
736}
737
738static inline struct iser_rx_desc *
739iser_rx(struct ib_cqe *cqe)
740{
741 return container_of(cqe, struct iser_rx_desc, cqe);
742}
743
744static inline struct iser_tx_desc *
745iser_tx(struct ib_cqe *cqe)
746{
747 return container_of(cqe, struct iser_tx_desc, cqe);
748}
749
750static inline struct iser_login_desc *
751iser_login(struct ib_cqe *cqe)
752{
753 return container_of(cqe, struct iser_login_desc, cqe);
754}
755
722#endif 756#endif
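
The iser_rx()/iser_tx()/iser_login() helpers above capture the new completion convention: each descriptor embeds a struct ib_cqe, and the handler recovers the descriptor with container_of() on wc->wr_cqe instead of decoding a wr_id cookie. A generic sketch of that pattern follows; struct ib_cqe, wr_cqe and the done() signature are from this series, while my_desc, my_send_done() and my_post_send() are hypothetical names.

struct my_desc {
	struct ib_cqe	cqe;		/* hook for the done() callback */
	void		*buf;
};

static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_desc *desc = container_of(wc->wr_cqe, struct my_desc, cqe);

	/* ... complete the I/O described by desc ... */
}

static int my_post_send(struct ib_qp *qp, struct my_desc *desc,
			struct ib_sge *sge)
{
	struct ib_send_wr wr = { 0 }, *bad_wr;

	desc->cqe.done = my_send_done;
	wr.wr_cqe = &desc->cqe;		/* replaces the old wr_id cookie */
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(qp, &wr, &bad_wr);
}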
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ffd00c420729..44ecb89689f5 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -174,73 +174,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
174static void iser_free_login_buf(struct iser_conn *iser_conn) 174static void iser_free_login_buf(struct iser_conn *iser_conn)
175{ 175{
176 struct iser_device *device = iser_conn->ib_conn.device; 176 struct iser_device *device = iser_conn->ib_conn.device;
177 struct iser_login_desc *desc = &iser_conn->login_desc;
177 178
178 if (!iser_conn->login_buf) 179 if (!desc->req)
179 return; 180 return;
180 181
181 if (iser_conn->login_req_dma) 182 ib_dma_unmap_single(device->ib_device, desc->req_dma,
182 ib_dma_unmap_single(device->ib_device, 183 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
183 iser_conn->login_req_dma,
184 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
185 184
186 if (iser_conn->login_resp_dma) 185 ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
187 ib_dma_unmap_single(device->ib_device, 186 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
188 iser_conn->login_resp_dma,
189 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
190 187
191 kfree(iser_conn->login_buf); 188 kfree(desc->req);
189 kfree(desc->rsp);
192 190
193 /* make sure we never redo any unmapping */ 191 /* make sure we never redo any unmapping */
194 iser_conn->login_req_dma = 0; 192 desc->req = NULL;
195 iser_conn->login_resp_dma = 0; 193 desc->rsp = NULL;
196 iser_conn->login_buf = NULL;
197} 194}
198 195
199static int iser_alloc_login_buf(struct iser_conn *iser_conn) 196static int iser_alloc_login_buf(struct iser_conn *iser_conn)
200{ 197{
201 struct iser_device *device = iser_conn->ib_conn.device; 198 struct iser_device *device = iser_conn->ib_conn.device;
202 int req_err, resp_err; 199 struct iser_login_desc *desc = &iser_conn->login_desc;
203 200
204 BUG_ON(device == NULL); 201 desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
205 202 if (!desc->req)
206 iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + 203 return -ENOMEM;
207 ISER_RX_LOGIN_SIZE, GFP_KERNEL); 204
208 if (!iser_conn->login_buf) 205 desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
209 goto out_err; 206 ISCSI_DEF_MAX_RECV_SEG_LEN,
210 207 DMA_TO_DEVICE);
211 iser_conn->login_req_buf = iser_conn->login_buf; 208 if (ib_dma_mapping_error(device->ib_device,
212 iser_conn->login_resp_buf = iser_conn->login_buf + 209 desc->req_dma))
213 ISCSI_DEF_MAX_RECV_SEG_LEN; 210 goto free_req;
214 211
215 iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, 212 desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
216 iser_conn->login_req_buf, 213 if (!desc->rsp)
217 ISCSI_DEF_MAX_RECV_SEG_LEN, 214 goto unmap_req;
218 DMA_TO_DEVICE); 215
219 216 desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
220 iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, 217 ISER_RX_LOGIN_SIZE,
221 iser_conn->login_resp_buf, 218 DMA_FROM_DEVICE);
222 ISER_RX_LOGIN_SIZE, 219 if (ib_dma_mapping_error(device->ib_device,
223 DMA_FROM_DEVICE); 220 desc->rsp_dma))
224 221 goto free_rsp;
225 req_err = ib_dma_mapping_error(device->ib_device, 222
226 iser_conn->login_req_dma);
227 resp_err = ib_dma_mapping_error(device->ib_device,
228 iser_conn->login_resp_dma);
229
230 if (req_err || resp_err) {
231 if (req_err)
232 iser_conn->login_req_dma = 0;
233 if (resp_err)
234 iser_conn->login_resp_dma = 0;
235 goto free_login_buf;
236 }
237 return 0; 223 return 0;
238 224
239free_login_buf: 225free_rsp:
240 iser_free_login_buf(iser_conn); 226 kfree(desc->rsp);
227unmap_req:
228 ib_dma_unmap_single(device->ib_device, desc->req_dma,
229 ISCSI_DEF_MAX_RECV_SEG_LEN,
230 DMA_TO_DEVICE);
231free_req:
232 kfree(desc->req);
241 233
242out_err:
243 iser_err("unable to alloc or map login buf\n");
244 return -ENOMEM; 234 return -ENOMEM;
245} 235}
246 236
@@ -280,11 +270,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
280 goto rx_desc_dma_map_failed; 270 goto rx_desc_dma_map_failed;
281 271
282 rx_desc->dma_addr = dma_addr; 272 rx_desc->dma_addr = dma_addr;
283 273 rx_desc->cqe.done = iser_task_rsp;
284 rx_sg = &rx_desc->rx_sg; 274 rx_sg = &rx_desc->rx_sg;
285 rx_sg->addr = rx_desc->dma_addr; 275 rx_sg->addr = rx_desc->dma_addr;
286 rx_sg->length = ISER_RX_PAYLOAD_SIZE; 276 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
287 rx_sg->lkey = device->pd->local_dma_lkey; 277 rx_sg->lkey = device->pd->local_dma_lkey;
288 } 278 }
289 279
290 iser_conn->rx_desc_head = 0; 280 iser_conn->rx_desc_head = 0;
@@ -383,6 +373,7 @@ int iser_send_command(struct iscsi_conn *conn,
383 373
384 /* build the tx desc regd header and add it to the tx desc dto */ 374 /* build the tx desc regd header and add it to the tx desc dto */
385 tx_desc->type = ISCSI_TX_SCSI_COMMAND; 375 tx_desc->type = ISCSI_TX_SCSI_COMMAND;
376 tx_desc->cqe.done = iser_cmd_comp;
386 iser_create_send_desc(iser_conn, tx_desc); 377 iser_create_send_desc(iser_conn, tx_desc);
387 378
388 if (hdr->flags & ISCSI_FLAG_CMD_READ) { 379 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +455,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
464 } 455 }
465 456
466 tx_desc->type = ISCSI_TX_DATAOUT; 457 tx_desc->type = ISCSI_TX_DATAOUT;
458 tx_desc->cqe.done = iser_dataout_comp;
467 tx_desc->iser_header.flags = ISER_VER; 459 tx_desc->iser_header.flags = ISER_VER;
468 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); 460 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
469 461
@@ -513,6 +505,7 @@ int iser_send_control(struct iscsi_conn *conn,
513 505
514 /* build the tx desc regd header and add it to the tx desc dto */ 506 /* build the tx desc regd header and add it to the tx desc dto */
515 mdesc->type = ISCSI_TX_CONTROL; 507 mdesc->type = ISCSI_TX_CONTROL;
508 mdesc->cqe.done = iser_ctrl_comp;
516 iser_create_send_desc(iser_conn, mdesc); 509 iser_create_send_desc(iser_conn, mdesc);
517 510
518 device = iser_conn->ib_conn.device; 511 device = iser_conn->ib_conn.device;
@@ -520,25 +513,25 @@ int iser_send_control(struct iscsi_conn *conn,
520 data_seg_len = ntoh24(task->hdr->dlength); 513 data_seg_len = ntoh24(task->hdr->dlength);
521 514
522 if (data_seg_len > 0) { 515 if (data_seg_len > 0) {
516 struct iser_login_desc *desc = &iser_conn->login_desc;
523 struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; 517 struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
518
524 if (task != conn->login_task) { 519 if (task != conn->login_task) {
525 iser_err("data present on non login task!!!\n"); 520 iser_err("data present on non login task!!!\n");
526 goto send_control_error; 521 goto send_control_error;
527 } 522 }
528 523
529 ib_dma_sync_single_for_cpu(device->ib_device, 524 ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
530 iser_conn->login_req_dma, task->data_count, 525 task->data_count, DMA_TO_DEVICE);
531 DMA_TO_DEVICE);
532 526
533 memcpy(iser_conn->login_req_buf, task->data, task->data_count); 527 memcpy(desc->req, task->data, task->data_count);
534 528
535 ib_dma_sync_single_for_device(device->ib_device, 529 ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
536 iser_conn->login_req_dma, task->data_count, 530 task->data_count, DMA_TO_DEVICE);
537 DMA_TO_DEVICE);
538 531
539 tx_dsg->addr = iser_conn->login_req_dma; 532 tx_dsg->addr = desc->req_dma;
540 tx_dsg->length = task->data_count; 533 tx_dsg->length = task->data_count;
541 tx_dsg->lkey = device->pd->local_dma_lkey; 534 tx_dsg->lkey = device->pd->local_dma_lkey;
542 mdesc->num_sge = 2; 535 mdesc->num_sge = 2;
543 } 536 }
544 537
@@ -562,41 +555,69 @@ send_control_error:
562 return err; 555 return err;
563} 556}
564 557
565/** 558void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
566 * iser_rcv_dto_completion - recv DTO completion
567 */
568void iser_rcv_completion(struct iser_rx_desc *rx_desc,
569 unsigned long rx_xfer_len,
570 struct ib_conn *ib_conn)
571{ 559{
572 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 560 struct ib_conn *ib_conn = wc->qp->qp_context;
573 ib_conn); 561 struct iser_conn *iser_conn = to_iser_conn(ib_conn);
562 struct iser_login_desc *desc = iser_login(wc->wr_cqe);
574 struct iscsi_hdr *hdr; 563 struct iscsi_hdr *hdr;
575 u64 rx_dma; 564 char *data;
576 int rx_buflen, outstanding, count, err; 565 int length;
577 566
578 /* differentiate between login to all other PDUs */ 567 if (unlikely(wc->status != IB_WC_SUCCESS)) {
579 if ((char *)rx_desc == iser_conn->login_resp_buf) { 568 iser_err_comp(wc, "login_rsp");
580 rx_dma = iser_conn->login_resp_dma; 569 return;
581 rx_buflen = ISER_RX_LOGIN_SIZE;
582 } else {
583 rx_dma = rx_desc->dma_addr;
584 rx_buflen = ISER_RX_PAYLOAD_SIZE;
585 } 570 }
586 571
587 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, 572 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
588 rx_buflen, DMA_FROM_DEVICE); 573 desc->rsp_dma, ISER_RX_LOGIN_SIZE,
574 DMA_FROM_DEVICE);
589 575
590 hdr = &rx_desc->iscsi_header; 576 hdr = desc->rsp + sizeof(struct iser_hdr);
577 data = desc->rsp + ISER_HEADERS_LEN;
578 length = wc->byte_len - ISER_HEADERS_LEN;
591 579
592 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, 580 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
593 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); 581 hdr->itt, length);
582
583 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
584
585 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
586 desc->rsp_dma, ISER_RX_LOGIN_SIZE,
587 DMA_FROM_DEVICE);
588
589 ib_conn->post_recv_buf_count--;
590}
591
592void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
593{
594 struct ib_conn *ib_conn = wc->qp->qp_context;
595 struct iser_conn *iser_conn = to_iser_conn(ib_conn);
596 struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
597 struct iscsi_hdr *hdr;
598 int length;
599 int outstanding, count, err;
594 600
595 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, 601 if (unlikely(wc->status != IB_WC_SUCCESS)) {
596 rx_xfer_len - ISER_HEADERS_LEN); 602 iser_err_comp(wc, "task_rsp");
603 return;
604 }
597 605
598 ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, 606 ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
599 rx_buflen, DMA_FROM_DEVICE); 607 desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
608 DMA_FROM_DEVICE);
609
610 hdr = &desc->iscsi_header;
611 length = wc->byte_len - ISER_HEADERS_LEN;
612
613 iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
614 hdr->itt, length);
615
616 iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
617
618 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
619 desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
620 DMA_FROM_DEVICE);
600 621
601 /* decrementing conn->post_recv_buf_count only --after-- freeing the * 622 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
602 * task eliminates the need to worry on tasks which are completed in * 623 * task eliminates the need to worry on tasks which are completed in *
@@ -604,9 +625,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
604 * for the posted rx bufs refcount to become zero handles everything */ 625 * for the posted rx bufs refcount to become zero handles everything */
605 ib_conn->post_recv_buf_count--; 626 ib_conn->post_recv_buf_count--;
606 627
607 if (rx_dma == iser_conn->login_resp_dma)
608 return;
609
610 outstanding = ib_conn->post_recv_buf_count; 628 outstanding = ib_conn->post_recv_buf_count;
611 if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { 629 if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
612 count = min(iser_conn->qp_max_recv_dtos - outstanding, 630 count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +635,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
617 } 635 }
618} 636}
619 637
620void iser_snd_completion(struct iser_tx_desc *tx_desc, 638void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
621 struct ib_conn *ib_conn) 639{
640 if (unlikely(wc->status != IB_WC_SUCCESS))
641 iser_err_comp(wc, "command");
642}
643
644void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
622{ 645{
646 struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
623 struct iscsi_task *task; 647 struct iscsi_task *task;
624 struct iser_device *device = ib_conn->device;
625 648
626 if (tx_desc->type == ISCSI_TX_DATAOUT) { 649 if (unlikely(wc->status != IB_WC_SUCCESS)) {
627 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, 650 iser_err_comp(wc, "control");
628 ISER_HEADERS_LEN, DMA_TO_DEVICE); 651 return;
629 kmem_cache_free(ig.desc_cache, tx_desc);
630 tx_desc = NULL;
631 } 652 }
632 653
633 if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { 654 /* this arithmetic is legal by libiscsi dd_data allocation */
634 /* this arithmetic is legal by libiscsi dd_data allocation */ 655 task = (void *)desc - sizeof(struct iscsi_task);
635 task = (void *) ((long)(void *)tx_desc - 656 if (task->hdr->itt == RESERVED_ITT)
636 sizeof(struct iscsi_task)); 657 iscsi_put_task(task);
637 if (task->hdr->itt == RESERVED_ITT) 658}
638 iscsi_put_task(task); 659
639 } 660void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
661{
662 struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
663 struct ib_conn *ib_conn = wc->qp->qp_context;
664 struct iser_device *device = ib_conn->device;
665
666 if (unlikely(wc->status != IB_WC_SUCCESS))
667 iser_err_comp(wc, "dataout");
668
669 ib_dma_unmap_single(device->ib_device, desc->dma_addr,
670 ISER_HEADERS_LEN, DMA_TO_DEVICE);
671 kmem_cache_free(ig.desc_cache, desc);
672}
673
674void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
675{
676 struct ib_conn *ib_conn = wc->qp->qp_context;
677
678 complete(&ib_conn->last_comp);
640} 679}
641 680
642void iser_task_rdma_init(struct iscsi_iser_task *iser_task) 681void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ea765fb9664d..76ca09bac806 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -67,6 +67,11 @@ static struct iser_reg_ops fmr_ops = {
67 .reg_desc_put = iser_reg_desc_put_fmr, 67 .reg_desc_put = iser_reg_desc_put_fmr,
68}; 68};
69 69
70void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
71{
72 iser_err_comp(wc, "memreg");
73}
74
70int iser_assign_reg_ops(struct iser_device *device) 75int iser_assign_reg_ops(struct iser_device *device)
71{ 76{
72 struct ib_device_attr *dev_attr = &device->dev_attr; 77 struct ib_device_attr *dev_attr = &device->dev_attr;
@@ -414,12 +419,14 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
414} 419}
415 420
416static void 421static void
417iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) 422iser_inv_rkey(struct ib_send_wr *inv_wr,
423 struct ib_mr *mr,
424 struct ib_cqe *cqe)
418{ 425{
419 u32 rkey; 426 u32 rkey;
420 427
421 inv_wr->opcode = IB_WR_LOCAL_INV; 428 inv_wr->opcode = IB_WR_LOCAL_INV;
422 inv_wr->wr_id = ISER_FASTREG_LI_WRID; 429 inv_wr->wr_cqe = cqe;
423 inv_wr->ex.invalidate_rkey = mr->rkey; 430 inv_wr->ex.invalidate_rkey = mr->rkey;
424 inv_wr->send_flags = 0; 431 inv_wr->send_flags = 0;
425 inv_wr->num_sge = 0; 432 inv_wr->num_sge = 0;
@@ -437,6 +444,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
437{ 444{
438 struct iser_tx_desc *tx_desc = &iser_task->desc; 445 struct iser_tx_desc *tx_desc = &iser_task->desc;
439 struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; 446 struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
447 struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
440 struct ib_sig_handover_wr *wr; 448 struct ib_sig_handover_wr *wr;
441 int ret; 449 int ret;
442 450
@@ -448,11 +456,11 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
448 iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); 456 iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
449 457
450 if (!pi_ctx->sig_mr_valid) 458 if (!pi_ctx->sig_mr_valid)
451 iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr); 459 iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr, cqe);
452 460
453 wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); 461 wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
454 wr->wr.opcode = IB_WR_REG_SIG_MR; 462 wr->wr.opcode = IB_WR_REG_SIG_MR;
455 wr->wr.wr_id = ISER_FASTREG_LI_WRID; 463 wr->wr.wr_cqe = cqe;
456 wr->wr.sg_list = &data_reg->sge; 464 wr->wr.sg_list = &data_reg->sge;
457 wr->wr.num_sge = 1; 465 wr->wr.num_sge = 1;
458 wr->wr.send_flags = 0; 466 wr->wr.send_flags = 0;
@@ -485,12 +493,13 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
485 struct iser_mem_reg *reg) 493 struct iser_mem_reg *reg)
486{ 494{
487 struct iser_tx_desc *tx_desc = &iser_task->desc; 495 struct iser_tx_desc *tx_desc = &iser_task->desc;
496 struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
488 struct ib_mr *mr = rsc->mr; 497 struct ib_mr *mr = rsc->mr;
489 struct ib_reg_wr *wr; 498 struct ib_reg_wr *wr;
490 int n; 499 int n;
491 500
492 if (!rsc->mr_valid) 501 if (!rsc->mr_valid)
493 iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); 502 iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
494 503
495 n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); 504 n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
496 if (unlikely(n != mem->size)) { 505 if (unlikely(n != mem->size)) {
@@ -501,7 +510,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
501 510
502 wr = reg_wr(iser_tx_next_wr(tx_desc)); 511 wr = reg_wr(iser_tx_next_wr(tx_desc));
503 wr->wr.opcode = IB_WR_REG_MR; 512 wr->wr.opcode = IB_WR_REG_MR;
504 wr->wr.wr_id = ISER_FASTREG_LI_WRID; 513 wr->wr.wr_cqe = cqe;
505 wr->wr.send_flags = 0; 514 wr->wr.send_flags = 0;
506 wr->wr.num_sge = 0; 515 wr->wr.num_sge = 0;
507 wr->mr = mr; 516 wr->mr = mr;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 42f4da620f2e..2f2fc42ca836 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -44,17 +44,6 @@
44#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ 44#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
45 ISCSI_ISER_MAX_CONN) 45 ISCSI_ISER_MAX_CONN)
46 46
47static int iser_cq_poll_limit = 512;
48
49static void iser_cq_tasklet_fn(unsigned long data);
50static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
51
52static void iser_cq_event_callback(struct ib_event *cause, void *context)
53{
54 iser_err("cq event %s (%d)\n",
55 ib_event_msg(cause->event), cause->event);
56}
57
58static void iser_qp_event_callback(struct ib_event *cause, void *context) 47static void iser_qp_event_callback(struct ib_event *cause, void *context)
59{ 48{
60 iser_err("qp event %s (%d)\n", 49 iser_err("qp event %s (%d)\n",
@@ -110,27 +99,14 @@ static int iser_create_device_ib_res(struct iser_device *device)
110 goto pd_err; 99 goto pd_err;
111 100
112 for (i = 0; i < device->comps_used; i++) { 101 for (i = 0; i < device->comps_used; i++) {
113 struct ib_cq_init_attr cq_attr = {};
114 struct iser_comp *comp = &device->comps[i]; 102 struct iser_comp *comp = &device->comps[i];
115 103
116 comp->device = device; 104 comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i,
117 cq_attr.cqe = max_cqe; 105 IB_POLL_SOFTIRQ);
118 cq_attr.comp_vector = i;
119 comp->cq = ib_create_cq(device->ib_device,
120 iser_cq_callback,
121 iser_cq_event_callback,
122 (void *)comp,
123 &cq_attr);
124 if (IS_ERR(comp->cq)) { 106 if (IS_ERR(comp->cq)) {
125 comp->cq = NULL; 107 comp->cq = NULL;
126 goto cq_err; 108 goto cq_err;
127 } 109 }
128
129 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
130 goto cq_err;
131
132 tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
133 (unsigned long)comp);
134 } 110 }
135 111
136 if (!iser_always_reg) { 112 if (!iser_always_reg) {
@@ -140,7 +116,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
140 116
141 device->mr = ib_get_dma_mr(device->pd, access); 117 device->mr = ib_get_dma_mr(device->pd, access);
142 if (IS_ERR(device->mr)) 118 if (IS_ERR(device->mr))
143 goto dma_mr_err; 119 goto cq_err;
144 } 120 }
145 121
146 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 122 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
@@ -153,15 +129,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
153handler_err: 129handler_err:
154 if (device->mr) 130 if (device->mr)
155 ib_dereg_mr(device->mr); 131 ib_dereg_mr(device->mr);
156dma_mr_err:
157 for (i = 0; i < device->comps_used; i++)
158 tasklet_kill(&device->comps[i].tasklet);
159cq_err: 132cq_err:
160 for (i = 0; i < device->comps_used; i++) { 133 for (i = 0; i < device->comps_used; i++) {
161 struct iser_comp *comp = &device->comps[i]; 134 struct iser_comp *comp = &device->comps[i];
162 135
163 if (comp->cq) 136 if (comp->cq)
164 ib_destroy_cq(comp->cq); 137 ib_free_cq(comp->cq);
165 } 138 }
166 ib_dealloc_pd(device->pd); 139 ib_dealloc_pd(device->pd);
167pd_err: 140pd_err:
@@ -182,8 +155,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
182 for (i = 0; i < device->comps_used; i++) { 155 for (i = 0; i < device->comps_used; i++) {
183 struct iser_comp *comp = &device->comps[i]; 156 struct iser_comp *comp = &device->comps[i];
184 157
185 tasklet_kill(&comp->tasklet); 158 ib_free_cq(comp->cq);
186 ib_destroy_cq(comp->cq);
187 comp->cq = NULL; 159 comp->cq = NULL;
188 } 160 }
189 161
@@ -461,8 +433,7 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
461 */ 433 */
462static int iser_create_ib_conn_res(struct ib_conn *ib_conn) 434static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
463{ 435{
464 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 436 struct iser_conn *iser_conn = to_iser_conn(ib_conn);
465 ib_conn);
466 struct iser_device *device; 437 struct iser_device *device;
467 struct ib_device_attr *dev_attr; 438 struct ib_device_attr *dev_attr;
468 struct ib_qp_init_attr init_attr; 439 struct ib_qp_init_attr init_attr;
@@ -724,13 +695,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
724 iser_conn, err); 695 iser_conn, err);
725 696
726 /* post an indication that all flush errors were consumed */ 697 /* post an indication that all flush errors were consumed */
727 err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr); 698 err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
728 if (err) { 699 if (err) {
729 iser_err("conn %p failed to post beacon", ib_conn); 700 iser_err("conn %p failed to post last wr", ib_conn);
730 return 1; 701 return 1;
731 } 702 }
732 703
733 wait_for_completion(&ib_conn->flush_comp); 704 wait_for_completion(&ib_conn->last_comp);
734 } 705 }
735 706
736 return 1; 707 return 1;
@@ -967,14 +938,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
967 938
968void iser_conn_init(struct iser_conn *iser_conn) 939void iser_conn_init(struct iser_conn *iser_conn)
969{ 940{
941 struct ib_conn *ib_conn = &iser_conn->ib_conn;
942
970 iser_conn->state = ISER_CONN_INIT; 943 iser_conn->state = ISER_CONN_INIT;
971 iser_conn->ib_conn.post_recv_buf_count = 0;
972 init_completion(&iser_conn->ib_conn.flush_comp);
973 init_completion(&iser_conn->stop_completion); 944 init_completion(&iser_conn->stop_completion);
974 init_completion(&iser_conn->ib_completion); 945 init_completion(&iser_conn->ib_completion);
975 init_completion(&iser_conn->up_completion); 946 init_completion(&iser_conn->up_completion);
976 INIT_LIST_HEAD(&iser_conn->conn_list); 947 INIT_LIST_HEAD(&iser_conn->conn_list);
977 mutex_init(&iser_conn->state_mutex); 948 mutex_init(&iser_conn->state_mutex);
949
950 ib_conn->post_recv_buf_count = 0;
951 ib_conn->reg_cqe.done = iser_reg_comp;
952 ib_conn->last_cqe.done = iser_last_comp;
953 ib_conn->last.wr_cqe = &ib_conn->last_cqe;
954 ib_conn->last.opcode = IB_WR_SEND;
955 init_completion(&ib_conn->last_comp);
978} 956}
979 957
980 /** 958 /**
@@ -1000,9 +978,6 @@ int iser_connect(struct iser_conn *iser_conn,
1000 978
1001 iser_conn->state = ISER_CONN_PENDING; 979 iser_conn->state = ISER_CONN_PENDING;
1002 980
1003 ib_conn->beacon.wr_id = ISER_BEACON_WRID;
1004 ib_conn->beacon.opcode = IB_WR_SEND;
1005
1006 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, 981 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
1007 (void *)iser_conn, 982 (void *)iser_conn,
1008 RDMA_PS_TCP, IB_QPT_RC); 983 RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1020,60 @@ connect_failure:
1045 1020
1046int iser_post_recvl(struct iser_conn *iser_conn) 1021int iser_post_recvl(struct iser_conn *iser_conn)
1047{ 1022{
1048 struct ib_recv_wr rx_wr, *rx_wr_failed;
1049 struct ib_conn *ib_conn = &iser_conn->ib_conn; 1023 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1050 struct ib_sge sge; 1024 struct iser_login_desc *desc = &iser_conn->login_desc;
1025 struct ib_recv_wr wr, *wr_failed;
1051 int ib_ret; 1026 int ib_ret;
1052 1027
1053 sge.addr = iser_conn->login_resp_dma; 1028 desc->sge.addr = desc->rsp_dma;
1054 sge.length = ISER_RX_LOGIN_SIZE; 1029 desc->sge.length = ISER_RX_LOGIN_SIZE;
1055 sge.lkey = ib_conn->device->pd->local_dma_lkey; 1030 desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
1056 1031
1057 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; 1032 desc->cqe.done = iser_login_rsp;
1058 rx_wr.sg_list = &sge; 1033 wr.wr_cqe = &desc->cqe;
1059 rx_wr.num_sge = 1; 1034 wr.sg_list = &desc->sge;
1060 rx_wr.next = NULL; 1035 wr.num_sge = 1;
1036 wr.next = NULL;
1061 1037
1062 ib_conn->post_recv_buf_count++; 1038 ib_conn->post_recv_buf_count++;
1063 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 1039 ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
1064 if (ib_ret) { 1040 if (ib_ret) {
1065 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1041 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1066 ib_conn->post_recv_buf_count--; 1042 ib_conn->post_recv_buf_count--;
1067 } 1043 }
1044
1068 return ib_ret; 1045 return ib_ret;
1069} 1046}
1070 1047
1071int iser_post_recvm(struct iser_conn *iser_conn, int count) 1048int iser_post_recvm(struct iser_conn *iser_conn, int count)
1072{ 1049{
1073 struct ib_recv_wr *rx_wr, *rx_wr_failed;
1074 int i, ib_ret;
1075 struct ib_conn *ib_conn = &iser_conn->ib_conn; 1050 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1076 unsigned int my_rx_head = iser_conn->rx_desc_head; 1051 unsigned int my_rx_head = iser_conn->rx_desc_head;
1077 struct iser_rx_desc *rx_desc; 1052 struct iser_rx_desc *rx_desc;
1053 struct ib_recv_wr *wr, *wr_failed;
1054 int i, ib_ret;
1078 1055
1079 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 1056 for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
1080 rx_desc = &iser_conn->rx_descs[my_rx_head]; 1057 rx_desc = &iser_conn->rx_descs[my_rx_head];
1081 rx_wr->wr_id = (uintptr_t)rx_desc; 1058 rx_desc->cqe.done = iser_task_rsp;
1082 rx_wr->sg_list = &rx_desc->rx_sg; 1059 wr->wr_cqe = &rx_desc->cqe;
1083 rx_wr->num_sge = 1; 1060 wr->sg_list = &rx_desc->rx_sg;
1084 rx_wr->next = rx_wr + 1; 1061 wr->num_sge = 1;
1062 wr->next = wr + 1;
1085 my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; 1063 my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
1086 } 1064 }
1087 1065
1088 rx_wr--; 1066 wr--;
1089 rx_wr->next = NULL; /* mark end of work requests list */ 1067 wr->next = NULL; /* mark end of work requests list */
1090 1068
1091 ib_conn->post_recv_buf_count += count; 1069 ib_conn->post_recv_buf_count += count;
1092 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 1070 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
1093 if (ib_ret) { 1071 if (ib_ret) {
1094 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1072 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1095 ib_conn->post_recv_buf_count -= count; 1073 ib_conn->post_recv_buf_count -= count;
1096 } else 1074 } else
1097 iser_conn->rx_desc_head = my_rx_head; 1075 iser_conn->rx_desc_head = my_rx_head;
1076
1098 return ib_ret; 1077 return ib_ret;
1099} 1078}
1100 1079
@@ -1115,7 +1094,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1115 DMA_TO_DEVICE); 1094 DMA_TO_DEVICE);
1116 1095
1117 wr->next = NULL; 1096 wr->next = NULL;
1118 wr->wr_id = (uintptr_t)tx_desc; 1097 wr->wr_cqe = &tx_desc->cqe;
1119 wr->sg_list = tx_desc->tx_sg; 1098 wr->sg_list = tx_desc->tx_sg;
1120 wr->num_sge = tx_desc->num_sge; 1099 wr->num_sge = tx_desc->num_sge;
1121 wr->opcode = IB_WR_SEND; 1100 wr->opcode = IB_WR_SEND;
@@ -1129,149 +1108,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1129 return ib_ret; 1108 return ib_ret;
1130} 1109}
1131 1110
1132/**
1133 * is_iser_tx_desc - Indicate if the completion wr_id
1134 * is a TX descriptor or not.
1135 * @iser_conn: iser connection
1136 * @wr_id: completion WR identifier
1137 *
1138 * Since we cannot rely on wc opcode in FLUSH errors
1139 * we must work around it by checking if the wr_id address
1140 * falls in the iser connection rx_descs buffer. If so
1141 * it is an RX descriptor, otherwize it is a TX.
1142 */
1143static inline bool
1144is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
1145{
1146 void *start = iser_conn->rx_descs;
1147 int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
1148
1149 if (wr_id >= start && wr_id < start + len)
1150 return false;
1151
1152 return true;
1153}
1154
1155/**
1156 * iser_handle_comp_error() - Handle error completion
1157 * @ib_conn: connection RDMA resources
1158 * @wc: work completion
1159 *
1160 * Notes: We may handle a FLUSH error completion and in this case
1161 * we only cleanup in case TX type was DATAOUT. For non-FLUSH
1162 * error completion we should also notify iscsi layer that
1163 * connection is failed (in case we passed bind stage).
1164 */
1165static void
1166iser_handle_comp_error(struct ib_conn *ib_conn,
1167 struct ib_wc *wc)
1168{
1169 void *wr_id = (void *)(uintptr_t)wc->wr_id;
1170 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
1171 ib_conn);
1172
1173 if (wc->status != IB_WC_WR_FLUSH_ERR)
1174 if (iser_conn->iscsi_conn)
1175 iscsi_conn_failure(iser_conn->iscsi_conn,
1176 ISCSI_ERR_CONN_FAILED);
1177
1178 if (wc->wr_id == ISER_FASTREG_LI_WRID)
1179 return;
1180
1181 if (is_iser_tx_desc(iser_conn, wr_id)) {
1182 struct iser_tx_desc *desc = wr_id;
1183
1184 if (desc->type == ISCSI_TX_DATAOUT)
1185 kmem_cache_free(ig.desc_cache, desc);
1186 } else {
1187 ib_conn->post_recv_buf_count--;
1188 }
1189}
1190
1191/**
1192 * iser_handle_wc - handle a single work completion
1193 * @wc: work completion
1194 *
1195 * Soft-IRQ context, work completion can be either
1196 * SEND or RECV, and can turn out successful or
1197 * with error (or flush error).
1198 */
1199static void iser_handle_wc(struct ib_wc *wc)
1200{
1201 struct ib_conn *ib_conn;
1202 struct iser_tx_desc *tx_desc;
1203 struct iser_rx_desc *rx_desc;
1204
1205 ib_conn = wc->qp->qp_context;
1206 if (likely(wc->status == IB_WC_SUCCESS)) {
1207 if (wc->opcode == IB_WC_RECV) {
1208 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
1209 iser_rcv_completion(rx_desc, wc->byte_len,
1210 ib_conn);
1211 } else
1212 if (wc->opcode == IB_WC_SEND) {
1213 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
1214 iser_snd_completion(tx_desc, ib_conn);
1215 } else {
1216 iser_err("Unknown wc opcode %d\n", wc->opcode);
1217 }
1218 } else {
1219 if (wc->status != IB_WC_WR_FLUSH_ERR)
1220 iser_err("%s (%d): wr id %llx vend_err %x\n",
1221 ib_wc_status_msg(wc->status), wc->status,
1222 wc->wr_id, wc->vendor_err);
1223 else
1224 iser_dbg("%s (%d): wr id %llx\n",
1225 ib_wc_status_msg(wc->status), wc->status,
1226 wc->wr_id);
1227
1228 if (wc->wr_id == ISER_BEACON_WRID)
1229 /* all flush errors were consumed */
1230 complete(&ib_conn->flush_comp);
1231 else
1232 iser_handle_comp_error(ib_conn, wc);
1233 }
1234}
1235
1236/**
1237 * iser_cq_tasklet_fn - iSER completion polling loop
1238 * @data: iSER completion context
1239 *
1240 * Soft-IRQ context, polling connection CQ until
1241 * either CQ was empty or we exausted polling budget
1242 */
1243static void iser_cq_tasklet_fn(unsigned long data)
1244{
1245 struct iser_comp *comp = (struct iser_comp *)data;
1246 struct ib_cq *cq = comp->cq;
1247 struct ib_wc *const wcs = comp->wcs;
1248 int i, n, completed = 0;
1249
1250 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
1251 for (i = 0; i < n; i++)
1252 iser_handle_wc(&wcs[i]);
1253
1254 completed += n;
1255 if (completed >= iser_cq_poll_limit)
1256 break;
1257 }
1258
1259 /*
1260 * It is assumed here that arming CQ only once its empty
1261 * would not cause interrupts to be missed.
1262 */
1263 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1264
1265 iser_dbg("got %d completions\n", completed);
1266}
1267
1268static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
1269{
1270 struct iser_comp *comp = cq_context;
1271
1272 tasklet_schedule(&comp->tasklet);
1273}
1274
1275u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, 1111u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
1276 enum iser_data_dir cmd_dir, sector_t *sector) 1112 enum iser_data_dir cmd_dir, sector_t *sector)
1277{ 1113{
@@ -1319,3 +1155,21 @@ err:
1319 /* Not alot we can do here, return ambiguous guard error */ 1155 /* Not alot we can do here, return ambiguous guard error */
1320 return 0x1; 1156 return 0x1;
1321} 1157}
1158
1159void iser_err_comp(struct ib_wc *wc, const char *type)
1160{
1161 if (wc->status != IB_WC_WR_FLUSH_ERR) {
1162 struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
1163
1164 iser_err("%s failure: %s (%d) vend_err %x\n", type,
1165 ib_wc_status_msg(wc->status), wc->status,
1166 wc->vendor_err);
1167
1168 if (iser_conn->iscsi_conn)
1169 iscsi_conn_failure(iser_conn->iscsi_conn,
1170 ISCSI_ERR_CONN_FAILED);
1171 } else {
1172 iser_dbg("%s failure: %s (%d)\n", type,
1173 ib_wc_status_msg(wc->status), wc->status);
1174 }
1175}
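
The iser hunk above still reports errors centrally through iser_err_comp(); the SRP and SRPT hunks that follow convert each posted work request to the new struct ib_cqe style, where the WR carries a pointer to a completion entry and that entry's done() callback is invoked directly for the matching work completion. A rough sketch of that pattern, not part of this patch; the example_* names are illustrative:

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

struct example_tx_desc {
	struct ib_cqe	cqe;	/* embedded completion entry */
	void		*buf;
};

/* runs from the context selected when the CQ was allocated */
static void example_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct example_tx_desc *desc =
		container_of(wc->wr_cqe, struct example_tx_desc, cqe);

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		pr_err("send for desc %p failed: %s (%d)\n", desc,
		       ib_wc_status_msg(wc->status), wc->status);
		return;
	}
	/* reclaim desc here */
}

static int example_post_send(struct ib_qp *qp, struct example_tx_desc *desc,
			     struct ib_sge *sge)
{
	struct ib_send_wr wr = { }, *bad_wr;

	desc->cqe.done = example_send_done;	/* per-WR callback */
	wr.wr_cqe = &desc->cqe;			/* replaces wr.wr_id */
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(qp, &wr, &bad_wr);
}
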
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 3db9a659719b..20fa332ced8a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
132 132
133static void srp_add_one(struct ib_device *device); 133static void srp_add_one(struct ib_device *device);
134static void srp_remove_one(struct ib_device *device, void *client_data); 134static void srp_remove_one(struct ib_device *device, void *client_data);
135static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr); 135static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
136static void srp_send_completion(struct ib_cq *cq, void *ch_ptr); 136static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
137 const char *opname);
137static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 138static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
138 139
139static struct scsi_transport_template *ib_srp_transport_template; 140static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
445 dev->max_pages_per_mr); 446 dev->max_pages_per_mr);
446} 447}
447 448
449static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
450{
451 struct srp_rdma_ch *ch = cq->cq_context;
452
453 complete(&ch->done);
454}
455
456static struct ib_cqe srp_drain_cqe = {
457 .done = srp_drain_done,
458};
459
448/** 460/**
449 * srp_destroy_qp() - destroy an RDMA queue pair 461 * srp_destroy_qp() - destroy an RDMA queue pair
450 * @ch: SRP RDMA channel. 462 * @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
457static void srp_destroy_qp(struct srp_rdma_ch *ch) 469static void srp_destroy_qp(struct srp_rdma_ch *ch)
458{ 470{
459 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 471 static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
460 static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; 472 static struct ib_recv_wr wr = { 0 };
461 struct ib_recv_wr *bad_wr; 473 struct ib_recv_wr *bad_wr;
462 int ret; 474 int ret;
463 475
476 wr.wr_cqe = &srp_drain_cqe;
464 /* Destroying a QP and reusing ch->done is only safe if not connected */ 477 /* Destroying a QP and reusing ch->done is only safe if not connected */
465 WARN_ON_ONCE(ch->connected); 478 WARN_ON_ONCE(ch->connected);
466 479
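
The hunk above replaces the SRP_LAST_WR_ID marker with srp_drain_cqe: once the QP has been moved to the error state, one final recv work request is posted, and its flush completion signals that every completion posted before it has already been processed. A sketch of that idiom under the assumption that the CQ context is a struct completion (in ib_srp.c the context is the channel and ch->done is completed instead):

#include <linux/completion.h>
#include <rdma/ib_verbs.h>

static void example_drain_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct completion *done = cq->cq_context;	/* assumed layout */

	complete(done);
}

static struct ib_cqe example_drain_cqe = {
	.done = example_drain_done,
};

static int example_drain_qp(struct ib_qp *qp, struct completion *done)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	struct ib_recv_wr wr = { .wr_cqe = &example_drain_cqe }, *bad_wr;
	int ret;

	/* flush all outstanding work requests */
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret)
		return ret;

	/* the marker WR completes after everything posted before it */
	ret = ib_post_recv(qp, &wr, &bad_wr);
	if (ret)
		return ret;

	wait_for_completion(done);
	return 0;
}
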
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
489 struct ib_fmr_pool *fmr_pool = NULL; 502 struct ib_fmr_pool *fmr_pool = NULL;
490 struct srp_fr_pool *fr_pool = NULL; 503 struct srp_fr_pool *fr_pool = NULL;
491 const int m = dev->use_fast_reg ? 3 : 1; 504 const int m = dev->use_fast_reg ? 3 : 1;
492 struct ib_cq_init_attr cq_attr = {};
493 int ret; 505 int ret;
494 506
495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 507 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
496 if (!init_attr) 508 if (!init_attr)
497 return -ENOMEM; 509 return -ENOMEM;
498 510
499 /* + 1 for SRP_LAST_WR_ID */ 511 /* queue_size + 1 for ib_drain_qp */
500 cq_attr.cqe = target->queue_size + 1; 512 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
501 cq_attr.comp_vector = ch->comp_vector; 513 ch->comp_vector, IB_POLL_SOFTIRQ);
502 recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
503 &cq_attr);
504 if (IS_ERR(recv_cq)) { 514 if (IS_ERR(recv_cq)) {
505 ret = PTR_ERR(recv_cq); 515 ret = PTR_ERR(recv_cq);
506 goto err; 516 goto err;
507 } 517 }
508 518
509 cq_attr.cqe = m * target->queue_size; 519 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
510 cq_attr.comp_vector = ch->comp_vector; 520 ch->comp_vector, IB_POLL_DIRECT);
511 send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
512 &cq_attr);
513 if (IS_ERR(send_cq)) { 521 if (IS_ERR(send_cq)) {
514 ret = PTR_ERR(send_cq); 522 ret = PTR_ERR(send_cq);
515 goto err_recv_cq; 523 goto err_recv_cq;
516 } 524 }
517 525
518 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
519
520 init_attr->event_handler = srp_qp_event; 526 init_attr->event_handler = srp_qp_event;
521 init_attr->cap.max_send_wr = m * target->queue_size; 527 init_attr->cap.max_send_wr = m * target->queue_size;
522 init_attr->cap.max_recv_wr = target->queue_size + 1; 528 init_attr->cap.max_recv_wr = target->queue_size + 1;
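
ib_alloc_cq() replaces the ib_create_cq()/ib_req_notify_cq() pair removed above: the core now allocates the WC array, polls, and re-arms the CQ itself, and the poll context decides where the ib_cqe done() callbacks run. A condensed sketch of the allocation as used here, with error handling trimmed and illustrative names:

#include <rdma/ib_verbs.h>

static int example_create_cqs(struct ib_device *dev, void *ctx, int queue_size,
			      int comp_vector, struct ib_cq **recv_cq,
			      struct ib_cq **send_cq)
{
	/* receive completions are processed from soft-IRQ context */
	*recv_cq = ib_alloc_cq(dev, ctx, queue_size + 1, comp_vector,
			       IB_POLL_SOFTIRQ);
	if (IS_ERR(*recv_cq))
		return PTR_ERR(*recv_cq);

	/*
	 * IB_POLL_DIRECT leaves polling to the caller, e.g. via
	 * ib_process_cq_direct(); IB_POLL_WORKQUEUE (used by ib_srpt below)
	 * runs completions from process context.
	 */
	*send_cq = ib_alloc_cq(dev, ctx, queue_size, comp_vector,
			       IB_POLL_DIRECT);
	if (IS_ERR(*send_cq)) {
		ib_free_cq(*recv_cq);
		return PTR_ERR(*send_cq);
	}
	return 0;
}
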
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
558 if (ch->qp) 564 if (ch->qp)
559 srp_destroy_qp(ch); 565 srp_destroy_qp(ch);
560 if (ch->recv_cq) 566 if (ch->recv_cq)
561 ib_destroy_cq(ch->recv_cq); 567 ib_free_cq(ch->recv_cq);
562 if (ch->send_cq) 568 if (ch->send_cq)
563 ib_destroy_cq(ch->send_cq); 569 ib_free_cq(ch->send_cq);
564 570
565 ch->qp = qp; 571 ch->qp = qp;
566 ch->recv_cq = recv_cq; 572 ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
580 return 0; 586 return 0;
581 587
582err_qp: 588err_qp:
583 ib_destroy_qp(qp); 589 srp_destroy_qp(ch);
584 590
585err_send_cq: 591err_send_cq:
586 ib_destroy_cq(send_cq); 592 ib_free_cq(send_cq);
587 593
588err_recv_cq: 594err_recv_cq:
589 ib_destroy_cq(recv_cq); 595 ib_free_cq(recv_cq);
590 596
591err: 597err:
592 kfree(init_attr); 598 kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
622 if (ch->fmr_pool) 628 if (ch->fmr_pool)
623 ib_destroy_fmr_pool(ch->fmr_pool); 629 ib_destroy_fmr_pool(ch->fmr_pool);
624 } 630 }
631
625 srp_destroy_qp(ch); 632 srp_destroy_qp(ch);
626 ib_destroy_cq(ch->send_cq); 633 ib_free_cq(ch->send_cq);
627 ib_destroy_cq(ch->recv_cq); 634 ib_free_cq(ch->recv_cq);
628 635
629 /* 636 /*
630 * Avoid that the SCSI error handler tries to use this channel after 637 * Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
1041 return ret <= 0 ? ret : -ENODEV; 1048 return ret <= 0 ? ret : -ENODEV;
1042} 1049}
1043 1050
1044static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey) 1051static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1052{
1053 srp_handle_qp_err(cq, wc, "INV RKEY");
1054}
1055
1056static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1057 u32 rkey)
1045{ 1058{
1046 struct ib_send_wr *bad_wr; 1059 struct ib_send_wr *bad_wr;
1047 struct ib_send_wr wr = { 1060 struct ib_send_wr wr = {
1048 .opcode = IB_WR_LOCAL_INV, 1061 .opcode = IB_WR_LOCAL_INV,
1049 .wr_id = LOCAL_INV_WR_ID_MASK,
1050 .next = NULL, 1062 .next = NULL,
1051 .num_sge = 0, 1063 .num_sge = 0,
1052 .send_flags = 0, 1064 .send_flags = 0,
1053 .ex.invalidate_rkey = rkey, 1065 .ex.invalidate_rkey = rkey,
1054 }; 1066 };
1055 1067
1068 wr.wr_cqe = &req->reg_cqe;
1069 req->reg_cqe.done = srp_inv_rkey_err_done;
1056 return ib_post_send(ch->qp, &wr, &bad_wr); 1070 return ib_post_send(ch->qp, &wr, &bad_wr);
1057} 1071}
1058 1072
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
1074 struct srp_fr_desc **pfr; 1088 struct srp_fr_desc **pfr;
1075 1089
1076 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1090 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1077 res = srp_inv_rkey(ch, (*pfr)->mr->rkey); 1091 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1078 if (res < 0) { 1092 if (res < 0) {
1079 shost_printk(KERN_ERR, target->scsi_host, PFX 1093 shost_printk(KERN_ERR, target->scsi_host, PFX
1080 "Queueing INV WR for rkey %#x failed (%d)\n", 1094 "Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
1312 return 0; 1326 return 0;
1313} 1327}
1314 1328
1329static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1330{
1331 srp_handle_qp_err(cq, wc, "FAST REG");
1332}
1333
1315static int srp_map_finish_fr(struct srp_map_state *state, 1334static int srp_map_finish_fr(struct srp_map_state *state,
1335 struct srp_request *req,
1316 struct srp_rdma_ch *ch, int sg_nents) 1336 struct srp_rdma_ch *ch, int sg_nents)
1317{ 1337{
1318 struct srp_target_port *target = ch->target; 1338 struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1349 if (unlikely(n < 0)) 1369 if (unlikely(n < 0))
1350 return n; 1370 return n;
1351 1371
1372 req->reg_cqe.done = srp_reg_mr_err_done;
1373
1352 wr.wr.next = NULL; 1374 wr.wr.next = NULL;
1353 wr.wr.opcode = IB_WR_REG_MR; 1375 wr.wr.opcode = IB_WR_REG_MR;
1354 wr.wr.wr_id = FAST_REG_WR_ID_MASK; 1376 wr.wr.wr_cqe = &req->reg_cqe;
1355 wr.wr.num_sge = 0; 1377 wr.wr.num_sge = 0;
1356 wr.wr.send_flags = 0; 1378 wr.wr.send_flags = 0;
1357 wr.mr = desc->mr; 1379 wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1455 while (count) { 1477 while (count) {
1456 int i, n; 1478 int i, n;
1457 1479
1458 n = srp_map_finish_fr(state, ch, count); 1480 n = srp_map_finish_fr(state, req, ch, count);
1459 if (unlikely(n < 0)) 1481 if (unlikely(n < 0))
1460 return n; 1482 return n;
1461 1483
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1524#ifdef CONFIG_NEED_SG_DMA_LENGTH 1546#ifdef CONFIG_NEED_SG_DMA_LENGTH
1525 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1547 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1526#endif 1548#endif
1527 ret = srp_map_finish_fr(&state, ch, 1); 1549 ret = srp_map_finish_fr(&state, req, ch, 1);
1528 if (ret < 0) 1550 if (ret < 0)
1529 return ret; 1551 return ret;
1530 } else if (dev->use_fmr) { 1552 } else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1719 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 1741 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1720 struct srp_iu *iu; 1742 struct srp_iu *iu;
1721 1743
1722 srp_send_completion(ch->send_cq, ch); 1744 ib_process_cq_direct(ch->send_cq, -1);
1723 1745
1724 if (list_empty(&ch->free_tx)) 1746 if (list_empty(&ch->free_tx))
1725 return NULL; 1747 return NULL;
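
With the send CQ switched to IB_POLL_DIRECT, __srp_get_tx_iu() now reclaims transmit slots by polling explicitly rather than relying on srp_send_completion(). Roughly, with illustrative names (a budget of -1 means poll until the CQ is empty):

#include <linux/list.h>
#include <rdma/ib_verbs.h>

static bool example_tx_slot_available(struct ib_cq *send_cq,
				      struct list_head *free_tx)
{
	/*
	 * Run any pending send completions; their done() callbacks are
	 * expected to return finished IUs to the free_tx list.
	 */
	ib_process_cq_direct(send_cq, -1);

	return !list_empty(free_tx);
}
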
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1739 return iu; 1761 return iu;
1740} 1762}
1741 1763
1764static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1765{
1766 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1767 struct srp_rdma_ch *ch = cq->cq_context;
1768
1769 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1770 srp_handle_qp_err(cq, wc, "SEND");
1771 return;
1772 }
1773
1774 list_add(&iu->list, &ch->free_tx);
1775}
1776
1742static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 1777static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1743{ 1778{
1744 struct srp_target_port *target = ch->target; 1779 struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1749 list.length = len; 1784 list.length = len;
1750 list.lkey = target->lkey; 1785 list.lkey = target->lkey;
1751 1786
1787 iu->cqe.done = srp_send_done;
1788
1752 wr.next = NULL; 1789 wr.next = NULL;
1753 wr.wr_id = (uintptr_t) iu; 1790 wr.wr_cqe = &iu->cqe;
1754 wr.sg_list = &list; 1791 wr.sg_list = &list;
1755 wr.num_sge = 1; 1792 wr.num_sge = 1;
1756 wr.opcode = IB_WR_SEND; 1793 wr.opcode = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1769 list.length = iu->size; 1806 list.length = iu->size;
1770 list.lkey = target->lkey; 1807 list.lkey = target->lkey;
1771 1808
1809 iu->cqe.done = srp_recv_done;
1810
1772 wr.next = NULL; 1811 wr.next = NULL;
1773 wr.wr_id = (uintptr_t) iu; 1812 wr.wr_cqe = &iu->cqe;
1774 wr.sg_list = &list; 1813 wr.sg_list = &list;
1775 wr.num_sge = 1; 1814 wr.num_sge = 1;
1776 1815
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
1902 "problems processing SRP_AER_REQ\n"); 1941 "problems processing SRP_AER_REQ\n");
1903} 1942}
1904 1943
1905static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc) 1944static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1906{ 1945{
1946 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1947 struct srp_rdma_ch *ch = cq->cq_context;
1907 struct srp_target_port *target = ch->target; 1948 struct srp_target_port *target = ch->target;
1908 struct ib_device *dev = target->srp_host->srp_dev->dev; 1949 struct ib_device *dev = target->srp_host->srp_dev->dev;
1909 struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
1910 int res; 1950 int res;
1911 u8 opcode; 1951 u8 opcode;
1912 1952
1953 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1954 srp_handle_qp_err(cq, wc, "RECV");
1955 return;
1956 }
1957
1913 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 1958 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
1914 DMA_FROM_DEVICE); 1959 DMA_FROM_DEVICE);
1915 1960
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
1972 srp_start_tl_fail_timers(target->rport); 2017 srp_start_tl_fail_timers(target->rport);
1973} 2018}
1974 2019
1975static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, 2020static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
1976 bool send_err, struct srp_rdma_ch *ch) 2021 const char *opname)
1977{ 2022{
2023 struct srp_rdma_ch *ch = cq->cq_context;
1978 struct srp_target_port *target = ch->target; 2024 struct srp_target_port *target = ch->target;
1979 2025
1980 if (wr_id == SRP_LAST_WR_ID) {
1981 complete(&ch->done);
1982 return;
1983 }
1984
1985 if (ch->connected && !target->qp_in_error) { 2026 if (ch->connected && !target->qp_in_error) {
1986 if (wr_id & LOCAL_INV_WR_ID_MASK) { 2027 shost_printk(KERN_ERR, target->scsi_host,
1987 shost_printk(KERN_ERR, target->scsi_host, PFX 2028 PFX "failed %s status %s (%d) for CQE %p\n",
1988 "LOCAL_INV failed with status %s (%d)\n", 2029 opname, ib_wc_status_msg(wc->status), wc->status,
1989 ib_wc_status_msg(wc_status), wc_status); 2030 wc->wr_cqe);
1990 } else if (wr_id & FAST_REG_WR_ID_MASK) {
1991 shost_printk(KERN_ERR, target->scsi_host, PFX
1992 "FAST_REG_MR failed status %s (%d)\n",
1993 ib_wc_status_msg(wc_status), wc_status);
1994 } else {
1995 shost_printk(KERN_ERR, target->scsi_host,
1996 PFX "failed %s status %s (%d) for iu %p\n",
1997 send_err ? "send" : "receive",
1998 ib_wc_status_msg(wc_status), wc_status,
1999 (void *)(uintptr_t)wr_id);
2000 }
2001 queue_work(system_long_wq, &target->tl_err_work); 2031 queue_work(system_long_wq, &target->tl_err_work);
2002 } 2032 }
2003 target->qp_in_error = true; 2033 target->qp_in_error = true;
2004} 2034}
2005 2035
2006static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
2007{
2008 struct srp_rdma_ch *ch = ch_ptr;
2009 struct ib_wc wc;
2010
2011 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2012 while (ib_poll_cq(cq, 1, &wc) > 0) {
2013 if (likely(wc.status == IB_WC_SUCCESS)) {
2014 srp_handle_recv(ch, &wc);
2015 } else {
2016 srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
2017 }
2018 }
2019}
2020
2021static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
2022{
2023 struct srp_rdma_ch *ch = ch_ptr;
2024 struct ib_wc wc;
2025 struct srp_iu *iu;
2026
2027 while (ib_poll_cq(cq, 1, &wc) > 0) {
2028 if (likely(wc.status == IB_WC_SUCCESS)) {
2029 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
2030 list_add(&iu->list, &ch->free_tx);
2031 } else {
2032 srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
2033 }
2034 }
2035}
2036
2037static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2036static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2038{ 2037{
2039 struct srp_target_port *target = host_to_target(shost); 2038 struct srp_target_port *target = host_to_target(shost);
@@ -3587,8 +3586,6 @@ static int __init srp_init_module(void)
3587{ 3586{
3588 int ret; 3587 int ret;
3589 3588
3590 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
3591
3592 if (srp_sg_tablesize) { 3589 if (srp_sg_tablesize) {
3593 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 3590 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3594 if (!cmd_sg_entries) 3591 if (!cmd_sg_entries)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index f6af531f9f32..9e05ce4a04fd 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -66,11 +66,6 @@ enum {
66 SRP_TAG_TSK_MGMT = 1U << 31, 66 SRP_TAG_TSK_MGMT = 1U << 31,
67 67
68 SRP_MAX_PAGES_PER_MR = 512, 68 SRP_MAX_PAGES_PER_MR = 512,
69
70 LOCAL_INV_WR_ID_MASK = 1,
71 FAST_REG_WR_ID_MASK = 2,
72
73 SRP_LAST_WR_ID = 0xfffffffcU,
74}; 69};
75 70
76enum srp_target_state { 71enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
128 struct srp_direct_buf *indirect_desc; 123 struct srp_direct_buf *indirect_desc;
129 dma_addr_t indirect_dma_addr; 124 dma_addr_t indirect_dma_addr;
130 short nmdesc; 125 short nmdesc;
126 struct ib_cqe reg_cqe;
131}; 127};
132 128
133/** 129/**
@@ -231,6 +227,7 @@ struct srp_iu {
231 void *buf; 227 void *buf;
232 size_t size; 228 size_t size;
233 enum dma_data_direction direction; 229 enum dma_data_direction direction;
230 struct ib_cqe cqe;
234}; 231};
235 232
236/** 233/**
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2e2fe818ca9f..8068affe25b5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
93static struct ib_client srpt_client; 93static struct ib_client srpt_client;
94static void srpt_release_channel(struct srpt_rdma_ch *ch); 94static void srpt_release_channel(struct srpt_rdma_ch *ch);
95static int srpt_queue_status(struct se_cmd *cmd); 95static int srpt_queue_status(struct se_cmd *cmd);
96static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
97static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
96 98
97/** 99/**
98 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. 100 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -778,12 +780,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
778 struct ib_recv_wr wr, *bad_wr; 780 struct ib_recv_wr wr, *bad_wr;
779 781
780 BUG_ON(!sdev); 782 BUG_ON(!sdev);
781 wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
782
783 list.addr = ioctx->ioctx.dma; 783 list.addr = ioctx->ioctx.dma;
784 list.length = srp_max_req_size; 784 list.length = srp_max_req_size;
785 list.lkey = sdev->pd->local_dma_lkey; 785 list.lkey = sdev->pd->local_dma_lkey;
786 786
787 ioctx->ioctx.cqe.done = srpt_recv_done;
788 wr.wr_cqe = &ioctx->ioctx.cqe;
787 wr.next = NULL; 789 wr.next = NULL;
788 wr.sg_list = &list; 790 wr.sg_list = &list;
789 wr.num_sge = 1; 791 wr.num_sge = 1;
@@ -819,8 +821,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
819 list.length = len; 821 list.length = len;
820 list.lkey = sdev->pd->local_dma_lkey; 822 list.lkey = sdev->pd->local_dma_lkey;
821 823
824 ioctx->ioctx.cqe.done = srpt_send_done;
822 wr.next = NULL; 825 wr.next = NULL;
823 wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); 826 wr.wr_cqe = &ioctx->ioctx.cqe;
824 wr.sg_list = &list; 827 wr.sg_list = &list;
825 wr.num_sge = 1; 828 wr.num_sge = 1;
826 wr.opcode = IB_WR_SEND; 829 wr.opcode = IB_WR_SEND;
@@ -1052,13 +1055,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1052 1055
1053 BUG_ON(!ch); 1056 BUG_ON(!ch);
1054 BUG_ON(!ioctx); 1057 BUG_ON(!ioctx);
1055 BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius); 1058 BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
1056 1059
1057 while (ioctx->n_rdma) 1060 while (ioctx->n_rdma)
1058 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); 1061 kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
1059 1062
1060 kfree(ioctx->rdma_ius); 1063 kfree(ioctx->rdma_wrs);
1061 ioctx->rdma_ius = NULL; 1064 ioctx->rdma_wrs = NULL;
1062 1065
1063 if (ioctx->mapped_sg_count) { 1066 if (ioctx->mapped_sg_count) {
1064 sg = ioctx->sg; 1067 sg = ioctx->sg;
@@ -1082,7 +1085,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1082 struct scatterlist *sg, *sg_orig; 1085 struct scatterlist *sg, *sg_orig;
1083 int sg_cnt; 1086 int sg_cnt;
1084 enum dma_data_direction dir; 1087 enum dma_data_direction dir;
1085 struct rdma_iu *riu; 1088 struct ib_rdma_wr *riu;
1086 struct srp_direct_buf *db; 1089 struct srp_direct_buf *db;
1087 dma_addr_t dma_addr; 1090 dma_addr_t dma_addr;
1088 struct ib_sge *sge; 1091 struct ib_sge *sge;
@@ -1109,23 +1112,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1109 1112
1110 ioctx->mapped_sg_count = count; 1113 ioctx->mapped_sg_count = count;
1111 1114
1112 if (ioctx->rdma_ius && ioctx->n_rdma_ius) 1115 if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
1113 nrdma = ioctx->n_rdma_ius; 1116 nrdma = ioctx->n_rdma_wrs;
1114 else { 1117 else {
1115 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE 1118 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
1116 + ioctx->n_rbuf; 1119 + ioctx->n_rbuf;
1117 1120
1118 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL); 1121 ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
1119 if (!ioctx->rdma_ius) 1122 GFP_KERNEL);
1123 if (!ioctx->rdma_wrs)
1120 goto free_mem; 1124 goto free_mem;
1121 1125
1122 ioctx->n_rdma_ius = nrdma; 1126 ioctx->n_rdma_wrs = nrdma;
1123 } 1127 }
1124 1128
1125 db = ioctx->rbufs; 1129 db = ioctx->rbufs;
1126 tsize = cmd->data_length; 1130 tsize = cmd->data_length;
1127 dma_len = ib_sg_dma_len(dev, &sg[0]); 1131 dma_len = ib_sg_dma_len(dev, &sg[0]);
1128 riu = ioctx->rdma_ius; 1132 riu = ioctx->rdma_wrs;
1129 1133
1130 /* 1134 /*
1131 * For each remote desc - calculate the #ib_sge. 1135 * For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1143,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1139 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { 1143 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1140 rsize = be32_to_cpu(db->len); 1144 rsize = be32_to_cpu(db->len);
1141 raddr = be64_to_cpu(db->va); 1145 raddr = be64_to_cpu(db->va);
1142 riu->raddr = raddr; 1146 riu->remote_addr = raddr;
1143 riu->rkey = be32_to_cpu(db->key); 1147 riu->rkey = be32_to_cpu(db->key);
1144 riu->sge_cnt = 0; 1148 riu->wr.num_sge = 0;
1145 1149
1146 /* calculate how many sge required for this remote_buf */ 1150 /* calculate how many sge required for this remote_buf */
1147 while (rsize > 0 && tsize > 0) { 1151 while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1169,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1165 rsize = 0; 1169 rsize = 0;
1166 } 1170 }
1167 1171
1168 ++riu->sge_cnt; 1172 ++riu->wr.num_sge;
1169 1173
1170 if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) { 1174 if (rsize > 0 &&
1175 riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
1171 ++ioctx->n_rdma; 1176 ++ioctx->n_rdma;
1172 riu->sge = 1177 riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
1173 kmalloc(riu->sge_cnt * sizeof *riu->sge, 1178 sizeof(*riu->wr.sg_list),
1174 GFP_KERNEL); 1179 GFP_KERNEL);
1175 if (!riu->sge) 1180 if (!riu->wr.sg_list)
1176 goto free_mem; 1181 goto free_mem;
1177 1182
1178 ++riu; 1183 ++riu;
1179 riu->sge_cnt = 0; 1184 riu->wr.num_sge = 0;
1180 riu->raddr = raddr; 1185 riu->remote_addr = raddr;
1181 riu->rkey = be32_to_cpu(db->key); 1186 riu->rkey = be32_to_cpu(db->key);
1182 } 1187 }
1183 } 1188 }
1184 1189
1185 ++ioctx->n_rdma; 1190 ++ioctx->n_rdma;
1186 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge, 1191 riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
1187 GFP_KERNEL); 1192 sizeof(*riu->wr.sg_list),
1188 if (!riu->sge) 1193 GFP_KERNEL);
1194 if (!riu->wr.sg_list)
1189 goto free_mem; 1195 goto free_mem;
1190 } 1196 }
1191 1197
1192 db = ioctx->rbufs; 1198 db = ioctx->rbufs;
1193 tsize = cmd->data_length; 1199 tsize = cmd->data_length;
1194 riu = ioctx->rdma_ius; 1200 riu = ioctx->rdma_wrs;
1195 sg = sg_orig; 1201 sg = sg_orig;
1196 dma_len = ib_sg_dma_len(dev, &sg[0]); 1202 dma_len = ib_sg_dma_len(dev, &sg[0]);
1197 dma_addr = ib_sg_dma_address(dev, &sg[0]); 1203 dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1206,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1200 for (i = 0, j = 0; 1206 for (i = 0, j = 0;
1201 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { 1207 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1202 rsize = be32_to_cpu(db->len); 1208 rsize = be32_to_cpu(db->len);
1203 sge = riu->sge; 1209 sge = riu->wr.sg_list;
1204 k = 0; 1210 k = 0;
1205 1211
1206 while (rsize > 0 && tsize > 0) { 1212 while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1238,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1232 } 1238 }
1233 1239
1234 ++k; 1240 ++k;
1235 if (k == riu->sge_cnt && rsize > 0 && tsize > 0) { 1241 if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
1236 ++riu; 1242 ++riu;
1237 sge = riu->sge; 1243 sge = riu->wr.sg_list;
1238 k = 0; 1244 k = 0;
1239 } else if (rsize > 0 && tsize > 0) 1245 } else if (rsize > 0 && tsize > 0)
1240 ++sge; 1246 ++sge;
@@ -1277,8 +1283,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1277 ioctx->n_rbuf = 0; 1283 ioctx->n_rbuf = 0;
1278 ioctx->rbufs = NULL; 1284 ioctx->rbufs = NULL;
1279 ioctx->n_rdma = 0; 1285 ioctx->n_rdma = 0;
1280 ioctx->n_rdma_ius = 0; 1286 ioctx->n_rdma_wrs = 0;
1281 ioctx->rdma_ius = NULL; 1287 ioctx->rdma_wrs = NULL;
1282 ioctx->mapped_sg_count = 0; 1288 ioctx->mapped_sg_count = 0;
1283 init_completion(&ioctx->tx_done); 1289 init_completion(&ioctx->tx_done);
1284 ioctx->queue_status_only = false; 1290 ioctx->queue_status_only = false;
@@ -1380,118 +1386,44 @@ out:
1380} 1386}
1381 1387
1382/** 1388/**
1383 * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
1384 */
1385static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
1386{
1387 struct srpt_send_ioctx *ioctx;
1388 enum srpt_command_state state;
1389 u32 index;
1390
1391 atomic_inc(&ch->sq_wr_avail);
1392
1393 index = idx_from_wr_id(wr_id);
1394 ioctx = ch->ioctx_ring[index];
1395 state = srpt_get_cmd_state(ioctx);
1396
1397 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1398 && state != SRPT_STATE_MGMT_RSP_SENT
1399 && state != SRPT_STATE_NEED_DATA
1400 && state != SRPT_STATE_DONE);
1401
1402 /* If SRP_RSP sending failed, undo the ch->req_lim change. */
1403 if (state == SRPT_STATE_CMD_RSP_SENT
1404 || state == SRPT_STATE_MGMT_RSP_SENT)
1405 atomic_dec(&ch->req_lim);
1406
1407 srpt_abort_cmd(ioctx);
1408}
1409
1410/**
1411 * srpt_handle_send_comp() - Process an IB send completion notification.
1412 */
1413static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
1414 struct srpt_send_ioctx *ioctx)
1415{
1416 enum srpt_command_state state;
1417
1418 atomic_inc(&ch->sq_wr_avail);
1419
1420 state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1421
1422 if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1423 && state != SRPT_STATE_MGMT_RSP_SENT
1424 && state != SRPT_STATE_DONE))
1425 pr_debug("state = %d\n", state);
1426
1427 if (state != SRPT_STATE_DONE) {
1428 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1429 transport_generic_free_cmd(&ioctx->cmd, 0);
1430 } else {
1431 pr_err("IB completion has been received too late for"
1432 " wr_id = %u.\n", ioctx->ioctx.index);
1433 }
1434}
1435
1436/**
1437 * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
1438 *
1439 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping 1389 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
1440 * the data that has been transferred via IB RDMA had to be postponed until the 1390 * the data that has been transferred via IB RDMA had to be postponed until the
1441 * check_stop_free() callback. None of this is necessary anymore and needs to 1391 * check_stop_free() callback. None of this is necessary anymore and needs to
1442 * be cleaned up. 1392 * be cleaned up.
1443 */ 1393 */
1444static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, 1394static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
1445 struct srpt_send_ioctx *ioctx,
1446 enum srpt_opcode opcode)
1447{ 1395{
1396 struct srpt_rdma_ch *ch = cq->cq_context;
1397 struct srpt_send_ioctx *ioctx =
1398 container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
1399
1448 WARN_ON(ioctx->n_rdma <= 0); 1400 WARN_ON(ioctx->n_rdma <= 0);
1449 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); 1401 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1450 1402
1451 if (opcode == SRPT_RDMA_READ_LAST) { 1403 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1452 if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, 1404 pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
1453 SRPT_STATE_DATA_IN)) 1405 ioctx, wc->status);
1454 target_execute_cmd(&ioctx->cmd); 1406 srpt_abort_cmd(ioctx);
1455 else 1407 return;
1456 pr_err("%s[%d]: wrong state = %d\n", __func__,
1457 __LINE__, srpt_get_cmd_state(ioctx));
1458 } else if (opcode == SRPT_RDMA_ABORT) {
1459 ioctx->rdma_aborted = true;
1460 } else {
1461 WARN(true, "unexpected opcode %d\n", opcode);
1462 } 1408 }
1409
1410 if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
1411 SRPT_STATE_DATA_IN))
1412 target_execute_cmd(&ioctx->cmd);
1413 else
1414 pr_err("%s[%d]: wrong state = %d\n", __func__,
1415 __LINE__, srpt_get_cmd_state(ioctx));
1463} 1416}
1464 1417
1465/** 1418static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
1466 * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
1467 */
1468static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
1469 struct srpt_send_ioctx *ioctx,
1470 enum srpt_opcode opcode)
1471{ 1419{
1472 enum srpt_command_state state; 1420 struct srpt_send_ioctx *ioctx =
1421 container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
1473 1422
1474 state = srpt_get_cmd_state(ioctx); 1423 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1475 switch (opcode) { 1424 pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
1476 case SRPT_RDMA_READ_LAST: 1425 ioctx, wc->status);
1477 if (ioctx->n_rdma <= 0) { 1426 srpt_abort_cmd(ioctx);
1478 pr_err("Received invalid RDMA read"
1479 " error completion with idx %d\n",
1480 ioctx->ioctx.index);
1481 break;
1482 }
1483 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1484 if (state == SRPT_STATE_NEED_DATA)
1485 srpt_abort_cmd(ioctx);
1486 else
1487 pr_err("%s[%d]: wrong state = %d\n",
1488 __func__, __LINE__, state);
1489 break;
1490 case SRPT_RDMA_WRITE_LAST:
1491 break;
1492 default:
1493 pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
1494 break;
1495 } 1427 }
1496} 1428}
1497 1429
@@ -1926,32 +1858,26 @@ out:
1926 return; 1858 return;
1927} 1859}
1928 1860
1929static void srpt_process_rcv_completion(struct ib_cq *cq, 1861static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1930 struct srpt_rdma_ch *ch,
1931 struct ib_wc *wc)
1932{ 1862{
1933 struct srpt_device *sdev = ch->sport->sdev; 1863 struct srpt_rdma_ch *ch = cq->cq_context;
1934 struct srpt_recv_ioctx *ioctx; 1864 struct srpt_recv_ioctx *ioctx =
1935 u32 index; 1865 container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
1936 1866
1937 index = idx_from_wr_id(wc->wr_id);
1938 if (wc->status == IB_WC_SUCCESS) { 1867 if (wc->status == IB_WC_SUCCESS) {
1939 int req_lim; 1868 int req_lim;
1940 1869
1941 req_lim = atomic_dec_return(&ch->req_lim); 1870 req_lim = atomic_dec_return(&ch->req_lim);
1942 if (unlikely(req_lim < 0)) 1871 if (unlikely(req_lim < 0))
1943 pr_err("req_lim = %d < 0\n", req_lim); 1872 pr_err("req_lim = %d < 0\n", req_lim);
1944 ioctx = sdev->ioctx_ring[index];
1945 srpt_handle_new_iu(ch, ioctx, NULL); 1873 srpt_handle_new_iu(ch, ioctx, NULL);
1946 } else { 1874 } else {
1947 pr_info("receiving failed for idx %u with status %d\n", 1875 pr_info("receiving failed for ioctx %p with status %d\n",
1948 index, wc->status); 1876 ioctx, wc->status);
1949 } 1877 }
1950} 1878}
1951 1879
1952/** 1880/**
1953 * srpt_process_send_completion() - Process an IB send completion.
1954 *
1955 * Note: Although this has not yet been observed during tests, at least in 1881 * Note: Although this has not yet been observed during tests, at least in
1956 * theory it is possible that the srpt_get_send_ioctx() call invoked by 1882 * theory it is possible that the srpt_get_send_ioctx() call invoked by
1957 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta 1883 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,108 +1890,51 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
1964 * are queued on cmd_wait_list. The code below processes these delayed 1890 * are queued on cmd_wait_list. The code below processes these delayed
1965 * requests one at a time. 1891 * requests one at a time.
1966 */ 1892 */
1967static void srpt_process_send_completion(struct ib_cq *cq, 1893static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
1968 struct srpt_rdma_ch *ch,
1969 struct ib_wc *wc)
1970{ 1894{
1971 struct srpt_send_ioctx *send_ioctx; 1895 struct srpt_rdma_ch *ch = cq->cq_context;
1972 uint32_t index; 1896 struct srpt_send_ioctx *ioctx =
1973 enum srpt_opcode opcode; 1897 container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
1898 enum srpt_command_state state;
1974 1899
1975 index = idx_from_wr_id(wc->wr_id); 1900 state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1976 opcode = opcode_from_wr_id(wc->wr_id); 1901
1977 send_ioctx = ch->ioctx_ring[index]; 1902 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
1978 if (wc->status == IB_WC_SUCCESS) { 1903 state != SRPT_STATE_MGMT_RSP_SENT);
1979 if (opcode == SRPT_SEND) 1904
1980 srpt_handle_send_comp(ch, send_ioctx); 1905 atomic_inc(&ch->sq_wr_avail);
1981 else { 1906
1982 WARN_ON(opcode != SRPT_RDMA_ABORT && 1907 if (wc->status != IB_WC_SUCCESS) {
1983 wc->opcode != IB_WC_RDMA_READ); 1908 pr_info("sending response for ioctx 0x%p failed"
1984 srpt_handle_rdma_comp(ch, send_ioctx, opcode); 1909 " with status %d\n", ioctx, wc->status);
1985 } 1910
1911 atomic_dec(&ch->req_lim);
1912 srpt_abort_cmd(ioctx);
1913 goto out;
1914 }
1915
1916 if (state != SRPT_STATE_DONE) {
1917 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1918 transport_generic_free_cmd(&ioctx->cmd, 0);
1986 } else { 1919 } else {
1987 if (opcode == SRPT_SEND) { 1920 pr_err("IB completion has been received too late for"
1988 pr_info("sending response for idx %u failed" 1921 " wr_id = %u.\n", ioctx->ioctx.index);
1989 " with status %d\n", index, wc->status);
1990 srpt_handle_send_err_comp(ch, wc->wr_id);
1991 } else if (opcode != SRPT_RDMA_MID) {
1992 pr_info("RDMA t %d for idx %u failed with"
1993 " status %d\n", opcode, index, wc->status);
1994 srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
1995 }
1996 } 1922 }
1997 1923
1998 while (unlikely(opcode == SRPT_SEND 1924out:
1999 && !list_empty(&ch->cmd_wait_list) 1925 while (!list_empty(&ch->cmd_wait_list) &&
2000 && srpt_get_ch_state(ch) == CH_LIVE 1926 srpt_get_ch_state(ch) == CH_LIVE &&
2001 && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { 1927 (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
2002 struct srpt_recv_ioctx *recv_ioctx; 1928 struct srpt_recv_ioctx *recv_ioctx;
2003 1929
2004 recv_ioctx = list_first_entry(&ch->cmd_wait_list, 1930 recv_ioctx = list_first_entry(&ch->cmd_wait_list,
2005 struct srpt_recv_ioctx, 1931 struct srpt_recv_ioctx,
2006 wait_list); 1932 wait_list);
2007 list_del(&recv_ioctx->wait_list); 1933 list_del(&recv_ioctx->wait_list);
2008 srpt_handle_new_iu(ch, recv_ioctx, send_ioctx); 1934 srpt_handle_new_iu(ch, recv_ioctx, ioctx);
2009 } 1935 }
2010} 1936}
2011 1937
2012static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
2013{
2014 struct ib_wc *const wc = ch->wc;
2015 int i, n;
2016
2017 WARN_ON(cq != ch->cq);
2018
2019 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2020 while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
2021 for (i = 0; i < n; i++) {
2022 if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
2023 srpt_process_rcv_completion(cq, ch, &wc[i]);
2024 else
2025 srpt_process_send_completion(cq, ch, &wc[i]);
2026 }
2027 }
2028}
2029
2030/**
2031 * srpt_completion() - IB completion queue callback function.
2032 *
2033 * Notes:
2034 * - It is guaranteed that a completion handler will never be invoked
2035 * concurrently on two different CPUs for the same completion queue. See also
2036 * Documentation/infiniband/core_locking.txt and the implementation of
2037 * handle_edge_irq() in kernel/irq/chip.c.
2038 * - When threaded IRQs are enabled, completion handlers are invoked in thread
2039 * context instead of interrupt context.
2040 */
2041static void srpt_completion(struct ib_cq *cq, void *ctx)
2042{
2043 struct srpt_rdma_ch *ch = ctx;
2044
2045 wake_up_interruptible(&ch->wait_queue);
2046}
2047
2048static int srpt_compl_thread(void *arg)
2049{
2050 struct srpt_rdma_ch *ch;
2051
2052 /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2053 current->flags |= PF_NOFREEZE;
2054
2055 ch = arg;
2056 BUG_ON(!ch);
2057 pr_info("Session %s: kernel thread %s (PID %d) started\n",
2058 ch->sess_name, ch->thread->comm, current->pid);
2059 while (!kthread_should_stop()) {
2060 wait_event_interruptible(ch->wait_queue,
2061 (srpt_process_completion(ch->cq, ch),
2062 kthread_should_stop()));
2063 }
2064 pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
2065 ch->sess_name, ch->thread->comm, current->pid);
2066 return 0;
2067}
2068
2069/** 1938/**
2070 * srpt_create_ch_ib() - Create receive and send completion queues. 1939 * srpt_create_ch_ib() - Create receive and send completion queues.
2071 */ 1940 */
@@ -2075,7 +1944,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
2075 struct srpt_port *sport = ch->sport; 1944 struct srpt_port *sport = ch->sport;
2076 struct srpt_device *sdev = sport->sdev; 1945 struct srpt_device *sdev = sport->sdev;
2077 u32 srp_sq_size = sport->port_attrib.srp_sq_size; 1946 u32 srp_sq_size = sport->port_attrib.srp_sq_size;
2078 struct ib_cq_init_attr cq_attr = {};
2079 int ret; 1947 int ret;
2080 1948
2081 WARN_ON(ch->rq_size < 1); 1949 WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1954,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
2086 goto out; 1954 goto out;
2087 1955
2088retry: 1956retry:
2089 cq_attr.cqe = ch->rq_size + srp_sq_size; 1957 ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
2090 ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, 1958 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
2091 &cq_attr);
2092 if (IS_ERR(ch->cq)) { 1959 if (IS_ERR(ch->cq)) {
2093 ret = PTR_ERR(ch->cq); 1960 ret = PTR_ERR(ch->cq);
2094 pr_err("failed to create CQ cqe= %d ret= %d\n", 1961 pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1998,6 @@ retry:
2131 if (ret) 1998 if (ret)
2132 goto err_destroy_qp; 1999 goto err_destroy_qp;
2133 2000
2134 init_waitqueue_head(&ch->wait_queue);
2135
2136 pr_debug("creating thread for session %s\n", ch->sess_name);
2137
2138 ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
2139 if (IS_ERR(ch->thread)) {
2140 pr_err("failed to create kernel thread %ld\n",
2141 PTR_ERR(ch->thread));
2142 ch->thread = NULL;
2143 goto err_destroy_qp;
2144 }
2145
2146out: 2001out:
2147 kfree(qp_init); 2002 kfree(qp_init);
2148 return ret; 2003 return ret;
@@ -2150,17 +2005,14 @@ out:
2150err_destroy_qp: 2005err_destroy_qp:
2151 ib_destroy_qp(ch->qp); 2006 ib_destroy_qp(ch->qp);
2152err_destroy_cq: 2007err_destroy_cq:
2153 ib_destroy_cq(ch->cq); 2008 ib_free_cq(ch->cq);
2154 goto out; 2009 goto out;
2155} 2010}
2156 2011
2157static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) 2012static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
2158{ 2013{
2159 if (ch->thread)
2160 kthread_stop(ch->thread);
2161
2162 ib_destroy_qp(ch->qp); 2014 ib_destroy_qp(ch->qp);
2163 ib_destroy_cq(ch->cq); 2015 ib_free_cq(ch->cq);
2164} 2016}
2165 2017
2166/** 2018/**
@@ -2821,12 +2673,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2821static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, 2673static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2822 struct srpt_send_ioctx *ioctx) 2674 struct srpt_send_ioctx *ioctx)
2823{ 2675{
2824 struct ib_rdma_wr wr;
2825 struct ib_send_wr *bad_wr; 2676 struct ib_send_wr *bad_wr;
2826 struct rdma_iu *riu; 2677 int sq_wr_avail, ret, i;
2827 int i;
2828 int ret;
2829 int sq_wr_avail;
2830 enum dma_data_direction dir; 2678 enum dma_data_direction dir;
2831 const int n_rdma = ioctx->n_rdma; 2679 const int n_rdma = ioctx->n_rdma;
2832 2680
@@ -2842,59 +2690,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2842 } 2690 }
2843 } 2691 }
2844 2692
2845 ioctx->rdma_aborted = false; 2693 for (i = 0; i < n_rdma; i++) {
2846 ret = 0; 2694 struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
2847 riu = ioctx->rdma_ius;
2848 memset(&wr, 0, sizeof wr);
2849
2850 for (i = 0; i < n_rdma; ++i, ++riu) {
2851 if (dir == DMA_FROM_DEVICE) {
2852 wr.wr.opcode = IB_WR_RDMA_WRITE;
2853 wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2854 SRPT_RDMA_WRITE_LAST :
2855 SRPT_RDMA_MID,
2856 ioctx->ioctx.index);
2857 } else {
2858 wr.wr.opcode = IB_WR_RDMA_READ;
2859 wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2860 SRPT_RDMA_READ_LAST :
2861 SRPT_RDMA_MID,
2862 ioctx->ioctx.index);
2863 }
2864 wr.wr.next = NULL;
2865 wr.remote_addr = riu->raddr;
2866 wr.rkey = riu->rkey;
2867 wr.wr.num_sge = riu->sge_cnt;
2868 wr.wr.sg_list = riu->sge;
2869 2695
2870 /* only get completion event for the last rdma write */ 2696 wr->opcode = (dir == DMA_FROM_DEVICE) ?
2871 if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) 2697 IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2872 wr.wr.send_flags = IB_SEND_SIGNALED;
2873 2698
2874 ret = ib_post_send(ch->qp, &wr.wr, &bad_wr); 2699 if (i == n_rdma - 1) {
2875 if (ret) 2700 /* only get completion event for the last rdma read */
2876 break; 2701 if (dir == DMA_TO_DEVICE) {
2702 wr->send_flags = IB_SEND_SIGNALED;
2703 ioctx->rdma_cqe.done = srpt_rdma_read_done;
2704 } else {
2705 ioctx->rdma_cqe.done = srpt_rdma_write_done;
2706 }
2707 wr->wr_cqe = &ioctx->rdma_cqe;
2708 wr->next = NULL;
2709 } else {
2710 wr->wr_cqe = NULL;
2711 wr->next = &ioctx->rdma_wrs[i + 1].wr;
2712 }
2877 } 2713 }
2878 2714
2715 ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
2879 if (ret) 2716 if (ret)
2880 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", 2717 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
2881 __func__, __LINE__, ret, i, n_rdma); 2718 __func__, __LINE__, ret, i, n_rdma);
2882 if (ret && i > 0) {
2883 wr.wr.num_sge = 0;
2884 wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
2885 wr.wr.send_flags = IB_SEND_SIGNALED;
2886 while (ch->state == CH_LIVE &&
2887 ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
2888 pr_info("Trying to abort failed RDMA transfer [%d]\n",
2889 ioctx->ioctx.index);
2890 msleep(1000);
2891 }
2892 while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
2893 pr_info("Waiting until RDMA abort finished [%d]\n",
2894 ioctx->ioctx.index);
2895 msleep(1000);
2896 }
2897 }
2898out: 2719out:
2899 if (unlikely(dir == DMA_TO_DEVICE && ret < 0)) 2720 if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
2900 atomic_add(n_rdma, &ch->sq_wr_avail); 2721 atomic_add(n_rdma, &ch->sq_wr_avail);
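
srpt_perform_rdmas() above now links all RDMA work requests into a single chain and requests a completion only for the last one, which removes the old SRPT_RDMA_MID/SRPT_RDMA_ABORT bookkeeping. The shape of that chain, sketched with illustrative names and RDMA WRITE assumed:

#include <rdma/ib_verbs.h>

static int example_post_rdma_chain(struct ib_qp *qp, struct ib_rdma_wr *wrs,
				   int n_rdma, struct ib_cqe *cqe)
{
	struct ib_send_wr *bad_wr;
	int i;

	for (i = 0; i < n_rdma; i++) {
		struct ib_send_wr *wr = &wrs[i].wr;

		wr->opcode = IB_WR_RDMA_WRITE;
		if (i == n_rdma - 1) {
			/* completion is requested for the last WR only */
			wr->wr_cqe = cqe;
			wr->send_flags = IB_SEND_SIGNALED;
			wr->next = NULL;
		} else {
			wr->wr_cqe = NULL;
			wr->next = &wrs[i + 1].wr;
		}
	}

	/* one ib_post_send() submits the whole chain */
	return ib_post_send(qp, &wrs[0].wr, &bad_wr);
}
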
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 5faad8acd789..a98b86b73ed6 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -128,36 +128,6 @@ enum {
128 DEFAULT_MAX_RDMA_SIZE = 65536, 128 DEFAULT_MAX_RDMA_SIZE = 65536,
129}; 129};
130 130
131enum srpt_opcode {
132 SRPT_RECV,
133 SRPT_SEND,
134 SRPT_RDMA_MID,
135 SRPT_RDMA_ABORT,
136 SRPT_RDMA_READ_LAST,
137 SRPT_RDMA_WRITE_LAST,
138};
139
140static inline u64 encode_wr_id(u8 opcode, u32 idx)
141{
142 return ((u64)opcode << 32) | idx;
143}
144static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
145{
146 return wr_id >> 32;
147}
148static inline u32 idx_from_wr_id(u64 wr_id)
149{
150 return (u32)wr_id;
151}
152
153struct rdma_iu {
154 u64 raddr;
155 u32 rkey;
156 struct ib_sge *sge;
157 u32 sge_cnt;
158 int mem_id;
159};
160
161/** 131/**
162 * enum srpt_command_state - SCSI command state managed by SRPT. 132 * enum srpt_command_state - SCSI command state managed by SRPT.
163 * @SRPT_STATE_NEW: New command arrived and is being processed. 133 * @SRPT_STATE_NEW: New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
189 * @index: Index of the I/O context in its ioctx_ring array. 159 * @index: Index of the I/O context in its ioctx_ring array.
190 */ 160 */
191struct srpt_ioctx { 161struct srpt_ioctx {
162 struct ib_cqe cqe;
192 void *buf; 163 void *buf;
193 dma_addr_t dma; 164 dma_addr_t dma;
194 uint32_t index; 165 uint32_t index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
215 * @sg: Pointer to sg-list associated with this I/O context. 186 * @sg: Pointer to sg-list associated with this I/O context.
216 * @sg_cnt: SG-list size. 187 * @sg_cnt: SG-list size.
217 * @mapped_sg_count: ib_dma_map_sg() return value. 188 * @mapped_sg_count: ib_dma_map_sg() return value.
218 * @n_rdma_ius: Number of elements in the rdma_ius array. 189 * @n_rdma_wrs: Number of elements in the rdma_wrs array.
219 * @rdma_ius: Array with information about the RDMA mapping. 190 * @rdma_wrs: Array with information about the RDMA mapping.
220 * @tag: Tag of the received SRP information unit. 191 * @tag: Tag of the received SRP information unit.
221 * @spinlock: Protects 'state'. 192 * @spinlock: Protects 'state'.
222 * @state: I/O context state. 193 * @state: I/O context state.
223 * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
224 * the already initiated transfers have finished.
225 * @cmd: Target core command data structure. 194 * @cmd: Target core command data structure.
226 * @sense_data: SCSI sense data. 195 * @sense_data: SCSI sense data.
227 */ 196 */
228struct srpt_send_ioctx { 197struct srpt_send_ioctx {
229 struct srpt_ioctx ioctx; 198 struct srpt_ioctx ioctx;
230 struct srpt_rdma_ch *ch; 199 struct srpt_rdma_ch *ch;
231 struct rdma_iu *rdma_ius; 200 struct ib_rdma_wr *rdma_wrs;
201 struct ib_cqe rdma_cqe;
232 struct srp_direct_buf *rbufs; 202 struct srp_direct_buf *rbufs;
233 struct srp_direct_buf single_rbuf; 203 struct srp_direct_buf single_rbuf;
234 struct scatterlist *sg; 204 struct scatterlist *sg;
235 struct list_head free_list; 205 struct list_head free_list;
236 spinlock_t spinlock; 206 spinlock_t spinlock;
237 enum srpt_command_state state; 207 enum srpt_command_state state;
238 bool rdma_aborted;
239 struct se_cmd cmd; 208 struct se_cmd cmd;
240 struct completion tx_done; 209 struct completion tx_done;
241 int sg_cnt; 210 int sg_cnt;
242 int mapped_sg_count; 211 int mapped_sg_count;
243 u16 n_rdma_ius; 212 u16 n_rdma_wrs;
244 u8 n_rdma; 213 u8 n_rdma;
245 u8 n_rbuf; 214 u8 n_rbuf;
246 bool queue_status_only; 215 bool queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
267 236
268/** 237/**
269 * struct srpt_rdma_ch - RDMA channel. 238 * struct srpt_rdma_ch - RDMA channel.
270 * @wait_queue: Allows the kernel thread to wait for more work.
271 * @thread: Kernel thread that processes the IB queues associated with
272 * the channel.
273 * @cm_id: IB CM ID associated with the channel. 239 * @cm_id: IB CM ID associated with the channel.
274 * @qp: IB queue pair used for communicating over this channel. 240 * @qp: IB queue pair used for communicating over this channel.
275 * @cq: IB completion queue for this channel. 241 * @cq: IB completion queue for this channel.
@@ -299,8 +265,6 @@ enum rdma_ch_state {
299 * @release_done: Enables waiting for srpt_release_channel() completion. 265 * @release_done: Enables waiting for srpt_release_channel() completion.
300 */ 266 */
301struct srpt_rdma_ch { 267struct srpt_rdma_ch {
302 wait_queue_head_t wait_queue;
303 struct task_struct *thread;
304 struct ib_cm_id *cm_id; 268 struct ib_cm_id *cm_id;
305 struct ib_qp *qp; 269 struct ib_qp *qp;
306 struct ib_cq *cq; 270 struct ib_cq *cq;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 64eed87d34a8..9900e6191b27 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1104,6 +1104,7 @@ config SCSI_IPR
1104 tristate "IBM Power Linux RAID adapter support" 1104 tristate "IBM Power Linux RAID adapter support"
1105 depends on PCI && SCSI && ATA 1105 depends on PCI && SCSI && ATA
1106 select FW_LOADER 1106 select FW_LOADER
1107 select IRQ_POLL
1107 ---help--- 1108 ---help---
1108 This driver supports the IBM Power Linux family RAID adapters. 1109 This driver supports the IBM Power Linux family RAID adapters.
1109 This includes IBM pSeries 5712, 5703, 5709, and 570A, as well 1110 This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig
index 4e7cad272469..bad5f32e1f67 100644
--- a/drivers/scsi/be2iscsi/Kconfig
+++ b/drivers/scsi/be2iscsi/Kconfig
@@ -3,6 +3,7 @@ config BE2ISCSI
3 depends on PCI && SCSI && NET 3 depends on PCI && SCSI && NET
4 select SCSI_ISCSI_ATTRS 4 select SCSI_ISCSI_ATTRS
5 select ISCSI_BOOT_SYSFS 5 select ISCSI_BOOT_SYSFS
6 select IRQ_POLL
6 7
7 help 8 help
8 This driver implements the iSCSI functionality for Emulex 9 This driver implements the iSCSI functionality for Emulex
diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 77f992e74726..a41c6432f444 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h
@@ -20,7 +20,7 @@
20 20
21#include <linux/pci.h> 21#include <linux/pci.h>
22#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
23#include <linux/blk-iopoll.h> 23#include <linux/irq_poll.h>
24#define FW_VER_LEN 32 24#define FW_VER_LEN 32
25#define MCC_Q_LEN 128 25#define MCC_Q_LEN 128
26#define MCC_CQ_LEN 256 26#define MCC_CQ_LEN 256
@@ -101,7 +101,7 @@ struct be_eq_obj {
101 struct beiscsi_hba *phba; 101 struct beiscsi_hba *phba;
102 struct be_queue_info *cq; 102 struct be_queue_info *cq;
103 struct work_struct work_cqs; /* Work Item */ 103 struct work_struct work_cqs; /* Work Item */
104 struct blk_iopoll iopoll; 104 struct irq_poll iopoll;
105}; 105};
106 106
107struct be_mcc_obj { 107struct be_mcc_obj {
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index b7087ba69d8d..022e87b62e40 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
1292 1292
1293 for (i = 0; i < phba->num_cpus; i++) { 1293 for (i = 0; i < phba->num_cpus; i++) {
1294 pbe_eq = &phwi_context->be_eq[i]; 1294 pbe_eq = &phwi_context->be_eq[i];
1295 blk_iopoll_disable(&pbe_eq->iopoll); 1295 irq_poll_disable(&pbe_eq->iopoll);
1296 beiscsi_process_cq(pbe_eq); 1296 beiscsi_process_cq(pbe_eq);
1297 blk_iopoll_enable(&pbe_eq->iopoll); 1297 irq_poll_enable(&pbe_eq->iopoll);
1298 } 1298 }
1299} 1299}
1300 1300
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index fe0c5143f8e6..cb9072a841be 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
910 num_eq_processed = 0; 910 num_eq_processed = 0;
911 while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] 911 while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
912 & EQE_VALID_MASK) { 912 & EQE_VALID_MASK) {
913 if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) 913 irq_poll_sched(&pbe_eq->iopoll);
914 blk_iopoll_sched(&pbe_eq->iopoll);
915 914
916 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); 915 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
917 queue_tail_inc(eq); 916 queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
972 spin_unlock_irqrestore(&phba->isr_lock, flags); 971 spin_unlock_irqrestore(&phba->isr_lock, flags);
973 num_mcceq_processed++; 972 num_mcceq_processed++;
974 } else { 973 } else {
975 if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) 974 irq_poll_sched(&pbe_eq->iopoll);
976 blk_iopoll_sched(&pbe_eq->iopoll);
977 num_ioeq_processed++; 975 num_ioeq_processed++;
978 } 976 }
979 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); 977 AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
2295 hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1); 2293 hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
2296} 2294}
2297 2295
2298static int be_iopoll(struct blk_iopoll *iop, int budget) 2296static int be_iopoll(struct irq_poll *iop, int budget)
2299{ 2297{
2300 unsigned int ret; 2298 unsigned int ret;
2301 struct beiscsi_hba *phba; 2299 struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
2306 pbe_eq->cq_count += ret; 2304 pbe_eq->cq_count += ret;
2307 if (ret < budget) { 2305 if (ret < budget) {
2308 phba = pbe_eq->phba; 2306 phba = pbe_eq->phba;
2309 blk_iopoll_complete(iop); 2307 irq_poll_complete(iop);
2310 beiscsi_log(phba, KERN_INFO, 2308 beiscsi_log(phba, KERN_INFO,
2311 BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO, 2309 BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
2312 "BM_%d : rearm pbe_eq->q.id =%d\n", 2310 "BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
5293 5291
5294 for (i = 0; i < phba->num_cpus; i++) { 5292 for (i = 0; i < phba->num_cpus; i++) {
5295 pbe_eq = &phwi_context->be_eq[i]; 5293 pbe_eq = &phwi_context->be_eq[i];
5296 blk_iopoll_disable(&pbe_eq->iopoll); 5294 irq_poll_disable(&pbe_eq->iopoll);
5297 } 5295 }
5298 5296
5299 if (unload_state == BEISCSI_CLEAN_UNLOAD) { 5297 if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
5579 5577
5580 for (i = 0; i < phba->num_cpus; i++) { 5578 for (i = 0; i < phba->num_cpus; i++) {
5581 pbe_eq = &phwi_context->be_eq[i]; 5579 pbe_eq = &phwi_context->be_eq[i];
5582 blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, 5580 irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
5583 be_iopoll); 5581 be_iopoll);
5584 blk_iopoll_enable(&pbe_eq->iopoll);
5585 } 5582 }
5586 5583
5587 i = (phba->msix_enabled) ? i : 0; 5584 i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
5752 5749
5753 for (i = 0; i < phba->num_cpus; i++) { 5750 for (i = 0; i < phba->num_cpus; i++) {
5754 pbe_eq = &phwi_context->be_eq[i]; 5751 pbe_eq = &phwi_context->be_eq[i];
5755 blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, 5752 irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
5756 be_iopoll); 5753 be_iopoll);
5757 blk_iopoll_enable(&pbe_eq->iopoll);
5758 } 5754 }
5759 5755
5760 i = (phba->msix_enabled) ? i : 0; 5756 i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
5795 destroy_workqueue(phba->wq); 5791 destroy_workqueue(phba->wq);
5796 for (i = 0; i < phba->num_cpus; i++) { 5792 for (i = 0; i < phba->num_cpus; i++) {
5797 pbe_eq = &phwi_context->be_eq[i]; 5793 pbe_eq = &phwi_context->be_eq[i];
5798 blk_iopoll_disable(&pbe_eq->iopoll); 5794 irq_poll_disable(&pbe_eq->iopoll);
5799 } 5795 }
5800free_twq: 5796free_twq:
5801 beiscsi_clean_port(phba); 5797 beiscsi_clean_port(phba);
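
The be2iscsi hunks above show the whole irq_poll life cycle that replaces blk_iopoll: the hard IRQ handler just schedules the poller (the separate *_sched_prep() step is gone), the poll callback completes it when it stays under budget, and irq_poll_init() no longer needs a follow-up enable call. A self-contained sketch of that pattern; the example_* names and the completion-reaping helper are hypothetical:

#include <linux/interrupt.h>
#include <linux/irq_poll.h>

struct example_queue {
	struct irq_poll iopoll;
	/* ... hardware completion queue state ... */
};

/* hypothetical helper: reap up to @budget completions from the hardware */
static int example_reap_completions(struct example_queue *q, int budget)
{
	return 0;
}

static int example_poll(struct irq_poll *iop, int budget)
{
	struct example_queue *q = container_of(iop, struct example_queue,
					       iopoll);
	int done = example_reap_completions(q, budget);

	if (done < budget)
		irq_poll_complete(iop);	/* done for now; interrupts resume */

	return done;
}

static irqreturn_t example_isr(int irq, void *data)
{
	struct example_queue *q = data;

	irq_poll_sched(&q->iopoll);	/* defer the real work to soft-IRQ */
	return IRQ_HANDLED;
}

static void example_queue_setup(struct example_queue *q, int weight)
{
	irq_poll_init(&q->iopoll, weight, example_poll);
}
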
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 536cd5a80422..82031e00b2e9 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
3638 .store = ipr_store_reset_adapter 3638 .store = ipr_store_reset_adapter
3639}; 3639};
3640 3640
3641static int ipr_iopoll(struct blk_iopoll *iop, int budget); 3641static int ipr_iopoll(struct irq_poll *iop, int budget);
3642 /** 3642 /**
3643 * ipr_show_iopoll_weight - Show ipr polling mode 3643 * ipr_show_iopoll_weight - Show ipr polling mode
3644 * @dev: class device struct 3644 * @dev: class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
3681 int i; 3681 int i;
3682 3682
3683 if (!ioa_cfg->sis64) { 3683 if (!ioa_cfg->sis64) {
3684 dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); 3684 dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
3685 return -EINVAL; 3685 return -EINVAL;
3686 } 3686 }
3687 if (kstrtoul(buf, 10, &user_iopoll_weight)) 3687 if (kstrtoul(buf, 10, &user_iopoll_weight))
3688 return -EINVAL; 3688 return -EINVAL;
3689 3689
3690 if (user_iopoll_weight > 256) { 3690 if (user_iopoll_weight > 256) {
3691 dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); 3691 dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
3692 return -EINVAL; 3692 return -EINVAL;
3693 } 3693 }
3694 3694
3695 if (user_iopoll_weight == ioa_cfg->iopoll_weight) { 3695 if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
3696 dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); 3696 dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
3697 return strlen(buf); 3697 return strlen(buf);
3698 } 3698 }
3699 3699
3700 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 3700 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
3701 for (i = 1; i < ioa_cfg->hrrq_num; i++) 3701 for (i = 1; i < ioa_cfg->hrrq_num; i++)
3702 blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); 3702 irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
3703 } 3703 }
3704 3704
3705 spin_lock_irqsave(shost->host_lock, lock_flags); 3705 spin_lock_irqsave(shost->host_lock, lock_flags);
3706 ioa_cfg->iopoll_weight = user_iopoll_weight; 3706 ioa_cfg->iopoll_weight = user_iopoll_weight;
3707 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 3707 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
3708 for (i = 1; i < ioa_cfg->hrrq_num; i++) { 3708 for (i = 1; i < ioa_cfg->hrrq_num; i++) {
3709 blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, 3709 irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
3710 ioa_cfg->iopoll_weight, ipr_iopoll); 3710 ioa_cfg->iopoll_weight, ipr_iopoll);
3711 blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
3712 } 3711 }
3713 } 3712 }
3714 spin_unlock_irqrestore(shost->host_lock, lock_flags); 3713 spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -5569,7 +5568,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
5569 return num_hrrq; 5568 return num_hrrq;
5570} 5569}
5571 5570
5572static int ipr_iopoll(struct blk_iopoll *iop, int budget) 5571static int ipr_iopoll(struct irq_poll *iop, int budget)
5573{ 5572{
5574 struct ipr_ioa_cfg *ioa_cfg; 5573 struct ipr_ioa_cfg *ioa_cfg;
5575 struct ipr_hrr_queue *hrrq; 5574 struct ipr_hrr_queue *hrrq;
@@ -5585,7 +5584,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
5585 completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); 5584 completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
5586 5585
5587 if (completed_ops < budget) 5586 if (completed_ops < budget)
5588 blk_iopoll_complete(iop); 5587 irq_poll_complete(iop);
5589 spin_unlock_irqrestore(hrrq->lock, hrrq_flags); 5588 spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
5590 5589
5591 list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { 5590 list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5693,8 +5692,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
5693 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 5692 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
5694 if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == 5693 if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
5695 hrrq->toggle_bit) { 5694 hrrq->toggle_bit) {
5696 if (!blk_iopoll_sched_prep(&hrrq->iopoll)) 5695 irq_poll_sched(&hrrq->iopoll);
5697 blk_iopoll_sched(&hrrq->iopoll);
5698 spin_unlock_irqrestore(hrrq->lock, hrrq_flags); 5696 spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
5699 return IRQ_HANDLED; 5697 return IRQ_HANDLED;
5700 } 5698 }
@@ -10405,9 +10403,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
10405 10403
10406 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 10404 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
10407 for (i = 1; i < ioa_cfg->hrrq_num; i++) { 10405 for (i = 1; i < ioa_cfg->hrrq_num; i++) {
10408 blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, 10406 irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
10409 ioa_cfg->iopoll_weight, ipr_iopoll); 10407 ioa_cfg->iopoll_weight, ipr_iopoll);
10410 blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
10411 } 10408 }
10412 } 10409 }
10413 10410
@@ -10436,7 +10433,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
10436 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { 10433 if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
10437 ioa_cfg->iopoll_weight = 0; 10434 ioa_cfg->iopoll_weight = 0;
10438 for (i = 1; i < ioa_cfg->hrrq_num; i++) 10435 for (i = 1; i < ioa_cfg->hrrq_num; i++)
10439 blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); 10436 irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
10440 } 10437 }
10441 10438
10442 while (ioa_cfg->in_reset_reload) { 10439 while (ioa_cfg->in_reset_reload) {
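In the ipr interrupt-handler hunk above, the old blk_iopoll_sched_prep()/blk_iopoll_sched() pair collapses into a single irq_poll_sched() call: the new scheduler (see lib/irq_poll.c further down) performs the disabled and already-scheduled checks itself. A hedged sketch of the resulting ISR pattern, reusing the hypothetical my_eq from the previous sketch:

static irqreturn_t my_isr(int irq, void *dev_id)
{
        struct my_eq *eq = dev_id;

        if (!my_hw_has_completions(eq))         /* hypothetical hardware check */
                return IRQ_NONE;

        my_hw_mask_irq(eq);                     /* hypothetical: quiet the source */

        /*
         * Safe to call unconditionally: irq_poll_sched() returns early if the
         * instance is disabled or already on the per-CPU poll list.
         */
        irq_poll_sched(&eq->iopoll);
        return IRQ_HANDLED;
}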
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index a34c7a5a995e..56c57068300a 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -32,7 +32,7 @@
32#include <linux/libata.h> 32#include <linux/libata.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/kref.h> 34#include <linux/kref.h>
35#include <linux/blk-iopoll.h> 35#include <linux/irq_poll.h>
36#include <scsi/scsi.h> 36#include <scsi/scsi.h>
37#include <scsi/scsi_cmnd.h> 37#include <scsi/scsi_cmnd.h>
38 38
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
517 u8 allow_cmds:1; 517 u8 allow_cmds:1;
518 u8 removing_ioa:1; 518 u8 removing_ioa:1;
519 519
520 struct blk_iopoll iopoll; 520 struct irq_poll iopoll;
521}; 521};
522 522
523/* Command packet structure */ 523/* Command packet structure */
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
deleted file mode 100644
index 77ae77c0b704..000000000000
--- a/include/linux/blk-iopoll.h
+++ /dev/null
@@ -1,46 +0,0 @@
1#ifndef BLK_IOPOLL_H
2#define BLK_IOPOLL_H
3
4struct blk_iopoll;
5typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
6
7struct blk_iopoll {
8 struct list_head list;
9 unsigned long state;
10 unsigned long data;
11 int weight;
12 int max;
13 blk_iopoll_fn *poll;
14};
15
16enum {
17 IOPOLL_F_SCHED = 0,
18 IOPOLL_F_DISABLE = 1,
19};
20
21/*
22 * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
23 * that we were the first to acquire this iop for scheduling. If this iop
24 * is currently disabled, return "failure".
25 */
26static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
27{
28 if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
29 return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
30
31 return 1;
32}
33
34static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
35{
36 return test_bit(IOPOLL_F_DISABLE, &iop->state);
37}
38
39extern void blk_iopoll_sched(struct blk_iopoll *);
40extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
41extern void blk_iopoll_complete(struct blk_iopoll *);
42extern void __blk_iopoll_complete(struct blk_iopoll *);
43extern void blk_iopoll_enable(struct blk_iopoll *);
44extern void blk_iopoll_disable(struct blk_iopoll *);
45
46#endif
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ad16809c8596..7ff98c23199a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -412,7 +412,7 @@ enum
412 NET_TX_SOFTIRQ, 412 NET_TX_SOFTIRQ,
413 NET_RX_SOFTIRQ, 413 NET_RX_SOFTIRQ,
414 BLOCK_SOFTIRQ, 414 BLOCK_SOFTIRQ,
415 BLOCK_IOPOLL_SOFTIRQ, 415 IRQ_POLL_SOFTIRQ,
416 TASKLET_SOFTIRQ, 416 TASKLET_SOFTIRQ,
417 SCHED_SOFTIRQ, 417 SCHED_SOFTIRQ,
418 HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the 418 HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
new file mode 100644
index 000000000000..3e8c1b8fb9be
--- /dev/null
+++ b/include/linux/irq_poll.h
@@ -0,0 +1,25 @@
1#ifndef IRQ_POLL_H
2#define IRQ_POLL_H
3
4struct irq_poll;
5typedef int (irq_poll_fn)(struct irq_poll *, int);
6
7struct irq_poll {
8 struct list_head list;
9 unsigned long state;
10 int weight;
11 irq_poll_fn *poll;
12};
13
14enum {
15 IRQ_POLL_F_SCHED = 0,
16 IRQ_POLL_F_DISABLE = 1,
17};
18
19extern void irq_poll_sched(struct irq_poll *);
20extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
21extern void irq_poll_complete(struct irq_poll *);
22extern void irq_poll_enable(struct irq_poll *);
23extern void irq_poll_disable(struct irq_poll *);
24
25#endif
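The callback type declared here, irq_poll_fn, keeps the NAPI-like contract of the old blk_iopoll_fn: it is handed a budget, returns how many completions it processed, and calls irq_poll_complete() before returning if it finished below budget, ending polled mode until the next irq_poll_sched(). A minimal sketch of such a callback, continuing the hypothetical my_eq example:

static int my_poll(struct irq_poll *iop, int budget)
{
        struct my_eq *eq = container_of(iop, struct my_eq, iopoll);
        int done = 0;

        while (done < budget && my_hw_next_completion(eq)) {    /* hypothetical */
                my_handle_completion(eq);                       /* hypothetical */
                done++;
        }

        if (done < budget) {
                /* Queue drained: leave polled mode, then re-arm the interrupt. */
                irq_poll_complete(iop);
                my_hw_unmask_irq(eq);                           /* hypothetical */
        }

        return done;
}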
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 120da1d7f57e..89ef8d1f2b9d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -49,6 +49,7 @@
49#include <linux/scatterlist.h> 49#include <linux/scatterlist.h>
50#include <linux/workqueue.h> 50#include <linux/workqueue.h>
51#include <linux/socket.h> 51#include <linux/socket.h>
52#include <linux/irq_poll.h>
52#include <uapi/linux/if_ether.h> 53#include <uapi/linux/if_ether.h>
53 54
54#include <linux/atomic.h> 55#include <linux/atomic.h>
@@ -56,6 +57,7 @@
56#include <asm/uaccess.h> 57#include <asm/uaccess.h>
57 58
58extern struct workqueue_struct *ib_wq; 59extern struct workqueue_struct *ib_wq;
60extern struct workqueue_struct *ib_comp_wq;
59 61
60union ib_gid { 62union ib_gid {
61 u8 raw[16]; 63 u8 raw[16];
@@ -758,7 +760,10 @@ enum ib_wc_flags {
758}; 760};
759 761
760struct ib_wc { 762struct ib_wc {
761 u64 wr_id; 763 union {
764 u64 wr_id;
765 struct ib_cqe *wr_cqe;
766 };
762 enum ib_wc_status status; 767 enum ib_wc_status status;
763 enum ib_wc_opcode opcode; 768 enum ib_wc_opcode opcode;
764 u32 vendor_err; 769 u32 vendor_err;
@@ -1079,9 +1084,16 @@ struct ib_mw_bind_info {
1079 int mw_access_flags; 1084 int mw_access_flags;
1080}; 1085};
1081 1086
1087struct ib_cqe {
1088 void (*done)(struct ib_cq *cq, struct ib_wc *wc);
1089};
1090
1082struct ib_send_wr { 1091struct ib_send_wr {
1083 struct ib_send_wr *next; 1092 struct ib_send_wr *next;
1084 u64 wr_id; 1093 union {
1094 u64 wr_id;
1095 struct ib_cqe *wr_cqe;
1096 };
1085 struct ib_sge *sg_list; 1097 struct ib_sge *sg_list;
1086 int num_sge; 1098 int num_sge;
1087 enum ib_wr_opcode opcode; 1099 enum ib_wr_opcode opcode;
@@ -1175,7 +1187,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
1175 1187
1176struct ib_recv_wr { 1188struct ib_recv_wr {
1177 struct ib_recv_wr *next; 1189 struct ib_recv_wr *next;
1178 u64 wr_id; 1190 union {
1191 u64 wr_id;
1192 struct ib_cqe *wr_cqe;
1193 };
1179 struct ib_sge *sg_list; 1194 struct ib_sge *sg_list;
1180 int num_sge; 1195 int num_sge;
1181}; 1196};
@@ -1307,6 +1322,12 @@ struct ib_ah {
1307 1322
1308typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); 1323typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
1309 1324
1325enum ib_poll_context {
1326 IB_POLL_DIRECT, /* caller context, no hw completions */
1327 IB_POLL_SOFTIRQ, /* poll from softirq context */
1328 IB_POLL_WORKQUEUE, /* poll from workqueue */
1329};
1330
1310struct ib_cq { 1331struct ib_cq {
1311 struct ib_device *device; 1332 struct ib_device *device;
1312 struct ib_uobject *uobject; 1333 struct ib_uobject *uobject;
@@ -1315,6 +1336,12 @@ struct ib_cq {
1315 void *cq_context; 1336 void *cq_context;
1316 int cqe; 1337 int cqe;
1317 atomic_t usecnt; /* count number of work queues */ 1338 atomic_t usecnt; /* count number of work queues */
1339 enum ib_poll_context poll_ctx;
1340 struct ib_wc *wc;
1341 union {
1342 struct irq_poll iop;
1343 struct work_struct work;
1344 };
1318}; 1345};
1319 1346
1320struct ib_srq { 1347struct ib_srq {
@@ -2454,6 +2481,11 @@ static inline int ib_post_recv(struct ib_qp *qp,
2454 return qp->device->post_recv(qp, recv_wr, bad_recv_wr); 2481 return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
2455} 2482}
2456 2483
2484struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
2485 int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
2486void ib_free_cq(struct ib_cq *cq);
2487int ib_process_cq_direct(struct ib_cq *cq, int budget);
2488
2457/** 2489/**
2458 * ib_create_cq - Creates a CQ on the specified device. 2490 * ib_create_cq - Creates a CQ on the specified device.
2459 * @device: The device on which to create the CQ. 2491 * @device: The device on which to create the CQ.
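These ib_verbs.h additions are the core of the new completion-queue API: a CQ allocated with ib_alloc_cq() carries its own polling context (direct, softirq via irq_poll, or workqueue), and work requests identify themselves through an embedded ib_cqe whose done callback replaces wr_id-based dispatch. A hedged sketch of how a ULP might use it, with hypothetical names (my_ctx, my_send_done, my_post_send); the converted iser/srp/srpt code elsewhere in this merge is the authoritative usage:

struct my_ctx {
        struct ib_cqe   cqe;            /* embedded so done() can recover the context */
        /* ... per-request state ... */
};

static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
        struct my_ctx *ctx = container_of(wc->wr_cqe, struct my_ctx, cqe);

        if (wc->status != IB_WC_SUCCESS)
                pr_err("my_ctx send failed: %d\n", wc->status);
        /* ... complete ctx ... */
}

static int my_post_send(struct ib_qp *qp, struct my_ctx *ctx, struct ib_sge *sge)
{
        struct ib_send_wr wr = { }, *bad_wr;

        ctx->cqe.done = my_send_done;   /* dispatched via ib_wc.wr_cqe */
        wr.wr_cqe     = &ctx->cqe;      /* instead of encoding state in wr_id */
        wr.sg_list    = sge;
        wr.num_sge    = 1;
        wr.opcode     = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(qp, &wr, &bad_wr);
}

The CQ itself would be set up once with something like cq = ib_alloc_cq(device, NULL, nr_cqe, comp_vector, IB_POLL_SOFTIRQ) and released with ib_free_cq(cq); IB_POLL_DIRECT callers drive it by hand through ib_process_cq_direct().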
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index ff8f6c091a15..f95f25e786ef 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -15,7 +15,7 @@ struct softirq_action;
15 softirq_name(NET_TX) \ 15 softirq_name(NET_TX) \
16 softirq_name(NET_RX) \ 16 softirq_name(NET_RX) \
17 softirq_name(BLOCK) \ 17 softirq_name(BLOCK) \
18 softirq_name(BLOCK_IOPOLL) \ 18 softirq_name(IRQ_POLL) \
19 softirq_name(TASKLET) \ 19 softirq_name(TASKLET) \
20 softirq_name(SCHED) \ 20 softirq_name(SCHED) \
21 softirq_name(HRTIMER) \ 21 softirq_name(HRTIMER) \
diff --git a/lib/Kconfig b/lib/Kconfig
index f0df318104e7..e00e1960260a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -475,6 +475,11 @@ config DDR
475 information. This data is useful for drivers handling 475 information. This data is useful for drivers handling
476 DDR SDRAM controllers. 476 DDR SDRAM controllers.
477 477
478config IRQ_POLL
479 bool "IRQ polling library"
480 help
481 Helper library to poll interrupt mitigation using polling.
482
478config MPILIB 483config MPILIB
479 tristate 484 tristate
480 select CLZ_TAB 485 select CLZ_TAB
diff --git a/lib/Makefile b/lib/Makefile
index 7f1de26613d2..1478ae256561 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -164,6 +164,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
164 164
165obj-$(CONFIG_SG_SPLIT) += sg_split.o 165obj-$(CONFIG_SG_SPLIT) += sg_split.o
166obj-$(CONFIG_STMP_DEVICE) += stmp_device.o 166obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
167obj-$(CONFIG_IRQ_POLL) += irq_poll.o
167 168
168libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ 169libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
169 fdt_empty_tree.o 170 fdt_empty_tree.o
diff --git a/block/blk-iopoll.c b/lib/irq_poll.c
index 0736729d6494..2836620e889f 100644
--- a/block/blk-iopoll.c
+++ b/lib/irq_poll.c
@@ -6,84 +6,84 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/bio.h> 8#include <linux/bio.h>
9#include <linux/blkdev.h>
10#include <linux/interrupt.h> 9#include <linux/interrupt.h>
11#include <linux/cpu.h> 10#include <linux/cpu.h>
12#include <linux/blk-iopoll.h> 11#include <linux/irq_poll.h>
13#include <linux/delay.h> 12#include <linux/delay.h>
14 13
15#include "blk.h" 14static unsigned int irq_poll_budget __read_mostly = 256;
16
17static unsigned int blk_iopoll_budget __read_mostly = 256;
18 15
19static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); 16static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
20 17
21/** 18/**
22 * blk_iopoll_sched - Schedule a run of the iopoll handler 19 * irq_poll_sched - Schedule a run of the iopoll handler
23 * @iop: The parent iopoll structure 20 * @iop: The parent iopoll structure
24 * 21 *
25 * Description: 22 * Description:
26 * Add this blk_iopoll structure to the pending poll list and trigger the 23 * Add this irq_poll structure to the pending poll list and trigger the
27 * raise of the blk iopoll softirq. The driver must already have gotten a 24 * raise of the blk iopoll softirq.
28 * successful return from blk_iopoll_sched_prep() before calling this.
29 **/ 25 **/
30void blk_iopoll_sched(struct blk_iopoll *iop) 26void irq_poll_sched(struct irq_poll *iop)
31{ 27{
32 unsigned long flags; 28 unsigned long flags;
33 29
30 if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
31 return;
32 if (!test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
33 return;
34
34 local_irq_save(flags); 35 local_irq_save(flags);
35 list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); 36 list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
36 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); 37 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
37 local_irq_restore(flags); 38 local_irq_restore(flags);
38} 39}
39EXPORT_SYMBOL(blk_iopoll_sched); 40EXPORT_SYMBOL(irq_poll_sched);
40 41
41/** 42/**
42 * __blk_iopoll_complete - Mark this @iop as un-polled again 43 * __irq_poll_complete - Mark this @iop as un-polled again
43 * @iop: The parent iopoll structure 44 * @iop: The parent iopoll structure
44 * 45 *
45 * Description: 46 * Description:
46 * See blk_iopoll_complete(). This function must be called with interrupts 47 * See irq_poll_complete(). This function must be called with interrupts
47 * disabled. 48 * disabled.
48 **/ 49 **/
49void __blk_iopoll_complete(struct blk_iopoll *iop) 50static void __irq_poll_complete(struct irq_poll *iop)
50{ 51{
51 list_del(&iop->list); 52 list_del(&iop->list);
52 smp_mb__before_atomic(); 53 smp_mb__before_atomic();
53 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); 54 clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
54} 55}
55EXPORT_SYMBOL(__blk_iopoll_complete);
56 56
57/** 57/**
58 * blk_iopoll_complete - Mark this @iop as un-polled again 58 * irq_poll_complete - Mark this @iop as un-polled again
59 * @iop: The parent iopoll structure 59 * @iop: The parent iopoll structure
60 * 60 *
61 * Description: 61 * Description:
62 * If a driver consumes less than the assigned budget in its run of the 62 * If a driver consumes less than the assigned budget in its run of the
63 * iopoll handler, it'll end the polled mode by calling this function. The 63 * iopoll handler, it'll end the polled mode by calling this function. The
64 * iopoll handler will not be invoked again before blk_iopoll_sched_prep() 64 * iopoll handler will not be invoked again before irq_poll_sched()
65 * is called. 65 * is called.
66 **/ 66 **/
67void blk_iopoll_complete(struct blk_iopoll *iop) 67void irq_poll_complete(struct irq_poll *iop)
68{ 68{
69 unsigned long flags; 69 unsigned long flags;
70 70
71 local_irq_save(flags); 71 local_irq_save(flags);
72 __blk_iopoll_complete(iop); 72 __irq_poll_complete(iop);
73 local_irq_restore(flags); 73 local_irq_restore(flags);
74} 74}
75EXPORT_SYMBOL(blk_iopoll_complete); 75EXPORT_SYMBOL(irq_poll_complete);
76 76
77static void blk_iopoll_softirq(struct softirq_action *h) 77static void irq_poll_softirq(struct softirq_action *h)
78{ 78{
79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); 79 struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
80 int rearm = 0, budget = blk_iopoll_budget; 80 int rearm = 0, budget = irq_poll_budget;
81 unsigned long start_time = jiffies; 81 unsigned long start_time = jiffies;
82 82
83 local_irq_disable(); 83 local_irq_disable();
84 84
85 while (!list_empty(list)) { 85 while (!list_empty(list)) {
86 struct blk_iopoll *iop; 86 struct irq_poll *iop;
87 int work, weight; 87 int work, weight;
88 88
89 /* 89 /*
@@ -101,11 +101,11 @@ static void blk_iopoll_softirq(struct softirq_action *h)
101 * entries to the tail of this list, and only ->poll() 101 * entries to the tail of this list, and only ->poll()
102 * calls can remove this head entry from the list. 102 * calls can remove this head entry from the list.
103 */ 103 */
104 iop = list_entry(list->next, struct blk_iopoll, list); 104 iop = list_entry(list->next, struct irq_poll, list);
105 105
106 weight = iop->weight; 106 weight = iop->weight;
107 work = 0; 107 work = 0;
108 if (test_bit(IOPOLL_F_SCHED, &iop->state)) 108 if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
109 work = iop->poll(iop, weight); 109 work = iop->poll(iop, weight);
110 110
111 budget -= work; 111 budget -= work;
@@ -121,72 +121,70 @@ static void blk_iopoll_softirq(struct softirq_action *h)
121 * move the instance around on the list at-will. 121 * move the instance around on the list at-will.
122 */ 122 */
123 if (work >= weight) { 123 if (work >= weight) {
124 if (blk_iopoll_disable_pending(iop)) 124 if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
125 __blk_iopoll_complete(iop); 125 __irq_poll_complete(iop);
126 else 126 else
127 list_move_tail(&iop->list, list); 127 list_move_tail(&iop->list, list);
128 } 128 }
129 } 129 }
130 130
131 if (rearm) 131 if (rearm)
132 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); 132 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
133 133
134 local_irq_enable(); 134 local_irq_enable();
135} 135}
136 136
137/** 137/**
138 * blk_iopoll_disable - Disable iopoll on this @iop 138 * irq_poll_disable - Disable iopoll on this @iop
139 * @iop: The parent iopoll structure 139 * @iop: The parent iopoll structure
140 * 140 *
141 * Description: 141 * Description:
142 * Disable io polling and wait for any pending callbacks to have completed. 142 * Disable io polling and wait for any pending callbacks to have completed.
143 **/ 143 **/
144void blk_iopoll_disable(struct blk_iopoll *iop) 144void irq_poll_disable(struct irq_poll *iop)
145{ 145{
146 set_bit(IOPOLL_F_DISABLE, &iop->state); 146 set_bit(IRQ_POLL_F_DISABLE, &iop->state);
147 while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) 147 while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
148 msleep(1); 148 msleep(1);
149 clear_bit(IOPOLL_F_DISABLE, &iop->state); 149 clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
150} 150}
151EXPORT_SYMBOL(blk_iopoll_disable); 151EXPORT_SYMBOL(irq_poll_disable);
152 152
153/** 153/**
154 * blk_iopoll_enable - Enable iopoll on this @iop 154 * irq_poll_enable - Enable iopoll on this @iop
155 * @iop: The parent iopoll structure 155 * @iop: The parent iopoll structure
156 * 156 *
157 * Description: 157 * Description:
158 * Enable iopoll on this @iop. Note that the handler run will not be 158 * Enable iopoll on this @iop. Note that the handler run will not be
159 * scheduled, it will only mark it as active. 159 * scheduled, it will only mark it as active.
160 **/ 160 **/
161void blk_iopoll_enable(struct blk_iopoll *iop) 161void irq_poll_enable(struct irq_poll *iop)
162{ 162{
163 BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); 163 BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
164 smp_mb__before_atomic(); 164 smp_mb__before_atomic();
165 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); 165 clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
166} 166}
167EXPORT_SYMBOL(blk_iopoll_enable); 167EXPORT_SYMBOL(irq_poll_enable);
168 168
169/** 169/**
170 * blk_iopoll_init - Initialize this @iop 170 * irq_poll_init - Initialize this @iop
171 * @iop: The parent iopoll structure 171 * @iop: The parent iopoll structure
172 * @weight: The default weight (or command completion budget) 172 * @weight: The default weight (or command completion budget)
173 * @poll_fn: The handler to invoke 173 * @poll_fn: The handler to invoke
174 * 174 *
175 * Description: 175 * Description:
176 * Initialize this blk_iopoll structure. Before being actively used, the 176 * Initialize and enable this irq_poll structure.
177 * driver must call blk_iopoll_enable().
178 **/ 177 **/
179void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) 178void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
180{ 179{
181 memset(iop, 0, sizeof(*iop)); 180 memset(iop, 0, sizeof(*iop));
182 INIT_LIST_HEAD(&iop->list); 181 INIT_LIST_HEAD(&iop->list);
183 iop->weight = weight; 182 iop->weight = weight;
184 iop->poll = poll_fn; 183 iop->poll = poll_fn;
185 set_bit(IOPOLL_F_SCHED, &iop->state);
186} 184}
187EXPORT_SYMBOL(blk_iopoll_init); 185EXPORT_SYMBOL(irq_poll_init);
188 186
189static int blk_iopoll_cpu_notify(struct notifier_block *self, 187static int irq_poll_cpu_notify(struct notifier_block *self,
190 unsigned long action, void *hcpu) 188 unsigned long action, void *hcpu)
191{ 189{
192 /* 190 /*
@@ -199,26 +197,26 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self,
199 local_irq_disable(); 197 local_irq_disable();
200 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), 198 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
201 this_cpu_ptr(&blk_cpu_iopoll)); 199 this_cpu_ptr(&blk_cpu_iopoll));
202 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); 200 __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
203 local_irq_enable(); 201 local_irq_enable();
204 } 202 }
205 203
206 return NOTIFY_OK; 204 return NOTIFY_OK;
207} 205}
208 206
209static struct notifier_block blk_iopoll_cpu_notifier = { 207static struct notifier_block irq_poll_cpu_notifier = {
210 .notifier_call = blk_iopoll_cpu_notify, 208 .notifier_call = irq_poll_cpu_notify,
211}; 209};
212 210
213static __init int blk_iopoll_setup(void) 211static __init int irq_poll_setup(void)
214{ 212{
215 int i; 213 int i;
216 214
217 for_each_possible_cpu(i) 215 for_each_possible_cpu(i)
218 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); 216 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
219 217
220 open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); 218 open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
221 register_hotcpu_notifier(&blk_iopoll_cpu_notifier); 219 register_hotcpu_notifier(&irq_poll_cpu_notifier);
222 return 0; 220 return 0;
223} 221}
224subsys_initcall(blk_iopoll_setup); 222subsys_initcall(irq_poll_setup);
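One contract worth spelling out in the renamed core above: irq_poll_disable() returns with IRQ_POLL_F_SCHED still set, which is exactly the state irq_poll_enable()'s BUG_ON expects, so after this conversion irq_poll_enable() is only needed to resume an instance that was explicitly disabled, never after irq_poll_init(). A hedged sketch of a quiesce/reconfigure/resume sequence, again using the hypothetical my_eq:

static void my_eq_reconfigure(struct my_eq *eq, int new_entries)
{
        /* Block new poll runs and wait for any in-flight poll to finish. */
        irq_poll_disable(&eq->iopoll);

        my_hw_resize_queue(eq, new_entries);    /* hypothetical reconfiguration */

        /*
         * Drops the IRQ_POLL_F_SCHED bit held since irq_poll_disable(), so the
         * next interrupt can schedule polling again via irq_poll_sched().
         */
        irq_poll_enable(&eq->iopoll);
}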
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 2a912df6771b..af5a31661086 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3746,7 +3746,7 @@ static const struct flag flags[] = {
3746 { "NET_TX_SOFTIRQ", 2 }, 3746 { "NET_TX_SOFTIRQ", 2 },
3747 { "NET_RX_SOFTIRQ", 3 }, 3747 { "NET_RX_SOFTIRQ", 3 },
3748 { "BLOCK_SOFTIRQ", 4 }, 3748 { "BLOCK_SOFTIRQ", 4 },
3749 { "BLOCK_IOPOLL_SOFTIRQ", 5 }, 3749 { "IRQ_POLL_SOFTIRQ", 5 },
3750 { "TASKLET_SOFTIRQ", 6 }, 3750 { "TASKLET_SOFTIRQ", 6 },
3751 { "SCHED_SOFTIRQ", 7 }, 3751 { "SCHED_SOFTIRQ", 7 },
3752 { "HRTIMER_SOFTIRQ", 8 }, 3752 { "HRTIMER_SOFTIRQ", 8 },
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 8ff7d620d942..33b52eaa39db 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -209,7 +209,7 @@ static const struct flag flags[] = {
209 { "NET_TX_SOFTIRQ", 2 }, 209 { "NET_TX_SOFTIRQ", 2 },
210 { "NET_RX_SOFTIRQ", 3 }, 210 { "NET_RX_SOFTIRQ", 3 },
211 { "BLOCK_SOFTIRQ", 4 }, 211 { "BLOCK_SOFTIRQ", 4 },
212 { "BLOCK_IOPOLL_SOFTIRQ", 5 }, 212 { "IRQ_POLL_SOFTIRQ", 5 },
213 { "TASKLET_SOFTIRQ", 6 }, 213 { "TASKLET_SOFTIRQ", 6 },
214 { "SCHED_SOFTIRQ", 7 }, 214 { "SCHED_SOFTIRQ", 7 },
215 { "HRTIMER_SOFTIRQ", 8 }, 215 { "HRTIMER_SOFTIRQ", 8 },