author    James Smart <jsmart2021@gmail.com>	2017-08-01 18:12:39 -0400
committer Christoph Hellwig <hch@lst.de>	2017-08-10 05:06:38 -0400
commit    0fb228d30b8d72bfee51f57e638d412324d44a11 (patch)
tree      2e31dfcf89cb621eba3aa18cff9e363b3af8330b /drivers/nvme
parent    758f3735580c21b8a36d644128af6608120a1dde (diff)
nvmet_fc: add defer_req callback for deferment of cmd buffer return
At queue creation, the transport allocates a local job struct (struct nvmet_fc_fcp_iod) for each possible element of the queue. When a new CMD is received from the wire, a job struct is allocated from the queue and used for the duration of the command. The job struct contains buffer space for the wire command IU, so upon allocation the CMD IU is copied into the job struct and the LLDD may immediately free/reuse the CMD IU buffer passed in the call.

However, FC is packetized, and the FC LLDD API may issue a hw command to send the wire response yet not receive the hw completion for that command (and upcall the nvmet_fc layer) before a new command is asynchronously received on the wire. In other words, the initiator can get the response off the wire, believe a command slot is free, and send a new command IU. That new command IU may be received by the LLDD and passed to the transport before the LLDD has serviced the hw completion and made the teardown calls for the original job struct. At that point there is no job struct available for the new io: it looks as if the host sent more queue elements than the queue size, even though, from the host's point of view, it did not.

Rather than treat this as a hard connection failure, queue the new request until a job struct frees up. Since there is no job struct to copy into, the CMD IU buffer cannot be copied yet, so a special return value is given to the LLDD to signify that it must hold off on recycling the CMD IU buffer. Later, when a job struct is allocated and the buffer copied, a new LLDD callback is invoked to notify the LLDD that it may now recycle its CMD IU buffer.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
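To make the new contract concrete, here is a minimal sketch of the LLDD side, assuming a hypothetical driver: every example_* type and helper is invented for illustration, and only nvmet_fc_rcv_fcp_req() and the defer_rcv() template callback belong to the nvmet-fc API (defer_rcv itself comes from the companion nvme-fc-driver.h change, not this file).

/*
 * Hypothetical LLDD receive path showing the new -EOVERFLOW handling.
 * example_* names are illustrative only.
 */
#include <linux/nvme-fc-driver.h>

struct example_lport {
	struct nvmet_fc_target_port	*targetport;
};

struct example_cmd_ctx {
	struct nvmefc_tgt_fcp_req	fcp_req;
	void				*cmd_iu_buf;
	u32				cmd_iu_len;
	struct example_lport		*lport;
};

/* hypothetical helpers, defined elsewhere in the driver */
static void example_repost_iu_buffer(struct example_lport *lport, void *buf);
static void example_abort_exchange(struct example_lport *lport,
				   struct example_cmd_ctx *ctx);

/* Frame-receive path: hand the CMD IU to the transport */
static void example_recv_fcp_cmd(struct example_lport *lport,
				 struct example_cmd_ctx *ctx)
{
	int ret;

	ret = nvmet_fc_rcv_fcp_req(lport->targetport, &ctx->fcp_req,
				   ctx->cmd_iu_buf, ctx->cmd_iu_len);
	if (ret == 0) {
		/* IU was copied; the buffer may be reposted immediately */
		example_repost_iu_buffer(lport, ctx->cmd_iu_buf);
	} else if (ret == -EOVERFLOW) {
		/*
		 * Command accepted but deferred: the transport still
		 * references cmd_iu_buf, so keep it untouched until
		 * the ->defer_rcv() callback fires.
		 */
	} else {
		/* Not accepted: abort the exchange and recycle the buffer */
		example_abort_exchange(lport, ctx);
		example_repost_iu_buffer(lport, ctx->cmd_iu_buf);
	}
}

/* ->defer_rcv() member of struct nvmet_fc_target_template */
static void example_defer_rcv(struct nvmet_fc_target_port *tgtport,
			      struct nvmefc_tgt_fcp_req *fcpreq)
{
	struct example_cmd_ctx *ctx =
		container_of(fcpreq, struct example_cmd_ctx, fcp_req);

	/* Transport has now copied the IU; the buffer may be recycled */
	example_repost_iu_buffer(ctx->lport, ctx->cmd_iu_buf);
}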
Diffstat (limited to 'drivers/nvme')
-rw-r--r--	drivers/nvme/target/fc.c	212
1 file changed, 184 insertions(+), 28 deletions(-)
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 31ca55dfcb1d..1b7f2520a20d 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -114,6 +114,11 @@ struct nvmet_fc_tgtport {
 	struct kref		ref;
 };
 
+struct nvmet_fc_defer_fcp_req {
+	struct list_head	req_list;
+	struct nvmefc_tgt_fcp_req	*fcp_req;
+};
+
 struct nvmet_fc_tgt_queue {
 	bool			ninetypercent;
 	u16			qid;
@@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue {
 	struct nvmet_fc_tgt_assoc	*assoc;
 	struct nvmet_fc_fcp_iod		*fod;		/* array of fcp_iods */
 	struct list_head		fod_list;
+	struct list_head		pending_cmd_list;
+	struct list_head		avail_defer_list;
 	struct workqueue_struct		*work_q;
 	struct kref			ref;
 } __aligned(sizeof(unsigned long long));
@@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
 static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
 static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
 static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
+					struct nvmet_fc_fcp_iod *fod);
 
 
 /* *********************** FC-NVME DMA Handling **************************** */
@@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod *
 nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 {
 	static struct nvmet_fc_fcp_iod *fod;
-	unsigned long flags;
 
-	spin_lock_irqsave(&queue->qlock, flags);
+	lockdep_assert_held(&queue->qlock);
+
 	fod = list_first_entry_or_null(&queue->fod_list,
 			struct nvmet_fc_fcp_iod, fcp_list);
 	if (fod) {
@@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 		 * will "inherit" that reference.
 		 */
 	}
-	spin_unlock_irqrestore(&queue->qlock, flags);
 	return fod;
 }
 
 
 static void
+nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
+		       struct nvmet_fc_tgt_queue *queue,
+		       struct nvmefc_tgt_fcp_req *fcpreq)
+{
+	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+
+	/*
+	 * put all admin cmds on hw queue id 0. All io commands go to
+	 * the respective hw queue based on a modulo basis
+	 */
+	fcpreq->hwqid = queue->qid ?
+			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
+
+	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+		queue_work_on(queue->cpu, queue->work_q, &fod->work);
+	else
+		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+}
+
+static void
 nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 			struct nvmet_fc_fcp_iod *fod)
 {
 	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
 	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 
 	fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
@@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 
 	fcpreq->nvmet_fc_private = NULL;
 
-	spin_lock_irqsave(&queue->qlock, flags);
-	list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 	fod->active = false;
 	fod->abort = false;
 	fod->aborted = false;
 	fod->writedataactive = false;
 	fod->fcpreq = NULL;
+
+	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
+
+	spin_lock_irqsave(&queue->qlock, flags);
+	deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+	if (!deferfcp) {
+		list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		/* Release reference taken at queue lookup and fod allocation */
+		nvmet_fc_tgt_q_put(queue);
+		return;
+	}
+
+	/* Re-use the fod for the next pending cmd that was deferred */
+	list_del(&deferfcp->req_list);
+
+	fcpreq = deferfcp->fcp_req;
+
+	/* deferfcp can be reused for another IO at a later date */
+	list_add_tail(&deferfcp->req_list, &queue->avail_defer_list);
+
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
+	/* Save NVME CMD IO in fod */
+	memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen);
+
+	/* Setup new fcpreq to be processed */
+	fcpreq->rspaddr = NULL;
+	fcpreq->rsplen = 0;
+	fcpreq->nvmet_fc_private = fod;
+	fod->fcpreq = fcpreq;
+	fod->active = true;
+
+	/* inform LLDD IO is now being processed */
+	tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
+
+	/* Submit deferred IO for processing */
+	nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
 	/*
-	 * release the reference taken at queue lookup and fod allocation
+	 * Leave the queue lookup get reference taken when
+	 * fod was originally allocated.
 	 */
-	nvmet_fc_tgt_q_put(queue);
-
-	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
 }
 
 static int
@@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	queue->port = assoc->tgtport->port;
 	queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
 	INIT_LIST_HEAD(&queue->fod_list);
+	INIT_LIST_HEAD(&queue->avail_defer_list);
+	INIT_LIST_HEAD(&queue->pending_cmd_list);
 	atomic_set(&queue->connected, 0);
 	atomic_set(&queue->sqtail, 0);
 	atomic_set(&queue->rsn, 1);
@@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 {
 	struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
 	struct nvmet_fc_fcp_iod *fod = queue->fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 	int i, writedataactive;
 	bool disconnect;
@@ -666,6 +733,35 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 			}
 		}
 	}
+
+	/* Cleanup defer'ed IOs in queue */
+	list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) {
+		list_del(&deferfcp->req_list);
+		kfree(deferfcp);
+	}
+
+	for (;;) {
+		deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+		if (!deferfcp)
+			break;
+
+		list_del(&deferfcp->req_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		tgtport->ops->defer_rcv(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_abort(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_req_release(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		kfree(deferfcp);
+
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
 	flush_workqueue(queue->work_q);
@@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
  * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc
  * layer for processing.
  *
- * The nvmet-fc layer will copy cmd payload to an internal structure for
- * processing. As such, upon completion of the routine, the LLDD may
- * immediately free/reuse the CMD IU buffer passed in the call.
+ * The nvmet_fc layer allocates a local job structure (struct
+ * nvmet_fc_fcp_iod) from the queue for the io and copies the
+ * CMD IU buffer to the job structure. As such, on a successful
+ * completion (returns 0), the LLDD may immediately free/reuse
+ * the CMD IU buffer passed in the call.
+ *
+ * However, in some circumstances, due to the packetized nature of FC
+ * and the api of the FC LLDD which may issue a hw command to send the
+ * response, but the LLDD may not get the hw completion for that command
+ * and upcall the nvmet_fc layer before a new command may be
+ * asynchronously received - its possible for a command to be received
+ * before the LLDD and nvmet_fc have recycled the job structure. It gives
+ * the appearance of more commands received than fits in the sq.
+ * To alleviate this scenario, a temporary queue is maintained in the
+ * transport for pending LLDD requests waiting for a queue job structure.
+ * In these "overrun" cases, a temporary queue element is allocated
+ * the LLDD request and CMD iu buffer information remembered, and the
+ * routine returns a -EOVERFLOW status. Subsequently, when a queue job
+ * structure is freed, it is immediately reallocated for anything on the
+ * pending request list. The LLDDs defer_rcv() callback is called,
+ * informing the LLDD that it may reuse the CMD IU buffer, and the io
+ * is then started normally with the transport.
  *
- * If this routine returns error, the lldd should abort the exchange.
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat
+ * the completion as successful but must not reuse the CMD IU buffer
+ * until the LLDD's defer_rcv() callback has been called for the
+ * corresponding struct nvmefc_tgt_fcp_req pointer.
+ *
+ * If there is any other condition in which an error occurs, the
+ * transport will return a non-zero status indicating the error.
+ * In all cases other than -EOVERFLOW, the transport has not accepted the
+ * request and the LLDD should abort the exchange.
  *
  * @target_port: pointer to the (registered) target port the FCP CMD IU
  *		was received on.
@@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	struct nvme_fc_cmd_iu *cmdiu = cmdiubuf;
 	struct nvmet_fc_tgt_queue *queue;
 	struct nvmet_fc_fcp_iod *fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
+	unsigned long flags;
 
 	/* validate iu, so the connection id can be used to find the queue */
 	if ((cmdiubuf_len != sizeof(*cmdiu)) ||
@@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	 * when the fod is freed.
 	 */
 
+	spin_lock_irqsave(&queue->qlock, flags);
+
 	fod = nvmet_fc_alloc_fcp_iod(queue);
-	if (!fod) {
+	if (fod) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		fcpreq->nvmet_fc_private = fod;
+		fod->fcpreq = fcpreq;
+
+		memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+
+		nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
+		return 0;
+	}
+
+	if (!tgtport->ops->defer_rcv) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 		/* release the queue lookup reference */
 		nvmet_fc_tgt_q_put(queue);
 		return -ENOENT;
 	}
 
-	fcpreq->nvmet_fc_private = fod;
-	fod->fcpreq = fcpreq;
-	/*
-	 * put all admin cmds on hw queue id 0. All io commands go to
-	 * the respective hw queue based on a modulo basis
-	 */
-	fcpreq->hwqid = queue->qid ?
-		((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
-	memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+	deferfcp = list_first_entry_or_null(&queue->avail_defer_list,
+			struct nvmet_fc_defer_fcp_req, req_list);
+	if (deferfcp) {
+		/* Just re-use one that was previously allocated */
+		list_del(&deferfcp->req_list);
+	} else {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 
-	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
-		queue_work_on(queue->cpu, queue->work_q, &fod->work);
-	else
-		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+		/* Now we need to dynamically allocate one */
+		deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL);
+		if (!deferfcp) {
+			/* release the queue lookup reference */
+			nvmet_fc_tgt_q_put(queue);
+			return -ENOMEM;
+		}
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 
-	return 0;
+	/* For now, use rspaddr / rsplen to save payload information */
+	fcpreq->rspaddr = cmdiubuf;
+	fcpreq->rsplen  = cmdiubuf_len;
+	deferfcp->fcp_req = fcpreq;
+
+	/* defer processing till a fod becomes available */
+	list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list);
+
+	/* NOTE: the queue lookup reference is still valid */
+
+	spin_unlock_irqrestore(&queue->qlock, flags);
+
+	return -EOVERFLOW;
 }
 EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
 
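For completeness, a sketch of how an LLDD might wire the new callback into its target template. This is illustrative only: the defer_rcv member is added to struct nvmet_fc_target_template by the companion include/linux/nvme-fc-driver.h change, which is not part of this diff, and every example_* symbol (and the particular member values) is a hypothetical placeholder.

static struct nvmet_fc_target_template example_tgt_template = {
	.targetport_delete	= example_targetport_delete,
	.xmt_ls_rsp		= example_xmt_ls_rsp,
	.fcp_op			= example_fcp_op,
	.fcp_abort		= example_fcp_abort,
	.fcp_req_release	= example_fcp_req_release,
	.defer_rcv		= example_defer_rcv,	/* new: CMD IU buffer-return notification */
	.max_hw_queues		= 16,
	.target_features	= NVMET_FCTGTFEAT_CMD_IN_ISR,
};

An LLDD that leaves defer_rcv unset keeps the old behaviour: as the nvmet_fc_rcv_fcp_req() change above shows, the transport then returns -ENOENT when no job struct is available, so existing drivers are unaffected.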