aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSagi Grimberg <sagi@grimberg.me>2018-09-03 06:47:07 -0400
committerChristoph Hellwig <hch@lst.de>2018-09-05 15:18:01 -0400
commit8407879c4e0d7731f6e7e905893cecf61a7762c7 (patch)
treee0d0ac2f54a1de859be50333a3015a627defb067
parentbc811f05d77f47059c197a98b6ad242eb03999cb (diff)
nvmet-rdma: fix possible bogus dereference under heavy load
Currently we always repost the recv buffer before we send a response capsule back to the host. Since ordering is not guaranteed for send and recv completions, it is possible that we will receive a new request from the host before we got a send completion for the response capsule. Today, we pre-allocate 2x rsps the length of the queue, but in reality, under heavy load there is nothing that is really preventing the gap to expand until we exhaust all our rsps. To fix this, if we don't have any pre-allocated rsps left, we dynamically allocate a rsp and make sure to free it when we are done. If under memory pressure we fail to allocate a rsp, we silently drop the command and wait for the host to retry. Reported-by: Steve Wise <swise@opengridcomputing.com> Tested-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> [hch: dropped a superfluous assignment] Signed-off-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--drivers/nvme/target/rdma.c27
1 file changed, 25 insertions, 2 deletions
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3533e918ea37..bfc4da660bb4 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -66,6 +66,7 @@ struct nvmet_rdma_rsp {
66 66
67 struct nvmet_req req; 67 struct nvmet_req req;
68 68
69 bool allocated;
69 u8 n_rdma; 70 u8 n_rdma;
70 u32 flags; 71 u32 flags;
71 u32 invalidate_rkey; 72 u32 invalidate_rkey;
@@ -174,11 +175,19 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
174 unsigned long flags; 175 unsigned long flags;
175 176
176 spin_lock_irqsave(&queue->rsps_lock, flags); 177 spin_lock_irqsave(&queue->rsps_lock, flags);
177 rsp = list_first_entry(&queue->free_rsps, 178 rsp = list_first_entry_or_null(&queue->free_rsps,
178 struct nvmet_rdma_rsp, free_list); 179 struct nvmet_rdma_rsp, free_list);
179 list_del(&rsp->free_list); 180 if (likely(rsp))
181 list_del(&rsp->free_list);
180 spin_unlock_irqrestore(&queue->rsps_lock, flags); 182 spin_unlock_irqrestore(&queue->rsps_lock, flags);
181 183
184 if (unlikely(!rsp)) {
185 rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
186 if (unlikely(!rsp))
187 return NULL;
188 rsp->allocated = true;
189 }
190
182 return rsp; 191 return rsp;
183} 192}
184 193
@@ -187,6 +196,11 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
187{ 196{
188 unsigned long flags; 197 unsigned long flags;
189 198
199 if (rsp->allocated) {
200 kfree(rsp);
201 return;
202 }
203
190 spin_lock_irqsave(&rsp->queue->rsps_lock, flags); 204 spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
191 list_add_tail(&rsp->free_list, &rsp->queue->free_rsps); 205 list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
192 spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); 206 spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
@@ -776,6 +790,15 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
776 790
777 cmd->queue = queue; 791 cmd->queue = queue;
778 rsp = nvmet_rdma_get_rsp(queue); 792 rsp = nvmet_rdma_get_rsp(queue);
793 if (unlikely(!rsp)) {
794 /*
795 * we get here only under memory pressure,
796 * silently drop and have the host retry
797 * as we can't even fail it.
798 */
799 nvmet_rdma_post_recv(queue->dev, cmd);
800 return;
801 }
779 rsp->queue = queue; 802 rsp->queue = queue;
780 rsp->cmd = cmd; 803 rsp->cmd = cmd;
781 rsp->flags = 0; 804 rsp->flags = 0;