author	Matthew Wilcox <matthew.r.wilcox@intel.com>	2011-10-15 07:33:46 -0400
committer	Matthew Wilcox <matthew.r.wilcox@intel.com>	2012-01-10 14:47:46 -0500
commit	c2f5b65020869215814df03c3941dac9436f99fb (patch)
tree	61e4da8b569ec8f747243358429f1052c49f837b /drivers/block/nvme.c
parent	010e646ba2fdfc558048a97da746381c35836280 (diff)
NVMe: Simplify completion handling
Instead of encoding the handler type in the bottom two bits of the per-completion context pointer, store the handler function as well as the context pointer. This gives us more flexibility and the code is clearer. It comes at the cost of an extra 8k of memory per queue, but this feels like a reasonable price to pay.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Diffstat (limited to 'drivers/block/nvme.c')
-rw-r--r--	drivers/block/nvme.c	167
1 file changed, 81 insertions, 86 deletions
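
The core of the change is easiest to see outside the driver. Below is a minimal stand-alone sketch, assuming nothing from nvme.c beyond what the commit message describes: the old scheme tagged a 2-bit handler ID into the low bits of the context pointer (hence the old BUG_ON((unsigned long)ctx & 3) that this patch deletes), while the new scheme stores the handler function pointer next to the context, one extra pointer per command slot. All names here (demo_*, DEMO_DEPTH) are illustrative, not taken from the driver.

/*
 * Illustrative sketch only: contrasts the old low-bit tag encoding with
 * the new explicit {fn, ctx} pair.  demo_* names are made up.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_DEPTH 64

struct demo_completion { int status; };
typedef void (*demo_completion_fn)(void *ctx, struct demo_completion *c);

/* Old scheme: the handler ID lives in the two low bits of the pointer. */
enum { demo_sync_id = 0, demo_bio_id = 1 };
static unsigned long old_slot[DEMO_DEPTH];

static void old_store(int cmdid, void *ctx, int handler_id)
{
	assert(((uintptr_t)ctx & 3) == 0);	/* ctx must be 4-byte aligned */
	old_slot[cmdid] = (unsigned long)ctx | handler_id;
}

/* New scheme: store the function pointer explicitly, no alignment games. */
struct demo_cmd_info {
	demo_completion_fn fn;
	void *ctx;
};
static struct demo_cmd_info new_slot[DEMO_DEPTH];

static void new_store(int cmdid, void *ctx, demo_completion_fn fn)
{
	new_slot[cmdid].fn = fn;
	new_slot[cmdid].ctx = ctx;
}

static void demo_sync(void *ctx, struct demo_completion *c)
{
	printf("sync completion: ctx=%d status=%d\n", *(int *)ctx, c->status);
}

int main(void)
{
	int value = 42;
	struct demo_completion c = { .status = 0 };

	/* Old: dispatch via a small table indexed by the low pointer bits. */
	demo_completion_fn table[4] = { [demo_sync_id] = demo_sync };
	old_store(7, &value, demo_sync_id);
	unsigned long data = old_slot[7];
	table[data & 3]((void *)(data & ~3UL), &c);

	/* New: call the stored function pointer directly. */
	new_store(7, &value, demo_sync);
	new_slot[7].fn(new_slot[7].ctx, &c);
	return 0;
}

The cost of the new layout is one extra pointer per slot (the "8k per queue" in the message); the gain is that any function can be a completion handler, not just one of four table entries.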
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index a17f80fa3881..4724655a6ebf 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -135,8 +135,12 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 }
 
+typedef void (*nvme_completion_fn)(struct nvme_queue *, void *,
+						struct nvme_completion *);
+
 struct nvme_cmd_info {
-	unsigned long ctx;
+	nvme_completion_fn fn;
+	void *ctx;
 	unsigned long timeout;
 };
 
@@ -149,7 +153,7 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
  * alloc_cmdid() - Allocate a Command ID
  * @nvmeq: The queue that will be used for this command
  * @ctx: A pointer that will be passed to the handler
- * @handler: The ID of the handler to call
+ * @handler: The function to call on completion
  *
  * Allocate a Command ID for a queue.  The data passed in will
  * be passed to the completion handler.  This is implemented by using
@@ -160,28 +164,27 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
  * May be called with local interrupts disabled and the q_lock held,
  * or with interrupts enabled and no locks held.
  */
-static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler,
-							unsigned timeout)
+static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx,
+				nvme_completion_fn handler, unsigned timeout)
 {
 	int depth = nvmeq->q_depth - 1;
 	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
 	int cmdid;
 
-	BUG_ON((unsigned long)ctx & 3);
-
 	do {
 		cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
 		if (cmdid >= depth)
 			return -EBUSY;
 	} while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
 
-	info[cmdid].ctx = (unsigned long)ctx | handler;
+	info[cmdid].fn = handler;
+	info[cmdid].ctx = ctx;
 	info[cmdid].timeout = jiffies + timeout;
 	return cmdid;
 }
 
 static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
-						int handler, unsigned timeout)
+				nvme_completion_fn handler, unsigned timeout)
 {
 	int cmdid;
 	wait_event_killable(nvmeq->sq_full,
@@ -189,47 +192,69 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 	return (cmdid < 0) ? -EINTR : cmdid;
 }
 
-/*
- * If you need more than four handlers, you'll need to change how
- * alloc_cmdid and nvme_process_cq work.  Consider using a special
- * CMD_CTX value instead, if that works for your situation.
- */
-enum {
-	sync_completion_id = 0,
-	bio_completion_id,
-};
-
-/* Special values must be a multiple of 4, and less than 0x1000 */
-#define CMD_CTX_BASE		(POISON_POINTER_DELTA + sync_completion_id)
+/* Special values must be less than 0x1000 */
+#define CMD_CTX_BASE		((void *)POISON_POINTER_DELTA)
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
 #define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
 
+static void special_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	if (ctx == CMD_CTX_CANCELLED)
+		return;
+	if (ctx == CMD_CTX_FLUSH)
+		return;
+	if (ctx == CMD_CTX_COMPLETED) {
+		dev_warn(nvmeq->q_dmadev,
+				"completed id %d twice on queue %d\n",
+				cqe->command_id, le16_to_cpup(&cqe->sq_id));
+		return;
+	}
+	if (ctx == CMD_CTX_INVALID) {
+		dev_warn(nvmeq->q_dmadev,
+				"invalid id %d completed on queue %d\n",
+				cqe->command_id, le16_to_cpup(&cqe->sq_id));
+		return;
+	}
+
+	dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held.  May not sleep.
  */
-static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
+static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid,
+						nvme_completion_fn *fn)
 {
-	unsigned long data;
+	void *ctx;
 	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
 
-	if (cmdid >= nvmeq->q_depth)
+	if (cmdid >= nvmeq->q_depth) {
+		*fn = special_completion;
 		return CMD_CTX_INVALID;
-	data = info[cmdid].ctx;
+	}
+	*fn = info[cmdid].fn;
+	ctx = info[cmdid].ctx;
+	info[cmdid].fn = special_completion;
 	info[cmdid].ctx = CMD_CTX_COMPLETED;
 	clear_bit(cmdid, nvmeq->cmdid_data);
 	wake_up(&nvmeq->sq_full);
-	return data;
+	return ctx;
 }
 
-static unsigned long cancel_cmdid(struct nvme_queue *nvmeq, int cmdid)
+static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid,
+						nvme_completion_fn *fn)
 {
-	unsigned long data;
+	void *ctx;
 	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
-	data = info[cmdid].ctx;
+	if (fn)
+		*fn = info[cmdid].fn;
+	ctx = info[cmdid].ctx;
+	info[cmdid].fn = special_completion;
 	info[cmdid].ctx = CMD_CTX_CANCELLED;
-	return data;
+	return ctx;
 }
 
 static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
@@ -485,7 +510,7 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 static int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
 {
 	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-					sync_completion_id, IO_TIMEOUT);
+					special_completion, IO_TIMEOUT);
 	if (unlikely(cmdid < 0))
 		return cmdid;
 
@@ -518,7 +543,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	nbio->bio = bio;
 
 	result = -EBUSY;
-	cmdid = alloc_cmdid(nvmeq, nbio, bio_completion_id, IO_TIMEOUT);
+	cmdid = alloc_cmdid(nvmeq, nbio, bio_completion, IO_TIMEOUT);
 	if (unlikely(cmdid < 0))
 		goto free_nbio;
 
@@ -599,45 +624,6 @@ static int nvme_make_request(struct request_queue *q, struct bio *bio)
 	return 0;
 }
 
-struct sync_cmd_info {
-	struct task_struct *task;
-	u32 result;
-	int status;
-};
-
-static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
-						struct nvme_completion *cqe)
-{
-	struct sync_cmd_info *cmdinfo = ctx;
-	if (unlikely((unsigned long)cmdinfo == CMD_CTX_CANCELLED))
-		return;
-	if ((unsigned long)cmdinfo == CMD_CTX_FLUSH)
-		return;
-	if (unlikely((unsigned long)cmdinfo == CMD_CTX_COMPLETED)) {
-		dev_warn(nvmeq->q_dmadev,
-				"completed id %d twice on queue %d\n",
-				cqe->command_id, le16_to_cpup(&cqe->sq_id));
-		return;
-	}
-	if (unlikely((unsigned long)cmdinfo == CMD_CTX_INVALID)) {
-		dev_warn(nvmeq->q_dmadev,
-				"invalid id %d completed on queue %d\n",
-				cqe->command_id, le16_to_cpup(&cqe->sq_id));
-		return;
-	}
-	cmdinfo->result = le32_to_cpup(&cqe->result);
-	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
-	wake_up_process(cmdinfo->task);
-}
-
-typedef void (*completion_fn)(struct nvme_queue *, void *,
-						struct nvme_completion *);
-
-static const completion_fn nvme_completions[4] = {
-	[sync_completion_id] = sync_completion,
-	[bio_completion_id] = bio_completion,
-};
-
 static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 {
 	u16 head, phase;
@@ -646,9 +632,8 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 	phase = nvmeq->cq_phase;
 
 	for (;;) {
-		unsigned long data;
-		void *ptr;
-		unsigned char handler;
+		void *ctx;
+		nvme_completion_fn fn;
 		struct nvme_completion cqe = nvmeq->cqes[head];
 		if ((le16_to_cpu(cqe.status) & 1) != phase)
 			break;
@@ -658,10 +643,8 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 			phase = !phase;
 		}
 
-		data = free_cmdid(nvmeq, cqe.command_id);
-		handler = data & 3;
-		ptr = (void *)(data & ~3UL);
-		nvme_completions[handler](nvmeq, ptr, &cqe);
+		ctx = free_cmdid(nvmeq, cqe.command_id, &fn);
+		fn(nvmeq, ctx, &cqe);
 	}
 
 	/* If the controller ignores the cq head doorbell and continuously
@@ -702,10 +685,25 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 static void nvme_abort_command(struct nvme_queue *nvmeq, int cmdid)
 {
 	spin_lock_irq(&nvmeq->q_lock);
-	cancel_cmdid(nvmeq, cmdid);
+	cancel_cmdid(nvmeq, cmdid, NULL);
 	spin_unlock_irq(&nvmeq->q_lock);
 }
 
+struct sync_cmd_info {
+	struct task_struct *task;
+	u32 result;
+	int status;
+};
+
+static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct sync_cmd_info *cmdinfo = ctx;
+	cmdinfo->result = le32_to_cpup(&cqe->result);
+	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
+	wake_up_process(cmdinfo->task);
+}
+
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
@@ -719,7 +717,7 @@ static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
 	cmdinfo.task = current;
 	cmdinfo.status = -EINTR;
 
-	cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion_id,
+	cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion,
 								timeout);
 	if (cmdid < 0)
 		return cmdid;
@@ -1201,18 +1199,15 @@ static void nvme_timeout_ios(struct nvme_queue *nvmeq)
 	int cmdid;
 
 	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
-		unsigned long data;
-		void *ptr;
-		unsigned char handler;
+		void *ctx;
+		nvme_completion_fn fn;
 		static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, };
 
 		if (!time_after(now, info[cmdid].timeout))
 			continue;
 		dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid);
-		data = cancel_cmdid(nvmeq, cmdid);
-		handler = data & 3;
-		ptr = (void *)(data & ~3UL);
-		nvme_completions[handler](nvmeq, ptr, &cqe);
+		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
+		fn(nvmeq, ctx, &cqe);
 	}
 }
 
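
For reference, the command-ID lifecycle with the new layout, reduced to a stand-alone sketch. The demo_* names and the fixed-size table are assumptions for illustration; the real driver also keys slots off a per-queue bitmap and bases its sentinel contexts on POISON_POINTER_DELTA, which this sketch omits. The point shown: allocation records {fn, ctx}; the completion path reads both back and re-arms the slot with a sentinel context plus a special_completion-style handler, so a duplicate or stale completion only warns instead of dispatching into freed state.

/* Illustrative sketch of the cmdid lifecycle after this patch; demo_* names are made up. */
#include <stdio.h>

struct demo_completion { int status; };
typedef void (*demo_completion_fn)(void *ctx, struct demo_completion *c);

/* Sentinel "context" that can never be a valid pointer in this sketch. */
#define DEMO_CTX_COMPLETED	((void *)0x310)

struct demo_cmd_info {
	demo_completion_fn fn;
	void *ctx;
};

static struct demo_cmd_info slots[8];

static void demo_special(void *ctx, struct demo_completion *c)
{
	if (ctx == DEMO_CTX_COMPLETED)
		fprintf(stderr, "completion delivered twice (status %d)\n",
			c->status);
}

static void demo_sync(void *ctx, struct demo_completion *c)
{
	*(int *)ctx = c->status;	/* hand the result back to the waiter */
}

static int demo_alloc(int cmdid, void *ctx, demo_completion_fn fn)
{
	slots[cmdid].fn = fn;
	slots[cmdid].ctx = ctx;
	return cmdid;
}

/* Analogous to free_cmdid(): return ctx, report fn, poison the slot. */
static void *demo_free(int cmdid, demo_completion_fn *fn)
{
	void *ctx = slots[cmdid].ctx;

	*fn = slots[cmdid].fn;
	slots[cmdid].fn = demo_special;
	slots[cmdid].ctx = DEMO_CTX_COMPLETED;
	return ctx;
}

int main(void)
{
	int result = -1;
	struct demo_completion cqe = { .status = 7 };
	demo_completion_fn fn;
	void *ctx;

	demo_alloc(3, &result, demo_sync);

	ctx = demo_free(3, &fn);	/* first completion: dispatches demo_sync */
	fn(ctx, &cqe);
	printf("result = %d\n", result);

	ctx = demo_free(3, &fn);	/* bogus second completion: only warns */
	fn(ctx, &cqe);
	return 0;
}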