author		Keith Busch <keith.busch@intel.com>	2013-12-10 15:10:40 -0500
committer	Matthew Wilcox <matthew.r.wilcox@intel.com>	2014-01-27 20:07:35 -0500
commit		4d115420707afcabe77d2535e092356df6664b70 (patch)
tree		7e8b6c1003e1d6882bc48fbed66d25b084c7e68f /drivers/block
parent		0e53d18051725da46cbccfb7874a6422d4d4f274 (diff)
NVMe: Async IO queue deletion
This attempts to delete all IO queues at the same time asynchronously on
shutdown. This is necessary for a present device that is not responding; a
shutdown operation previously would take 2 minutes per queue-pair to time out
before moving on to the next queue, making a device removal appear to take a
very long time or appear "hung", as reported by users.

In the previous worst case, a removal could be stuck forever until a kill
signal was given if there were more than 32 queue pairs, since it would run
out of admin command IDs after over an hour of timed-out sync commands (the
admin queue depth is 64).

This patch will wait for the admin command timeout for all commands to
complete, so the worst case now for an unresponsive controller is 60 seconds,
though that still seems like a long time.

Since this adds another way to take queues offline, some duplicate code
resulted, so I moved these into more convenient functions.

Signed-off-by: Keith Busch <keith.busch@intel.com>
[make functions static, correct line length and whitespace issues]
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
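[Reviewer's note] The arithmetic behind the old worst case: each queue pair needs a
delete-SQ and a delete-CQ admin command, each bounded by the 60-second admin timeout,
so a dead controller cost about 2 minutes per pair when handled serially, and with more
than 32 pairs the 64-entry admin queue runs out of command IDs after roughly an hour of
timed-out sync commands. The patch instead issues all deletions at once and blocks the
shutdown path on one shared refcount with a single bounded wait. Below is a minimal
userspace sketch of that wait pattern using pthreads; it is an illustration only, not
the kernel code, and every name in it (delq_ctx, put_dq, delete_queue_async, NUM_QUEUES,
TIMEOUT_SECS) is hypothetical.

/*
 * Sketch: start all queue deletions at once, wait on one shared refcount
 * with a single overall timeout instead of one timeout per queue.
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define NUM_QUEUES   8
#define TIMEOUT_SECS 60	/* stands in for ADMIN_TIMEOUT */

struct delq_ctx {
	pthread_mutex_t lock;
	pthread_cond_t  done;
	int             refcount;	/* queues still being deleted */
};

static struct delq_ctx dq = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.done = PTHREAD_COND_INITIALIZER,
};

/* Called when one queue's asynchronous deletion finishes. */
static void put_dq(void)
{
	pthread_mutex_lock(&dq.lock);
	if (--dq.refcount == 0)
		pthread_cond_signal(&dq.done);
	pthread_mutex_unlock(&dq.lock);
}

/* Stands in for the async delete-SQ/delete-CQ completion chain of one queue. */
static void *delete_queue_async(void *arg)
{
	long qid = (long)arg;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };

	nanosleep(&ts, NULL);	/* pretend the controller takes a moment */
	printf("queue %ld deleted\n", qid);
	put_dq();
	return NULL;
}

int main(void)
{
	pthread_t tid[NUM_QUEUES];
	struct timespec deadline;

	dq.refcount = NUM_QUEUES;
	for (long i = 0; i < NUM_QUEUES; i++)	/* start all deletions at once */
		pthread_create(&tid[i], NULL, delete_queue_async, (void *)i);

	/* One bounded wait for everything, not one timeout per queue. */
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += TIMEOUT_SECS;

	pthread_mutex_lock(&dq.lock);
	while (dq.refcount > 0) {
		if (pthread_cond_timedwait(&dq.done, &dq.lock, &deadline)) {
			printf("timed out; forcing queues offline\n");
			break;
		}
	}
	pthread_mutex_unlock(&dq.lock);

	for (int i = 0; i < NUM_QUEUES; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

With a responsive device all simulated deletions finish almost immediately; with an
unresponsive one the shutdown path gives up after the single TIMEOUT_SECS bound rather
than accumulating a per-queue delay, which mirrors the reasoning in the commit message.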
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/nvme-core.c	229
1 file changed, 217 insertions(+), 12 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index a51126129784..1c8a82fbbc37 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -62,6 +62,14 @@ static struct workqueue_struct *nvme_workq;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
 
+struct async_cmd_info {
+	struct kthread_work work;
+	struct kthread_worker *worker;
+	u32 result;
+	int status;
+	void *ctx;
+};
+
 /*
  * An NVM Express queue. Each device has at least two (one for admin
  * commands and one for I/O commands).
@@ -87,6 +95,7 @@ struct nvme_queue {
 	u8 cq_phase;
 	u8 cqe_seen;
 	u8 q_suspended;
+	struct async_cmd_info cmdinfo;
 	unsigned long cmdid_data[];
 };
 
@@ -208,6 +217,15 @@ static void special_completion(struct nvme_dev *dev, void *ctx,
 	dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx);
 }
 
+static void async_completion(struct nvme_dev *dev, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct async_cmd_info *cmdinfo = ctx;
+	cmdinfo->result = le32_to_cpup(&cqe->result);
+	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
+	queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held. May not sleep.
  */
@@ -898,12 +916,34 @@ int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
 	return cmdinfo.status;
 }
 
+static int nvme_submit_async_cmd(struct nvme_queue *nvmeq,
+			struct nvme_command *cmd,
+			struct async_cmd_info *cmdinfo, unsigned timeout)
+{
+	int cmdid;
+
+	cmdid = alloc_cmdid_killable(nvmeq, cmdinfo, async_completion, timeout);
+	if (cmdid < 0)
+		return cmdid;
+	cmdinfo->status = -EINTR;
+	cmd->common.command_id = cmdid;
+	nvme_submit_cmd(nvmeq, cmd);
+	return 0;
+}
+
 int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
 						u32 *result)
 {
 	return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT);
 }
 
+static int nvme_submit_admin_cmd_async(struct nvme_dev *dev,
+			struct nvme_command *cmd, struct async_cmd_info *cmdinfo)
+{
+	return nvme_submit_async_cmd(dev->queues[0], cmd, cmdinfo,
+								ADMIN_TIMEOUT);
+}
+
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
 	int status;
@@ -1124,15 +1164,20 @@ static void nvme_free_queues(struct nvme_dev *dev)
 	}
 }
 
-static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+/**
+ * nvme_suspend_queue - put queue into suspended state
+ * @nvmeq - queue to suspend
+ *
+ * Returns 1 if already suspended, 0 otherwise.
+ */
+static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
-	struct nvme_queue *nvmeq = dev->queues[qid];
-	int vector = dev->entry[nvmeq->cq_vector].vector;
+	int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (nvmeq->q_suspended) {
 		spin_unlock_irq(&nvmeq->q_lock);
-		return;
+		return 1;
 	}
 	nvmeq->q_suspended = 1;
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -1140,17 +1185,33 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
 
+	return 0;
+}
+
+static void nvme_clear_queue(struct nvme_queue *nvmeq)
+{
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_process_cq(nvmeq);
+	nvme_cancel_ios(nvmeq, false);
+	spin_unlock_irq(&nvmeq->q_lock);
+}
+
+static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+{
+	struct nvme_queue *nvmeq = dev->queues[qid];
+
+	if (!nvmeq)
+		return;
+	if (nvme_suspend_queue(nvmeq))
+		return;
+
 	/* Don't tell the adapter to delete the admin queue.
 	 * Don't tell a removed adapter to delete IO queues. */
 	if (qid && readl(&dev->bar->csts) != -1) {
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
-
-	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	nvme_cancel_ios(nvmeq, false);
-	spin_unlock_irq(&nvmeq->q_lock);
+	nvme_clear_queue(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
@@ -2089,20 +2150,164 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 	pci_disable_device(dev->pci_dev);
 }
 
+struct nvme_delq_ctx {
+	struct task_struct *waiter;
+	struct kthread_worker *worker;
+	atomic_t refcount;
+};
+
+static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
+{
+	dq->waiter = current;
+	mb();
+
+	for (;;) {
+		set_current_state(TASK_KILLABLE);
+		if (!atomic_read(&dq->refcount))
+			break;
+		if (!schedule_timeout(ADMIN_TIMEOUT) ||
+					fatal_signal_pending(current)) {
+			set_current_state(TASK_RUNNING);
+
+			nvme_disable_ctrl(dev, readq(&dev->bar->cap));
+			nvme_disable_queue(dev, 0);
+
+			send_sig(SIGKILL, dq->worker->task, 1);
+			flush_kthread_worker(dq->worker);
+			return;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+}
+
+static void nvme_put_dq(struct nvme_delq_ctx *dq)
+{
+	atomic_dec(&dq->refcount);
+	if (dq->waiter)
+		wake_up_process(dq->waiter);
+}
+
+static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
+{
+	atomic_inc(&dq->refcount);
+	return dq;
+}
+
+static void nvme_del_queue_end(struct nvme_queue *nvmeq)
+{
+	struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
+
+	nvme_clear_queue(nvmeq);
+	nvme_put_dq(dq);
+}
+
+static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
+						kthread_work_func_t fn)
+{
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.delete_queue.opcode = opcode;
+	c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
+
+	init_kthread_work(&nvmeq->cmdinfo.work, fn);
+	return nvme_submit_admin_cmd_async(nvmeq->dev, &c, &nvmeq->cmdinfo);
+}
+
+static void nvme_del_cq_work_handler(struct kthread_work *work)
+{
+	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
+							cmdinfo.work);
+	nvme_del_queue_end(nvmeq);
+}
+
+static int nvme_delete_cq(struct nvme_queue *nvmeq)
+{
+	return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
+						nvme_del_cq_work_handler);
+}
+
+static void nvme_del_sq_work_handler(struct kthread_work *work)
+{
+	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
+							cmdinfo.work);
+	int status = nvmeq->cmdinfo.status;
+
+	if (!status)
+		status = nvme_delete_cq(nvmeq);
+	if (status)
+		nvme_del_queue_end(nvmeq);
+}
+
+static int nvme_delete_sq(struct nvme_queue *nvmeq)
+{
+	return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
+						nvme_del_sq_work_handler);
+}
+
+static void nvme_del_queue_start(struct kthread_work *work)
+{
+	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
+							cmdinfo.work);
+	allow_signal(SIGKILL);
+	if (nvme_delete_sq(nvmeq))
+		nvme_del_queue_end(nvmeq);
+}
+
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+	int i;
+	DEFINE_KTHREAD_WORKER_ONSTACK(worker);
+	struct nvme_delq_ctx dq;
+	struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
+					&worker, "nvme%d", dev->instance);
+
+	if (IS_ERR(kworker_task)) {
+		dev_err(&dev->pci_dev->dev,
+			"Failed to create queue del task\n");
+		for (i = dev->queue_count - 1; i > 0; i--)
+			nvme_disable_queue(dev, i);
+		return;
+	}
+
+	dq.waiter = NULL;
+	atomic_set(&dq.refcount, 0);
+	dq.worker = &worker;
+	for (i = dev->queue_count - 1; i > 0; i--) {
+		struct nvme_queue *nvmeq = dev->queues[i];
+
+		if (nvme_suspend_queue(nvmeq))
+			continue;
+		nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
+		nvmeq->cmdinfo.worker = dq.worker;
+		init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
+		queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
+	}
+	nvme_wait_dq(&dq, dev);
+	kthread_stop(kworker_task);
+}
+
 static void nvme_dev_shutdown(struct nvme_dev *dev)
 {
 	int i;
 
 	dev->initialized = 0;
-	for (i = dev->queue_count - 1; i >= 0; i--)
-		nvme_disable_queue(dev, i);
 
 	spin_lock(&dev_list_lock);
 	list_del_init(&dev->node);
 	spin_unlock(&dev_list_lock);
 
-	if (dev->bar)
+	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
+		for (i = dev->queue_count - 1; i >= 0; i--) {
+			struct nvme_queue *nvmeq = dev->queues[i];
+			nvme_suspend_queue(nvmeq);
+			nvme_clear_queue(nvmeq);
+		}
+	} else {
+		nvme_disable_io_queues(dev);
 		nvme_shutdown_ctrl(dev);
+		nvme_disable_queue(dev, 0);
+	}
 	nvme_dev_unmap(dev);
 }
 