author		Jens Axboe <axboe@kernel.dk>	2018-08-05 21:34:09 -0400
committer	Jens Axboe <axboe@kernel.dk>	2018-08-05 21:34:09 -0400
commit		f87b0f0dfa5496fc4a701c071fa3ce7ad7ca5152 (patch)
tree		bfaf5858c80dc1056d30825f35762b7e3dad65d4
parent		05b9ba4b550ff67d7362608828405f9e389e8988 (diff)
parent		b369b30cf510fe94d8884837039362e2ec223cec (diff)
Merge branch 'nvme-4.19' of git://git.infradead.org/nvme into for-4.19/block2
Pull NVMe changes from Christoph:

"This contains the support for TP4004, Asymmetric Namespace Access, which
 makes NVMe multipathing usable in practice."

* 'nvme-4.19' of git://git.infradead.org/nvme:
  nvmet: use Retain Async Event bit to clear AEN
  nvmet: support configuring ANA groups
  nvmet: add minimal ANA support
  nvmet: track and limit the number of namespaces per subsystem
  nvmet: keep a port pointer in nvmet_ctrl
  nvme: add ANA support
  nvme: remove nvme_req_needs_failover
  nvme: simplify the API for getting log pages
  nvme.h: add ANA definitions
  nvme.h: add support for the log specific field

Signed-off-by: Jens Axboe <axboe@kernel.dk>
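The target side of this series is driven entirely through configfs. A rough
usage sketch (the port number, subsystem NQN and ANA group ID below are
made-up examples; the attribute paths follow the ana_groups/ana_grpid layout
added in the configfs.c changes of this merge):

	# create ANA group 2 on port 1 and set its state
	mkdir /sys/kernel/config/nvmet/ports/1/ana_groups/2
	echo inaccessible > /sys/kernel/config/nvmet/ports/1/ana_groups/2/ana_state

	# move namespace 1 into that group; connected hosts receive an ANA
	# change AEN and re-read the ANA log page
	echo 2 > /sys/kernel/config/nvmet/subsystems/testnqn/namespaces/1/ana_grpid

On the host, the resulting per-path state is visible through the new
ana_grpid and ana_state namespace sysfs attributes.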
-rw-r--r--	drivers/nvme/host/core.c	 69
-rw-r--r--	drivers/nvme/host/lightnvm.c	  5
-rw-r--r--	drivers/nvme/host/multipath.c	349
-rw-r--r--	drivers/nvme/host/nvme.h	 61
-rw-r--r--	drivers/nvme/target/admin-cmd.c	104
-rw-r--r--	drivers/nvme/target/configfs.c	190
-rw-r--r--	drivers/nvme/target/core.c	 71
-rw-r--r--	drivers/nvme/target/nvmet.h	 53
-rw-r--r--	include/linux/nvme.h	 52
9 files changed, 880 insertions(+), 74 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9347f20190e5..603fe59756fb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -252,7 +252,8 @@ void nvme_complete_rq(struct request *req)
 	trace_nvme_complete_rq(req);
 
 	if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
-		if (nvme_req_needs_failover(req, status)) {
+		if ((req->cmd_flags & REQ_NVME_MPATH) &&
+		    blk_path_error(status)) {
 			nvme_failover_req(req);
 			return;
 		}
@@ -1067,7 +1068,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 EXPORT_SYMBOL_GPL(nvme_set_queue_count);
 
 #define NVME_AEN_SUPPORTED \
-	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
 
 static void nvme_enable_aen(struct nvme_ctrl *ctrl)
 {
@@ -2281,21 +2282,16 @@ out_unlock:
 	return ret;
 }
 
-int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		u8 log_page, void *log,
-		size_t size, u64 offset)
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+		void *log, size_t size, u64 offset)
 {
 	struct nvme_command c = { };
 	unsigned long dwlen = size / 4 - 1;
 
 	c.get_log_page.opcode = nvme_admin_get_log_page;
-
-	if (ns)
-		c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
-	else
-		c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
-
+	c.get_log_page.nsid = cpu_to_le32(nsid);
 	c.get_log_page.lid = log_page;
+	c.get_log_page.lsp = lsp;
 	c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
 	c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
 	c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
@@ -2304,12 +2300,6 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
 
-static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
-		size_t size)
-{
-	return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
-}
-
 static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
 {
 	int ret;
@@ -2320,8 +2310,8 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
 	if (!ctrl->effects)
 		return 0;
 
-	ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
-			sizeof(*ctrl->effects));
+	ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
+			ctrl->effects, sizeof(*ctrl->effects), 0);
 	if (ret) {
 		kfree(ctrl->effects);
 		ctrl->effects = NULL;
@@ -2412,6 +2402,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	nvme_set_queue_limits(ctrl, ctrl->admin_q);
 	ctrl->sgls = le32_to_cpu(id->sgls);
 	ctrl->kas = le16_to_cpu(id->kas);
+	ctrl->max_namespaces = le32_to_cpu(id->mnan);
 
 	if (id->rtd3e) {
 		/* us -> s */
@@ -2471,8 +2462,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
 	}
 
+	ret = nvme_mpath_init(ctrl, id);
 	kfree(id);
 
+	if (ret < 0)
+		return ret;
+
 	if (ctrl->apst_enabled && !prev_apst_enabled)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
@@ -2691,6 +2686,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
 	&dev_attr_nsid.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+	&dev_attr_ana_grpid.attr,
+	&dev_attr_ana_state.attr,
+#endif
 	NULL,
 };
 
@@ -2713,6 +2712,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 		if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
 			return 0;
 	}
+#ifdef CONFIG_NVME_MULTIPATH
+	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
+		if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+			return 0;
+		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
+			return 0;
+	}
+#endif
 	return a->mode;
 }
 
@@ -3086,8 +3093,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	nvme_get_ctrl(ctrl);
 
-	kfree(id);
-
 	device_add_disk(ctrl->device, ns->disk);
 	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
 			&nvme_ns_id_attr_group))
@@ -3097,8 +3102,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
 			ns->disk->disk_name);
 
-	nvme_mpath_add_disk(ns->head);
+	nvme_mpath_add_disk(ns, id);
 	nvme_fault_inject_init(ns);
+	kfree(id);
+
 	return;
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
@@ -3240,7 +3247,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
 	 * raced with us in reading the log page, which could cause us to miss
 	 * updates.
 	 */
-	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
+			log_size, 0);
 	if (error)
 		dev_warn(ctrl->device,
 			"reading changed ns log failed: %d\n", error);
@@ -3357,9 +3365,9 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
 	if (!log)
 		return;
 
-	if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
-		dev_warn(ctrl->device,
-			"Get FW SLOT INFO log error\n");
+	if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+			sizeof(*log), 0))
+		dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
 	kfree(log);
 }
 
@@ -3405,6 +3413,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
 		queue_work(nvme_wq, &ctrl->fw_act_work);
 		break;
+#ifdef CONFIG_NVME_MULTIPATH
+	case NVME_AER_NOTICE_ANA:
+		if (!ctrl->ana_log_buf)
+			break;
+		queue_work(nvme_wq, &ctrl->ana_work);
+		break;
+#endif
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
 	}
@@ -3437,6 +3452,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
@@ -3474,6 +3490,7 @@ static void nvme_free_ctrl(struct device *dev)
 
 	ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 	kfree(ctrl->effects);
+	nvme_mpath_uninit(ctrl);
 
 	if (subsys) {
 		mutex_lock(&subsys->lock);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index d9e4cccd5b66..7e4cf4eb9d66 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -604,8 +604,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
 	while (left) {
 		len = min_t(unsigned int, left, max_len);
 
-		ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
-				dev_meta, len, offset);
+		ret = nvme_get_log(ctrl, ns->head->ns_id,
+				NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
+				offset);
 		if (ret) {
 			dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
 			break;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 1ffd3e8b13a1..c643872f8dac 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Christoph Hellwig.
+ * Copyright (c) 2017-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,11 @@ module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
 	"turn on native support for multiple controllers per subsystem");
 
+inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+	return multipath && (ctrl->subsys->cmic & (1 << 3));
+}
+
 /*
  * If multipathing is enabled we need to always use the subsystem instance
  * number for numbering our devices to avoid conflicts between subsystems that
@@ -45,6 +50,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 void nvme_failover_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
+	u16 status = nvme_req(req)->status;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ns->head->requeue_lock, flags);
@@ -52,15 +58,35 @@ void nvme_failover_req(struct request *req)
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
 	blk_mq_end_request(req, 0);
 
-	nvme_reset_ctrl(ns->ctrl);
-	kblockd_schedule_work(&ns->head->requeue_work);
-}
+	switch (status & 0x7ff) {
+	case NVME_SC_ANA_TRANSITION:
+	case NVME_SC_ANA_INACCESSIBLE:
+	case NVME_SC_ANA_PERSISTENT_LOSS:
+		/*
+		 * If we got back an ANA error we know the controller is alive,
+		 * but not ready to serve this namespace.  The spec suggests
+		 * we should update our general state here, but due to the fact
+		 * that the admin and I/O queues are not serialized that is
+		 * fundamentally racy.  So instead just clear the current path,
+		 * mark the path as pending and kick off a re-read of the ANA
+		 * log page ASAP.
+		 */
+		nvme_mpath_clear_current_path(ns);
+		if (ns->ctrl->ana_log_buf) {
+			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
+			queue_work(nvme_wq, &ns->ctrl->ana_work);
+		}
+		break;
+	default:
+		/*
+		 * Reset the controller for any non-ANA error as we don't know
+		 * what caused the error.
+		 */
+		nvme_reset_ctrl(ns->ctrl);
+		break;
+	}
 
-bool nvme_req_needs_failover(struct request *req, blk_status_t error)
-{
-	if (!(req->cmd_flags & REQ_NVME_MPATH))
-		return false;
-	return blk_path_error(error);
+	kblockd_schedule_work(&ns->head->requeue_work);
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -75,25 +101,51 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 	up_read(&ctrl->namespaces_rwsem);
 }
 
+static const char *nvme_ana_state_names[] = {
+	[0]				= "invalid state",
+	[NVME_ANA_OPTIMIZED]		= "optimized",
+	[NVME_ANA_NONOPTIMIZED]		= "non-optimized",
+	[NVME_ANA_INACCESSIBLE]		= "inaccessible",
+	[NVME_ANA_PERSISTENT_LOSS]	= "persistent-loss",
+	[NVME_ANA_CHANGE]		= "change",
+};
+
 static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
 {
-	struct nvme_ns *ns;
+	struct nvme_ns *ns, *fallback = NULL;
 
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
-		if (ns->ctrl->state == NVME_CTRL_LIVE) {
+		if (ns->ctrl->state != NVME_CTRL_LIVE ||
+		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
+			continue;
+		switch (ns->ana_state) {
+		case NVME_ANA_OPTIMIZED:
 			rcu_assign_pointer(head->current_path, ns);
 			return ns;
+		case NVME_ANA_NONOPTIMIZED:
+			fallback = ns;
+			break;
+		default:
+			break;
 		}
 	}
 
-	return NULL;
+	if (fallback)
+		rcu_assign_pointer(head->current_path, fallback);
+	return fallback;
+}
+
+static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
+{
+	return ns->ctrl->state == NVME_CTRL_LIVE &&
+		ns->ana_state == NVME_ANA_OPTIMIZED;
 }
 
 inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
 {
 	struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
 
-	if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
+	if (unlikely(!ns || !nvme_path_is_optimized(ns)))
 		ns = __nvme_find_path(head);
 	return ns;
 }
@@ -142,7 +194,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
 
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = srcu_dereference(head->current_path, &head->srcu);
-	if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
+	if (likely(ns && nvme_path_is_optimized(ns)))
 		found = ns->queue->poll_fn(q, qc);
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return found;
@@ -176,6 +228,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	struct request_queue *q;
 	bool vwc = false;
 
+	mutex_init(&head->lock);
 	bio_list_init(&head->requeue_list);
 	spin_lock_init(&head->requeue_lock);
 	INIT_WORK(&head->requeue_work, nvme_requeue_work);
@@ -220,29 +273,232 @@ out:
 	return -ENOMEM;
 }
 
-void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static void nvme_mpath_set_live(struct nvme_ns *ns)
 {
+	struct nvme_ns_head *head = ns->head;
+
+	lockdep_assert_held(&ns->head->lock);
+
 	if (!head->disk)
 		return;
 
-	mutex_lock(&head->subsys->lock);
 	if (!(head->disk->flags & GENHD_FL_UP)) {
 		device_add_disk(&head->subsys->dev, head->disk);
 		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
 				&nvme_ns_id_attr_group))
-			pr_warn("%s: failed to create sysfs group for identification\n",
-				head->disk->disk_name);
+			dev_warn(&head->subsys->dev,
+				 "failed to create id group.\n");
+	}
+
+	kblockd_schedule_work(&ns->head->requeue_work);
+}
+
+static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
+		int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
+			void *))
+{
+	void *base = ctrl->ana_log_buf;
+	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
+	int error, i;
+
+	lockdep_assert_held(&ctrl->ana_lock);
+
+	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
+		struct nvme_ana_group_desc *desc = base + offset;
+		u32 nr_nsids = le32_to_cpu(desc->nnsids);
+		size_t nsid_buf_size = nr_nsids * sizeof(__le32);
+
+		if (WARN_ON_ONCE(desc->grpid == 0))
+			return -EINVAL;
+		if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
+			return -EINVAL;
+		if (WARN_ON_ONCE(desc->state == 0))
+			return -EINVAL;
+		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
+			return -EINVAL;
+
+		offset += sizeof(*desc);
+		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
+			return -EINVAL;
+
+		error = cb(ctrl, desc, data);
+		if (error)
+			return error;
+
+		offset += nsid_buf_size;
+		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline bool nvme_state_is_live(enum nvme_ana_state state)
+{
+	return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
+}
+
+static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
+		struct nvme_ns *ns)
+{
+	enum nvme_ana_state old;
+
+	mutex_lock(&ns->head->lock);
+	old = ns->ana_state;
+	ns->ana_grpid = le32_to_cpu(desc->grpid);
+	ns->ana_state = desc->state;
+	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
+
+	if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old))
+		nvme_mpath_set_live(ns);
+	mutex_unlock(&ns->head->lock);
+}
+
+static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
+		struct nvme_ana_group_desc *desc, void *data)
+{
+	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
+	unsigned *nr_change_groups = data;
+	struct nvme_ns *ns;
+
+	dev_info(ctrl->device, "ANA group %d: %s.\n",
+			le32_to_cpu(desc->grpid),
+			nvme_ana_state_names[desc->state]);
+
+	if (desc->state == NVME_ANA_CHANGE)
+		(*nr_change_groups)++;
+
+	if (!nr_nsids)
+		return 0;
+
+	down_write(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		if (ns->head->ns_id != le32_to_cpu(desc->nsids[n]))
+			continue;
+		nvme_update_ns_ana_state(desc, ns);
+		if (++n == nr_nsids)
+			break;
+	}
+	up_write(&ctrl->namespaces_rwsem);
+	WARN_ON_ONCE(n < nr_nsids);
+	return 0;
+}
+
+static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
+{
+	u32 nr_change_groups = 0;
+	int error;
+
+	mutex_lock(&ctrl->ana_lock);
+	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
+			groups_only ? NVME_ANA_LOG_RGO : 0,
+			ctrl->ana_log_buf, ctrl->ana_log_size, 0);
+	if (error) {
+		dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
+		goto out_unlock;
+	}
+
+	error = nvme_parse_ana_log(ctrl, &nr_change_groups,
+			nvme_update_ana_state);
+	if (error)
+		goto out_unlock;
+
+	/*
+	 * In theory we should have an ANATT timer per group as they might enter
+	 * the change state at different times.  But that is a lot of overhead
+	 * just to protect against a target that keeps entering new change
+	 * states while never finishing previous ones.  But we'll still
+	 * eventually time out once all groups are in change state, so this
+	 * isn't a big deal.
+	 *
+	 * We also double the ANATT value to provide some slack for transports
+	 * or AEN processing overhead.
+	 */
+	if (nr_change_groups)
+		mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
+	else
+		del_timer_sync(&ctrl->anatt_timer);
+out_unlock:
+	mutex_unlock(&ctrl->ana_lock);
+	return error;
+}
+
+static void nvme_ana_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
+
+	nvme_read_ana_log(ctrl, false);
+}
+
+static void nvme_anatt_timeout(struct timer_list *t)
+{
+	struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);
+
+	dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
+	nvme_reset_ctrl(ctrl);
+}
+
+void nvme_mpath_stop(struct nvme_ctrl *ctrl)
+{
+	if (!nvme_ctrl_use_ana(ctrl))
+		return;
+	del_timer_sync(&ctrl->anatt_timer);
+	cancel_work_sync(&ctrl->ana_work);
+}
+
+static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
+}
+DEVICE_ATTR_RO(ana_grpid);
+
+static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+
+	return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
+}
+DEVICE_ATTR_RO(ana_state);
+
+static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
+		struct nvme_ana_group_desc *desc, void *data)
+{
+	struct nvme_ns *ns = data;
+
+	if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
+		nvme_update_ns_ana_state(desc, ns);
+		return -ENXIO; /* just break out of the loop */
+	}
+
+	return 0;
+}
+
+void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
+{
+	if (nvme_ctrl_use_ana(ns->ctrl)) {
+		mutex_lock(&ns->ctrl->ana_lock);
+		ns->ana_grpid = le32_to_cpu(id->anagrpid);
+		nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
+		mutex_unlock(&ns->ctrl->ana_lock);
+	} else {
+		mutex_lock(&ns->head->lock);
+		ns->ana_state = NVME_ANA_OPTIMIZED;
+		nvme_mpath_set_live(ns);
+		mutex_unlock(&ns->head->lock);
 	}
-	mutex_unlock(&head->subsys->lock);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
-	sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
-			   &nvme_ns_id_attr_group);
-	del_gendisk(head->disk);
+	if (head->disk->flags & GENHD_FL_UP) {
+		sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
+				   &nvme_ns_id_attr_group);
+		del_gendisk(head->disk);
+	}
 	blk_set_queue_dying(head->disk->queue);
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
@@ -250,3 +506,52 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	blk_cleanup_queue(head->disk->queue);
 	put_disk(head->disk);
 }
+
+int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	int error;
+
+	if (!nvme_ctrl_use_ana(ctrl))
+		return 0;
+
+	ctrl->anacap = id->anacap;
+	ctrl->anatt = id->anatt;
+	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
+	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
+
+	mutex_init(&ctrl->ana_lock);
+	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
+	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
+		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
+	if (!(ctrl->anacap & (1 << 6)))
+		ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
+
+	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
+		dev_err(ctrl->device,
+			"ANA log page size (%zd) larger than MDTS (%d).\n",
+			ctrl->ana_log_size,
+			ctrl->max_hw_sectors << SECTOR_SHIFT);
+		dev_err(ctrl->device, "disabling ANA support.\n");
+		return 0;
+	}
+
+	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+	if (!ctrl->ana_log_buf)
+		goto out;
+
+	error = nvme_read_ana_log(ctrl, true);
+	if (error)
+		goto out_free_ana_log_buf;
+	return 0;
+out_free_ana_log_buf:
+	kfree(ctrl->ana_log_buf);
+out:
+	return -ENOMEM;
+}
+
+void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
+{
+	kfree(ctrl->ana_log_buf);
+}
+
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cf970f9543a6..bb4a2003c097 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -183,6 +183,7 @@ struct nvme_ctrl {
 	u16 oacs;
 	u16 nssa;
 	u16 nr_streams;
+	u32 max_namespaces;
 	atomic_t abort_limit;
 	u8 vwc;
 	u32 vs;
@@ -205,6 +206,19 @@ struct nvme_ctrl {
 	struct work_struct fw_act_work;
 	unsigned long events;
 
+#ifdef CONFIG_NVME_MULTIPATH
+	/* asymmetric namespace access: */
+	u8 anacap;
+	u8 anatt;
+	u32 anagrpmax;
+	u32 nanagrpid;
+	struct mutex ana_lock;
+	struct nvme_ana_rsp_hdr *ana_log_buf;
+	size_t ana_log_size;
+	struct timer_list anatt_timer;
+	struct work_struct ana_work;
+#endif
+
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
 	bool apst_enabled;
@@ -269,6 +283,7 @@ struct nvme_ns_head {
 	struct bio_list		requeue_list;
 	spinlock_t		requeue_lock;
 	struct work_struct	requeue_work;
+	struct mutex		lock;
 #endif
 	struct list_head	list;
 	struct srcu_struct	srcu;
@@ -295,6 +310,10 @@ struct nvme_ns {
 	struct nvme_ctrl *ctrl;
 	struct request_queue *queue;
 	struct gendisk *disk;
+#ifdef CONFIG_NVME_MULTIPATH
+	enum nvme_ana_state ana_state;
+	u32 ana_grpid;
+#endif
 	struct list_head siblings;
 	struct nvm_dev *ndev;
 	struct kref kref;
@@ -307,8 +326,9 @@ struct nvme_ns {
 	bool ext;
 	u8 pi_type;
 	unsigned long flags;
 #define NVME_NS_REMOVING	0
 #define NVME_NS_DEAD		1
+#define NVME_NS_ANA_PENDING	2
 	u16 noiob;
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@@ -436,21 +456,24 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
 
-int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		u8 log_page, void *log, size_t size, u64 offset);
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+		void *log, size_t size, u64 offset);
 
 extern const struct attribute_group nvme_ns_id_attr_group;
 extern const struct block_device_operations nvme_ns_head_ops;
 
 #ifdef CONFIG_NVME_MULTIPATH
+bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
 void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 			struct nvme_ctrl *ctrl, int *flags);
 void nvme_failover_req(struct request *req);
-bool nvme_req_needs_failover(struct request *req, blk_status_t error);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
-void nvme_mpath_add_disk(struct nvme_ns_head *head);
+void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
+int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
+void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
@@ -469,7 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 	kblockd_schedule_work(&head->requeue_work);
 }
 
+extern struct device_attribute dev_attr_ana_grpid;
+extern struct device_attribute dev_attr_ana_state;
+
 #else
+static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+	return false;
+}
 /*
  * Without the multipath code enabled, multiple controllers per subsystem are
  * visible as devices and thus we cannot use the subsystem instance.
@@ -483,11 +513,6 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 static inline void nvme_failover_req(struct request *req)
 {
 }
-static inline bool nvme_req_needs_failover(struct request *req,
-		blk_status_t error)
-{
-	return false;
-}
 static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 }
@@ -496,7 +521,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
 {
 	return 0;
 }
-static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
+		struct nvme_id_ns *id)
 {
 }
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -508,6 +534,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 {
 }
+static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
+		struct nvme_id_ctrl *id)
+{
+	return 0;
+}
+static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
+{
+}
+static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 #ifdef CONFIG_NVM
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 16a9b24270f9..f517bc562d26 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -19,6 +19,19 @@
 #include <asm/unaligned.h>
 #include "nvmet.h"
 
+/*
+ * This helper allows us to clear the AEN based on the RAE bit.
+ * Please use it when processing the log pages which are
+ * associated with the AEN.
+ */
+static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
+{
+	int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15;
+
+	if (!rae)
+		clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
+}
+
 u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 {
 	u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@@ -176,12 +189,76 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
 	if (!status)
 		status = nvmet_zero_sgl(req, len, req->data_len - len);
 	ctrl->nr_changed_ns = 0;
-	clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+	nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
 	mutex_unlock(&ctrl->lock);
 out:
 	nvmet_req_complete(req, status);
 }
 
+static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
+		struct nvme_ana_group_desc *desc)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_ns *ns;
+	u32 count = 0;
+
+	if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+			if (ns->anagrpid == grpid)
+				desc->nsids[count++] = cpu_to_le32(ns->nsid);
+		rcu_read_unlock();
+	}
+
+	desc->grpid = cpu_to_le32(grpid);
+	desc->nnsids = cpu_to_le32(count);
+	desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
+	desc->state = req->port->ana_state[grpid];
+	memset(desc->rsvd17, 0, sizeof(desc->rsvd17));
+	return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32);
+}
+
+static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
+{
+	struct nvme_ana_rsp_hdr hdr = { 0, };
+	struct nvme_ana_group_desc *desc;
+	size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */
+	size_t len;
+	u32 grpid;
+	u16 ngrps = 0;
+	u16 status;
+
+	status = NVME_SC_INTERNAL;
+	desc = kmalloc(sizeof(struct nvme_ana_group_desc) +
+			NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL);
+	if (!desc)
+		goto out;
+
+	down_read(&nvmet_ana_sem);
+	for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
+		if (!nvmet_ana_group_enabled[grpid])
+			continue;
+		len = nvmet_format_ana_group(req, grpid, desc);
+		status = nvmet_copy_to_sgl(req, offset, desc, len);
+		if (status)
+			break;
+		offset += len;
+		ngrps++;
+	}
+
+	hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
+	hdr.ngrps = cpu_to_le16(ngrps);
+	nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
+	up_read(&nvmet_ana_sem);
+
+	kfree(desc);
+
+	/* copy the header last once we know the number of groups */
+	status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr));
+out:
+	nvmet_req_complete(req, status);
+}
+
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -213,8 +290,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	 * the safest is to leave it as zeroes.
 	 */
 
-	/* we support multiple ports and multiples hosts: */
-	id->cmic = (1 << 0) | (1 << 1);
+	/* we support multiple ports, multiple hosts and ANA: */
+	id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
 
 	/* no limit on data transfer sizes for now */
 	id->mdts = 0;
@@ -252,6 +329,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
 
 	id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
+	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
 			NVME_CTRL_ONCS_WRITE_ZEROES);
 
@@ -281,6 +359,11 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 
 	id->msdbd = ctrl->ops->msdbd;
 
+	id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
+	id->anatt = 10; /* random value */
+	id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
+	id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS);
+
 	/*
 	 * Meh, we don't really support any power state. Fake up the same
 	 * values that qemu does.
@@ -322,8 +405,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 	 * nuse = ncap = nsze isn't always true, but we have no way to find
 	 * that out from the underlying device.
 	 */
-	id->ncap = id->nuse = id->nsze =
-		cpu_to_le64(ns->size >> ns->blksize_shift);
+	id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
+	switch (req->port->ana_state[ns->anagrpid]) {
+	case NVME_ANA_INACCESSIBLE:
+	case NVME_ANA_PERSISTENT_LOSS:
+		break;
+	default:
+		id->nuse = id->nsze;
+		break;
+	}
 
 	/*
 	 * We just provide a single LBA format that matches what the
@@ -337,6 +427,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 	 * controllers, but also with any other user of the block device.
 	 */
 	id->nmic = (1 << 0);
+	id->anagrpid = cpu_to_le32(ns->anagrpid);
 
 	memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
 
@@ -619,6 +710,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		case NVME_LOG_CMD_EFFECTS:
 			req->execute = nvmet_execute_get_log_cmd_effects_ns;
 			return 0;
+		case NVME_LOG_ANA:
+			req->execute = nvmet_execute_get_log_page_ana;
+			return 0;
 		}
 		break;
 	case nvme_admin_identify:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 3ba5ea5c4376..51f5a8c092b4 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -411,6 +411,39 @@ out_unlock:
 
 CONFIGFS_ATTR(nvmet_ns_, device_nguid);
 
+static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid);
+}
+
+static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	u32 oldgrpid, newgrpid;
+	int ret;
+
+	ret = kstrtou32(page, 0, &newgrpid);
+	if (ret)
+		return ret;
+
+	if (newgrpid < 1 || newgrpid > NVMET_MAX_ANAGRPS)
+		return -EINVAL;
+
+	down_write(&nvmet_ana_sem);
+	oldgrpid = ns->anagrpid;
+	nvmet_ana_group_enabled[newgrpid]++;
+	ns->anagrpid = newgrpid;
+	nvmet_ana_group_enabled[oldgrpid]--;
+	nvmet_ana_chgcnt++;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_send_ana_event(ns->subsys, NULL);
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, ana_grpid);
+
 static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page)
 {
 	return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled);
@@ -468,6 +501,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
 	&nvmet_ns_attr_device_path,
 	&nvmet_ns_attr_device_nguid,
 	&nvmet_ns_attr_device_uuid,
+	&nvmet_ns_attr_ana_grpid,
 	&nvmet_ns_attr_enable,
 	&nvmet_ns_attr_buffered_io,
 	NULL,
@@ -916,6 +950,134 @@ static const struct config_item_type nvmet_referrals_type = {
 	.ct_group_ops	= &nvmet_referral_group_ops,
 };
 
+static struct {
+	enum nvme_ana_state	state;
+	const char		*name;
+} nvmet_ana_state_names[] = {
+	{ NVME_ANA_OPTIMIZED,		"optimized" },
+	{ NVME_ANA_NONOPTIMIZED,	"non-optimized" },
+	{ NVME_ANA_INACCESSIBLE,	"inaccessible" },
+	{ NVME_ANA_PERSISTENT_LOSS,	"persistent-loss" },
+	{ NVME_ANA_CHANGE,		"change" },
+};
+
+static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_ana_group *grp = to_ana_group(item);
+	enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
+		if (state != nvmet_ana_state_names[i].state)
+			continue;
+		return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
+	}
+
+	return sprintf(page, "\n");
+}
+
+static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ana_group *grp = to_ana_group(item);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
+		if (sysfs_streq(page, nvmet_ana_state_names[i].name))
+			goto found;
+	}
+
+	pr_err("Invalid value '%s' for ana_state\n", page);
+	return -EINVAL;
+
+found:
+	down_write(&nvmet_ana_sem);
+	grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
+	nvmet_ana_chgcnt++;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_port_send_ana_event(grp->port);
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_ana_group_, ana_state);
+
+static struct configfs_attribute *nvmet_ana_group_attrs[] = {
+	&nvmet_ana_group_attr_ana_state,
+	NULL,
+};
+
+static void nvmet_ana_group_release(struct config_item *item)
+{
+	struct nvmet_ana_group *grp = to_ana_group(item);
+
+	if (grp == &grp->port->ana_default_group)
+		return;
+
+	down_write(&nvmet_ana_sem);
+	grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE;
+	nvmet_ana_group_enabled[grp->grpid]--;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_port_send_ana_event(grp->port);
+	kfree(grp);
+}
+
+static struct configfs_item_operations nvmet_ana_group_item_ops = {
+	.release		= nvmet_ana_group_release,
+};
+
+static const struct config_item_type nvmet_ana_group_type = {
+	.ct_item_ops		= &nvmet_ana_group_item_ops,
+	.ct_attrs		= nvmet_ana_group_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct config_group *nvmet_ana_groups_make_group(
+		struct config_group *group, const char *name)
+{
+	struct nvmet_port *port = ana_groups_to_port(&group->cg_item);
+	struct nvmet_ana_group *grp;
+	u32 grpid;
+	int ret;
+
+	ret = kstrtou32(name, 0, &grpid);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;
+	if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS)
+		goto out;
+
+	ret = -ENOMEM;
+	grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+	if (!grp)
+		goto out;
+	grp->port = port;
+	grp->grpid = grpid;
+
+	down_write(&nvmet_ana_sem);
+	nvmet_ana_group_enabled[grpid]++;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_port_send_ana_event(grp->port);
+
+	config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type);
+	return &grp->group;
+out:
+	return ERR_PTR(ret);
+}
+
+static struct configfs_group_operations nvmet_ana_groups_group_ops = {
+	.make_group		= nvmet_ana_groups_make_group,
+};
+
+static const struct config_item_type nvmet_ana_groups_type = {
+	.ct_group_ops		= &nvmet_ana_groups_group_ops,
+	.ct_owner		= THIS_MODULE,
+};
+
 /*
  * Ports definitions.
  */
@@ -923,6 +1085,7 @@ static void nvmet_port_release(struct config_item *item)
 {
 	struct nvmet_port *port = to_nvmet_port(item);
 
+	kfree(port->ana_state);
 	kfree(port);
 }
 
@@ -951,6 +1114,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
 {
 	struct nvmet_port *port;
 	u16 portid;
+	u32 i;
 
 	if (kstrtou16(name, 0, &portid))
 		return ERR_PTR(-EINVAL);
@@ -959,6 +1123,20 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
 	if (!port)
 		return ERR_PTR(-ENOMEM);
 
+	port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1,
+			sizeof(*port->ana_state), GFP_KERNEL);
+	if (!port->ana_state) {
+		kfree(port);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
+		if (i == NVMET_DEFAULT_ANA_GRPID)
+			port->ana_state[1] = NVME_ANA_OPTIMIZED;
+		else
+			port->ana_state[i] = NVME_ANA_INACCESSIBLE;
+	}
+
 	INIT_LIST_HEAD(&port->entry);
 	INIT_LIST_HEAD(&port->subsystems);
 	INIT_LIST_HEAD(&port->referrals);
@@ -975,6 +1153,18 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
 			"referrals", &nvmet_referrals_type);
 	configfs_add_default_group(&port->referrals_group, &port->group);
 
+	config_group_init_type_name(&port->ana_groups_group,
+			"ana_groups", &nvmet_ana_groups_type);
+	configfs_add_default_group(&port->ana_groups_group, &port->group);
+
+	port->ana_default_group.port = port;
+	port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID;
+	config_group_init_type_name(&port->ana_default_group.group,
+			__stringify(NVMET_DEFAULT_ANA_GRPID),
+			&nvmet_ana_group_type);
+	configfs_add_default_group(&port->ana_default_group.group,
+			&port->ana_groups_group);
+
 	return &port->group;
 }
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index ddd85715a00a..3ceb7a03bb2a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -40,6 +40,10 @@ static DEFINE_IDA(cntlid_ida);
  */
 DECLARE_RWSEM(nvmet_config_sem);
 
+u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
+u64 nvmet_ana_chgcnt;
+DECLARE_RWSEM(nvmet_ana_sem);
+
 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
 		const char *subsysnqn);
 
@@ -190,6 +194,33 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
 	}
 }
 
+void nvmet_send_ana_event(struct nvmet_subsys *subsys,
+		struct nvmet_port *port)
+{
+	struct nvmet_ctrl *ctrl;
+
+	mutex_lock(&subsys->lock);
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (port && ctrl->port != port)
+			continue;
+		if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
+			continue;
+		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
+	}
+	mutex_unlock(&subsys->lock);
+}
+
+void nvmet_port_send_ana_event(struct nvmet_port *port)
+{
+	struct nvmet_subsys_link *p;
+
+	down_read(&nvmet_config_sem);
+	list_for_each_entry(p, &port->subsystems, entry)
+		nvmet_send_ana_event(p->subsys, port);
+	up_read(&nvmet_config_sem);
+}
+
 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
 {
 	int ret = 0;
@@ -337,9 +368,13 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
 int nvmet_ns_enable(struct nvmet_ns *ns)
 {
 	struct nvmet_subsys *subsys = ns->subsys;
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&subsys->lock);
+	ret = -EMFILE;
+	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
+		goto out_unlock;
+	ret = 0;
 	if (ns->enabled)
 		goto out_unlock;
 
@@ -374,6 +409,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 
 		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
 	}
+	subsys->nr_namespaces++;
 
 	nvmet_ns_changed(subsys, ns->nsid);
 	ns->enabled = true;
@@ -414,6 +450,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
 	percpu_ref_exit(&ns->ref);
 
 	mutex_lock(&subsys->lock);
+	subsys->nr_namespaces--;
 	nvmet_ns_changed(subsys, ns->nsid);
 	nvmet_ns_dev_disable(ns);
 out_unlock:
@@ -424,6 +461,10 @@ void nvmet_ns_free(struct nvmet_ns *ns)
 {
 	nvmet_ns_disable(ns);
 
+	down_write(&nvmet_ana_sem);
+	nvmet_ana_group_enabled[ns->anagrpid]--;
+	up_write(&nvmet_ana_sem);
+
 	kfree(ns->device_path);
 	kfree(ns);
 }
@@ -441,6 +482,12 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 
 	ns->nsid = nsid;
 	ns->subsys = subsys;
+
+	down_write(&nvmet_ana_sem);
+	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
+	nvmet_ana_group_enabled[ns->anagrpid]++;
+	up_write(&nvmet_ana_sem);
+
 	uuid_gen(&ns->uuid);
 	ns->buffered_io = false;
 
@@ -548,6 +595,20 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
+static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
+		struct nvmet_ns *ns)
+{
+	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
+
+	if (unlikely(state == NVME_ANA_INACCESSIBLE))
+		return NVME_SC_ANA_INACCESSIBLE;
+	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
+		return NVME_SC_ANA_PERSISTENT_LOSS;
+	if (unlikely(state == NVME_ANA_CHANGE))
+		return NVME_SC_ANA_TRANSITION;
+	return 0;
+}
+
 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
@@ -560,6 +621,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
 	if (unlikely(!req->ns))
 		return NVME_SC_INVALID_NS | NVME_SC_DNR;
+	ret = nvmet_check_ana_state(req->port, req->ns);
+	if (unlikely(ret))
+		return ret;
 
 	if (req->ns->file)
 		return nvmet_file_parse_io_cmd(req);
@@ -876,6 +940,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 
 	nvmet_init_cap(ctrl);
 
+	ctrl->port = req->port;
+
 	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
 	INIT_LIST_HEAD(&ctrl->async_events);
 
@@ -1115,12 +1181,15 @@ static int __init nvmet_init(void)
 {
 	int error;
 
+	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
+
 	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
 			WQ_MEM_RECLAIM, 0);
 	if (!buffered_io_wq) {
 		error = -ENOMEM;
 		goto out;
 	}
+
 	error = nvmet_init_discovery();
 	if (error)
 		goto out;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 688993855402..22941045f46e 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -30,12 +30,11 @@
 #define NVMET_ASYNC_EVENTS		4
 #define NVMET_ERROR_LOG_SLOTS		128
 
-
 /*
  * Supported optional AENs:
  */
 #define NVMET_AEN_CFG_OPTIONAL \
-	NVME_AEN_CFG_NS_ATTR
+	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
 
 /*
  * Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
@@ -64,6 +63,7 @@ struct nvmet_ns {
 	loff_t			size;
 	u8			nguid[16];
 	uuid_t			uuid;
+	u32			anagrpid;
 
 	bool			buffered_io;
 	bool			enabled;
@@ -98,6 +98,18 @@ struct nvmet_sq {
98 struct completion confirm_done; 98 struct completion confirm_done;
99}; 99};
100 100
101struct nvmet_ana_group {
102 struct config_group group;
103 struct nvmet_port *port;
104 u32 grpid;
105};
106
107static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
108{
109 return container_of(to_config_group(item), struct nvmet_ana_group,
110 group);
111}
112
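
to_ana_group() is the usual configfs container_of helper: attribute handlers receive a config_item and need the enclosing group back. A hypothetical show handler for illustration only (the real attributes land in configfs.c in this series, and print the state as a name rather than a raw number):

static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
		char *page)
{
	struct nvmet_ana_group *grp = to_ana_group(item);

	/* ana_state[] lives in the port and is indexed by group ID. */
	return sprintf(page, "%d\n", grp->port->ana_state[grp->grpid]);
}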
101/** 113/**
102 * struct nvmet_port - Common structure to keep port 114 * struct nvmet_port - Common structure to keep port
103 * information for the target. 115 * information for the target.
@@ -115,6 +127,9 @@ struct nvmet_port {
115 struct list_head subsystems; 127 struct list_head subsystems;
116 struct config_group referrals_group; 128 struct config_group referrals_group;
117 struct list_head referrals; 129 struct list_head referrals;
130 struct config_group ana_groups_group;
131 struct nvmet_ana_group ana_default_group;
132 enum nvme_ana_state *ana_state;
118 void *priv; 133 void *priv;
119 bool enabled; 134 bool enabled;
120 int inline_data_size; 135 int inline_data_size;
@@ -126,6 +141,13 @@ static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
126 group); 141 group);
127} 142}
128 143
144static inline struct nvmet_port *ana_groups_to_port(
145 struct config_item *item)
146{
147 return container_of(to_config_group(item), struct nvmet_port,
148 ana_groups_group);
149}
150
129struct nvmet_ctrl { 151struct nvmet_ctrl {
130 struct nvmet_subsys *subsys; 152 struct nvmet_subsys *subsys;
131 struct nvmet_cq **cqs; 153 struct nvmet_cq **cqs;
@@ -140,6 +162,8 @@ struct nvmet_ctrl {
140 u16 cntlid; 162 u16 cntlid;
141 u32 kato; 163 u32 kato;
142 164
165 struct nvmet_port *port;
166
143 u32 aen_enabled; 167 u32 aen_enabled;
144 unsigned long aen_masked; 168 unsigned long aen_masked;
145 struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS]; 169 struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
@@ -168,6 +192,7 @@ struct nvmet_subsys {
168 struct kref ref; 192 struct kref ref;
169 193
170 struct list_head namespaces; 194 struct list_head namespaces;
195 unsigned int nr_namespaces;
171 unsigned int max_nsid; 196 unsigned int max_nsid;
172 197
173 struct list_head ctrls; 198 struct list_head ctrls;
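
nr_namespaces exists so the target can enforce NVMET_MAX_NAMESPACES (defined further down) when a namespace is enabled; a sketch of that check, roughly as the "track and limit the number of namespaces" patch in this merge does it:

/* Sketch of the enable-time limit check, under the subsystem lock: */
if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
	return -EMFILE;
subsys->nr_namespaces++;	/* dropped again when the ns is freed */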
@@ -340,6 +365,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
340struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid); 365struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
341void nvmet_ns_free(struct nvmet_ns *ns); 366void nvmet_ns_free(struct nvmet_ns *ns);
342 367
368void nvmet_send_ana_event(struct nvmet_subsys *subsys,
369 struct nvmet_port *port);
370void nvmet_port_send_ana_event(struct nvmet_port *port);
371
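
These two helpers are the notification half of an ANA change: after a port's state array is updated, connected hosts must be told to re-read the log. A sketch of the expected update sequence under the ANA semaphore declared below (the function name is hypothetical; the real caller is the configfs ana_state store handler added in this series):

static void demo_set_group_inaccessible(struct nvmet_port *port, u32 grpid)
{
	down_write(&nvmet_ana_sem);
	port->ana_state[grpid] = NVME_ANA_INACCESSIBLE;
	nvmet_ana_chgcnt++;	/* reported back in the ANA log header */
	up_write(&nvmet_ana_sem);

	nvmet_port_send_ana_event(port);	/* AEN to hosts on this port */
}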
343int nvmet_register_transport(const struct nvmet_fabrics_ops *ops); 372int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
344void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops); 373void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
345 374
@@ -360,6 +389,22 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd);
360#define NVMET_QUEUE_SIZE 1024 389#define NVMET_QUEUE_SIZE 1024
361#define NVMET_NR_QUEUES 128 390#define NVMET_NR_QUEUES 128
362#define NVMET_MAX_CMD NVMET_QUEUE_SIZE 391#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
392
393/*
394 * Nice round number that makes a list of nsids fit into a page.
395 * Should become tunable at some point in the future.
396 */
397#define NVMET_MAX_NAMESPACES 1024
398
399/*
400 * 0 is not a valid ANA group ID, so we start numbering at 1.
401 *
402 * ANA Group 1 exists without manual intervention, has namespaces assigned to it
403 * by default, and is available in an optimized state through all ports.
404 */
405#define NVMET_MAX_ANAGRPS 128
406#define NVMET_DEFAULT_ANA_GRPID 1
407
363#define NVMET_KAS 10 408#define NVMET_KAS 10
364#define NVMET_DISC_KATO 120 409#define NVMET_DISC_KATO 120
365 410
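
NVMET_MAX_ANAGRPS and NVMET_MAX_NAMESPACES together also bound the worst-case ANA log: one header, one descriptor per group, and each namespace listed in exactly one group. A sizing sketch, using the structures defined in include/linux/nvme.h below (the host side of this series sizes its buffer the same way from NANAGRPID and its namespace limit):

size_t ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
	NVMET_MAX_ANAGRPS * sizeof(struct nvme_ana_group_desc) +
	NVMET_MAX_NAMESPACES * sizeof(__le32);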
@@ -373,6 +418,10 @@ extern struct nvmet_subsys *nvmet_disc_subsys;
373extern u64 nvmet_genctr; 418extern u64 nvmet_genctr;
374extern struct rw_semaphore nvmet_config_sem; 419extern struct rw_semaphore nvmet_config_sem;
375 420
421extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
422extern u64 nvmet_ana_chgcnt;
423extern struct rw_semaphore nvmet_ana_sem;
424
376bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, 425bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
377 const char *hostnqn); 426 const char *hostnqn);
378 427
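
The three externs above are all the shared state needed to serve the ANA log. A hedged sketch of a log generator built on them (the real handler is added to admin-cmd.c in this series; SGL copying, bounds checks, and the per-group nsid lists are elided here):

static size_t nvmet_fill_ana_log(struct nvmet_port *port, void *buf)
{
	struct nvme_ana_rsp_hdr *hdr = buf;
	struct nvme_ana_group_desc *desc = (void *)(hdr + 1);
	u32 grpid, ngrps = 0;

	down_read(&nvmet_ana_sem);
	for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
		if (!nvmet_ana_group_enabled[grpid])
			continue;
		desc->grpid = cpu_to_le32(grpid);
		desc->nnsids = cpu_to_le32(0);	/* nsid list elided */
		desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
		desc->state = port->ana_state[grpid];
		desc++;
		ngrps++;
	}
	hdr->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
	hdr->ngrps = cpu_to_le16(ngrps);
	up_read(&nvmet_ana_sem);

	return (void *)desc - buf;
}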
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 80dfedcf0bf7..64c9175723de 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
242 __le32 sanicap; 242 __le32 sanicap;
243 __le32 hmminds; 243 __le32 hmminds;
244 __le16 hmmaxd; 244 __le16 hmmaxd;
245 __u8 rsvd338[174]; 245 __u8 rsvd338[4];
246 __u8 anatt;
247 __u8 anacap;
248 __le32 anagrpmax;
249 __le32 nanagrpid;
250 __u8 rsvd352[160];
246 __u8 sqes; 251 __u8 sqes;
247 __u8 cqes; 252 __u8 cqes;
248 __le16 maxcmd; 253 __le16 maxcmd;
@@ -258,7 +263,8 @@ struct nvme_id_ctrl {
258 __le16 acwu; 263 __le16 acwu;
259 __u8 rsvd534[2]; 264 __u8 rsvd534[2];
260 __le32 sgls; 265 __le32 sgls;
261 __u8 rsvd540[228]; 266 __le32 mnan;
267 __u8 rsvd544[224];
262 char subnqn[256]; 268 char subnqn[256];
263 __u8 rsvd1024[768]; 269 __u8 rsvd1024[768];
264 __le32 ioccsz; 270 __le32 ioccsz;
@@ -312,7 +318,9 @@ struct nvme_id_ns {
312 __le16 nabspf; 318 __le16 nabspf;
313 __le16 noiob; 319 __le16 noiob;
314 __u8 nvmcap[16]; 320 __u8 nvmcap[16];
315 __u8 rsvd64[40]; 321 __u8 rsvd64[28];
322 __le32 anagrpid;
323 __u8 rsvd96[8];
316 __u8 nguid[16]; 324 __u8 nguid[16];
317 __u8 eui64[8]; 325 __u8 eui64[8];
318 struct nvme_lbaf lbaf[16]; 326 struct nvme_lbaf lbaf[16];
@@ -425,6 +433,32 @@ struct nvme_effects_log {
425 __u8 resv[2048]; 433 __u8 resv[2048];
426}; 434};
427 435
436enum nvme_ana_state {
437 NVME_ANA_OPTIMIZED = 0x01,
438 NVME_ANA_NONOPTIMIZED = 0x02,
439 NVME_ANA_INACCESSIBLE = 0x03,
440 NVME_ANA_PERSISTENT_LOSS = 0x04,
441 NVME_ANA_CHANGE = 0x0f,
442};
443
444struct nvme_ana_group_desc {
445 __le32 grpid;
446 __le32 nnsids;
447 __le64 chgcnt;
448 __u8 state;
449 __u8 rsvd17[7];
450 __le32 nsids[];
451};
452
453/* flag for the log specific field of the ANA log */
454#define NVME_ANA_LOG_RGO (1 << 0)
455
456struct nvme_ana_rsp_hdr {
457 __le64 chgcnt;
458 __le16 ngrps;
459 __le16 rsvd10[3];
460};
461
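
On the wire, the ANA log page is an nvme_ana_rsp_hdr followed by ngrps variable-length nvme_ana_group_desc entries; with NVME_ANA_LOG_RGO set, the nsid lists are omitted. A self-contained userspace sketch that walks a log fetched without RGO, using fixed-width stand-ins for the kernel types above:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct ana_rsp_hdr {		/* mirrors struct nvme_ana_rsp_hdr */
	uint64_t chgcnt;
	uint16_t ngrps;
	uint16_t rsvd10[3];
};

struct ana_group_desc {		/* mirrors struct nvme_ana_group_desc */
	uint32_t grpid;
	uint32_t nnsids;
	uint64_t chgcnt;
	uint8_t  state;
	uint8_t  rsvd17[7];
	uint32_t nsids[];
};

static void walk_ana_log(const void *buf)
{
	const struct ana_rsp_hdr *hdr = buf;
	const uint8_t *p = (const uint8_t *)(hdr + 1);
	uint16_t i;

	for (i = 0; i < le16toh(hdr->ngrps); i++) {
		const struct ana_group_desc *d = (const void *)p;
		uint32_t nnsids = le32toh(d->nnsids);

		/* The ANA state sits in bits 3:0 of the state byte. */
		printf("group %u: state %#x, %u nsids\n",
		       le32toh(d->grpid), d->state & 0xf, nnsids);
		p += sizeof(*d) + nnsids * sizeof(uint32_t);
	}
}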
428enum { 462enum {
429 NVME_SMART_CRIT_SPARE = 1 << 0, 463 NVME_SMART_CRIT_SPARE = 1 << 0,
430 NVME_SMART_CRIT_TEMPERATURE = 1 << 1, 464 NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
@@ -444,11 +478,13 @@ enum {
444enum { 478enum {
445 NVME_AER_NOTICE_NS_CHANGED = 0x00, 479 NVME_AER_NOTICE_NS_CHANGED = 0x00,
446 NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, 480 NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
481 NVME_AER_NOTICE_ANA = 0x03,
447}; 482};
448 483
449enum { 484enum {
450 NVME_AEN_CFG_NS_ATTR = 1 << 8, 485 NVME_AEN_CFG_NS_ATTR = 1 << 8,
451 NVME_AEN_CFG_FW_ACT = 1 << 9, 486 NVME_AEN_CFG_FW_ACT = 1 << 9,
487 NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
452}; 488};
453 489
454struct nvme_lba_range_type { 490struct nvme_lba_range_type {
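
NVME_AER_NOTICE_ANA arrives in the notice byte of the AEN completion result, and the controller only sends it when the host enabled NVME_AEN_CFG_ANA_CHANGE. A host-side routing sketch, assuming the per-controller ana_work worker the host ANA patch in this merge adds (bits 2:0 of the result carry the event type, bits 15:8 the notice type):

u32 result = le32_to_cpu(res.u32);

if ((result & 0x7) == NVME_AER_NOTICE) {
	switch ((result & 0xff00) >> 8) {
	case NVME_AER_NOTICE_ANA:
		/* re-read the ANA log outside completion context */
		queue_work(nvme_wq, &ctrl->ana_work);
		break;
	}
}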
@@ -763,6 +799,7 @@ enum {
763 NVME_LOG_FW_SLOT = 0x03, 799 NVME_LOG_FW_SLOT = 0x03,
764 NVME_LOG_CHANGED_NS = 0x04, 800 NVME_LOG_CHANGED_NS = 0x04,
765 NVME_LOG_CMD_EFFECTS = 0x05, 801 NVME_LOG_CMD_EFFECTS = 0x05,
802 NVME_LOG_ANA = 0x0c,
766 NVME_LOG_DISC = 0x70, 803 NVME_LOG_DISC = 0x70,
767 NVME_LOG_RESERVATION = 0x80, 804 NVME_LOG_RESERVATION = 0x80,
768 NVME_FWACT_REPL = (0 << 3), 805 NVME_FWACT_REPL = (0 << 3),
@@ -885,7 +922,7 @@ struct nvme_get_log_page_command {
885 __u64 rsvd2[2]; 922 __u64 rsvd2[2];
886 union nvme_data_ptr dptr; 923 union nvme_data_ptr dptr;
887 __u8 lid; 924 __u8 lid;
888 __u8 rsvd10; 925 __u8 lsp; /* upper 4 bits reserved */
889 __le16 numdl; 926 __le16 numdl;
890 __le16 numdu; 927 __le16 numdu;
891 __u16 rsvd11; 928 __u16 rsvd11;
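
With the lsp byte in place, fetching the ANA log with the Return Groups Only flag looks like the sketch below; dwlen is the zero-based dword count, split across numdl/numdu, and ana_log_size is assumed to come from the sizing shown earlier:

struct nvme_command c = { };
unsigned long dwlen = ana_log_size / 4 - 1;

c.get_log_page.opcode = nvme_admin_get_log_page;
c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
c.get_log_page.lid = NVME_LOG_ANA;
c.get_log_page.lsp = NVME_ANA_LOG_RGO;	/* group states only, no nsids */
c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);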
@@ -1185,6 +1222,13 @@ enum {
1185 NVME_SC_ACCESS_DENIED = 0x286, 1222 NVME_SC_ACCESS_DENIED = 0x286,
1186 NVME_SC_UNWRITTEN_BLOCK = 0x287, 1223 NVME_SC_UNWRITTEN_BLOCK = 0x287,
1187 1224
1225 /*
1226 * Path-related Errors:
1227 */
1228 NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
1229 NVME_SC_ANA_INACCESSIBLE = 0x302,
1230 NVME_SC_ANA_TRANSITION = 0x303,
1231
1188 NVME_SC_DNR = 0x4000, 1232 NVME_SC_DNR = 0x4000,
1189}; 1233};
1190 1234
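
Note that all three path-related codes sit below NVME_SC_DNR, so a host's retry logic still sees them as retryable; only a status with DNR set is final. Tying the pieces together on the host, with nvme_is_ana_path_error() from the earlier sketch and the other helpers purely hypothetical names:

if (status != 0) {
	if (status & NVME_SC_DNR)
		complete_with_error(req);	/* hypothetical: never retry */
	else if (nvme_is_ana_path_error(status))
		fail_over(req);			/* hypothetical: try another path */
	else
		retry_same_path(req);		/* hypothetical */
}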