-rw-r--r--   Documentation/ABI/testing/sysfs-block      9
-rw-r--r--   Documentation/block/bfq-iosched.txt        7
-rw-r--r--   Documentation/block/null_blk.txt           3
-rw-r--r--   Documentation/block/queue-sysfs.txt        7
-rw-r--r--   block/blk-core.c                          20
-rw-r--r--   drivers/ata/ahci.h                         2
-rw-r--r--   drivers/ata/ahci_mvebu.c                  87
-rw-r--r--   drivers/ata/libahci_platform.c            13
-rw-r--r--   drivers/block/loop.c                      35
-rw-r--r--   drivers/block/null_blk.h                   1
-rw-r--r--   drivers/nvme/host/core.c                  19
-rw-r--r--   drivers/nvme/host/fabrics.c                2
-rw-r--r--   drivers/nvme/host/multipath.c              2
-rw-r--r--   drivers/nvme/host/nvme.h                   5
-rw-r--r--   drivers/nvme/host/pci.c                   67
-rw-r--r--   drivers/nvme/host/tcp.c                   16
16 files changed, 229 insertions, 66 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 7710d4022b19..dfad7427817c 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -279,3 +279,12 @@ Description:
 		size in 512B sectors of the zones of the device, with
 		the eventual exception of the last zone of the device
 		which may be smaller.
+
+What:		/sys/block/<disk>/queue/io_timeout
+Date:		November 2018
+Contact:	Weiping Zhang <zhangweiping@didiglobal.com>
+Description:
+		io_timeout is the request timeout in milliseconds. If a request
+		does not complete in this time then the block driver timeout
+		handler is invoked. That timeout handler can decide to retry
+		the request, to fail it or to start a device recovery strategy.
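
The io_timeout attribute documented above can be adjusted from userspace like any other queue sysfs file. A minimal sketch, assuming a disk named "sda" and an illustrative 60-second timeout (both are assumptions, not taken from the patch):

#include <stdio.h>

int main(void)
{
        /* /sys/block/<disk>/queue/io_timeout takes a value in milliseconds. */
        FILE *f = fopen("/sys/block/sda/queue/io_timeout", "w");

        if (!f) {
                perror("fopen");
                return 1;
        }
        fprintf(f, "%u\n", 60000u);     /* allow up to 60 s per request */
        fclose(f);
        return 0;
}
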
diff --git a/Documentation/block/bfq-iosched.txt b/Documentation/block/bfq-iosched.txt
index 8d8d8f06cab2..98a8dd5ee385 100644
--- a/Documentation/block/bfq-iosched.txt
+++ b/Documentation/block/bfq-iosched.txt
@@ -357,6 +357,13 @@ video playing/streaming, a very low drop rate may be more important
 than maximum throughput. In these cases, consider setting the
 strict_guarantees parameter.
 
+slice_idle_us
+-------------
+
+Controls the same tuning parameter as slice_idle, but in microseconds.
+Either tunable can be used to set idling behavior.  Afterwards, the
+other tunable will reflect the newly set value in sysfs.
+
 strict_guarantees
 -----------------
 
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index ea2dafe49ae8..4cad1024fff7 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -88,7 +88,8 @@ shared_tags=[0/1]: Default: 0
 
 zoned=[0/1]: Default: 0
  0: Block device is exposed as a random-access block device.
- 1: Block device is exposed as a host-managed zoned block device.
+ 1: Block device is exposed as a host-managed zoned block device. Requires
+    CONFIG_BLK_DEV_ZONED.
 
 zone_size=[MB]: Default: 256
  Per zone size when exposed as a zoned block device. Must be a power of two.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 39e286d7afc9..83b457e24bba 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -67,6 +67,13 @@ If set to a value larger than 0, the kernel will put the process issuing
 IO to sleep for this amount of microseconds before entering classic
 polling.
 
+io_timeout (RW)
+---------------
+io_timeout is the request timeout in milliseconds. If a request does not
+complete in this time then the block driver timeout handler is invoked.
+That timeout handler can decide to retry the request, to fail it or to start
+a device recovery strategy.
+
 iostats (RW)
 -------------
 This file is used to control (on/off) the iostats accounting of the
diff --git a/block/blk-core.c b/block/blk-core.c
index c78042975737..3c5f61ceeb67 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -661,7 +661,6 @@ no_merge:
  * blk_attempt_plug_merge - try to merge with %current's plugged list
  * @q: request_queue new bio is being queued at
  * @bio: new bio being queued
- * @request_count: out parameter for number of traversed plugged requests
  * @same_queue_rq: pointer to &struct request that gets filled in when
  *		another request associated with @q is found on the plug list
  *		(optional, may be %NULL)
@@ -1683,6 +1682,15 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
  * @plug:	The &struct blk_plug that needs to be initialized
  *
  * Description:
+ *   blk_start_plug() indicates to the block layer an intent by the caller
+ *   to submit multiple I/O requests in a batch.  The block layer may use
+ *   this hint to defer submitting I/Os from the caller until blk_finish_plug()
+ *   is called.  However, the block layer may choose to submit requests
+ *   before a call to blk_finish_plug() if the number of queued I/Os
+ *   exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than
+ *   %BLK_PLUG_FLUSH_SIZE.  The queued I/Os may also be submitted early if
+ *   the task schedules (see below).
+ *
  *   Tracking blk_plug inside the task_struct will help with auto-flushing the
  *   pending I/O should the task end up blocking between blk_start_plug() and
  *   blk_finish_plug(). This is important from a performance perspective, but
@@ -1765,6 +1773,16 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		blk_mq_flush_plug_list(plug, from_schedule);
 }
 
+/**
+ * blk_finish_plug - mark the end of a batch of submitted I/O
+ * @plug:	The &struct blk_plug passed to blk_start_plug()
+ *
+ * Description:
+ * Indicate that a batch of I/O submissions is complete.  This function
+ * must be paired with an initial call to blk_start_plug().  The intent
+ * is to allow the block layer to optimize I/O submission.  See the
+ * documentation for blk_start_plug() for more information.
+ */
 void blk_finish_plug(struct blk_plug *plug)
 {
 	if (plug != current->plug)
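
The two kernel-doc blocks added above describe the plugging API as a pair. A minimal sketch of a caller, assuming an array of already-built bios (the helper name and the bio array are illustrative, not part of blk-core.c):

#include <linux/bio.h>
#include <linux/blkdev.h>

static void submit_bio_batch(struct bio **bios, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);          /* hint that a batch of I/O follows */
        for (i = 0; i < nr; i++)
                submit_bio(bios[i]);
        blk_finish_plug(&plug);         /* flush whatever is still held on the plug */
}

As the new description notes, the plug may be flushed earlier if the batch grows past %BLK_MAX_REQUEST_COUNT or the task schedules, so callers cannot rely on the I/O being deferred until blk_finish_plug().
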
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index ef356e70e6de..8810475f307a 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -254,6 +254,8 @@ enum {
 	AHCI_HFLAG_IS_MOBILE		= (1 << 25), /* mobile chipset, use
 							SATA_MOBILE_LPM_POLICY
 							as default lpm_policy */
+	AHCI_HFLAG_SUSPEND_PHYS		= (1 << 26), /* handle PHYs during
+							suspend/resume */
 
 	/* ap->flags bits */
 
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index f9cb51be38eb..d4bba3ace45d 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -28,6 +28,11 @@
 #define AHCI_WINDOW_BASE(win)	(0x64 + ((win) << 4))
 #define AHCI_WINDOW_SIZE(win)	(0x68 + ((win) << 4))
 
+struct ahci_mvebu_plat_data {
+	int (*plat_config)(struct ahci_host_priv *hpriv);
+	unsigned int flags;
+};
+
 static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv,
 				   const struct mbus_dram_target_info *dram)
 {
@@ -62,6 +67,35 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
 	writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
 }
 
+static int ahci_mvebu_armada_380_config(struct ahci_host_priv *hpriv)
+{
+	const struct mbus_dram_target_info *dram;
+	int rc = 0;
+
+	dram = mv_mbus_dram_info();
+	if (dram)
+		ahci_mvebu_mbus_config(hpriv, dram);
+	else
+		rc = -ENODEV;
+
+	ahci_mvebu_regret_option(hpriv);
+
+	return rc;
+}
+
+static int ahci_mvebu_armada_3700_config(struct ahci_host_priv *hpriv)
+{
+	u32 reg;
+
+	writel(0, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_ADDR);
+
+	reg = readl(hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
+	reg |= BIT(6);
+	writel(reg, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
+
+	return 0;
+}
+
 /**
  * ahci_mvebu_stop_engine
  *
@@ -126,13 +160,9 @@ static int ahci_mvebu_resume(struct platform_device *pdev)
 {
 	struct ata_host *host = platform_get_drvdata(pdev);
 	struct ahci_host_priv *hpriv = host->private_data;
-	const struct mbus_dram_target_info *dram;
+	const struct ahci_mvebu_plat_data *pdata = hpriv->plat_data;
 
-	dram = mv_mbus_dram_info();
-	if (dram)
-		ahci_mvebu_mbus_config(hpriv, dram);
-
-	ahci_mvebu_regret_option(hpriv);
+	pdata->plat_config(hpriv);
 
 	return ahci_platform_resume_host(&pdev->dev);
 }
@@ -154,29 +184,30 @@ static struct scsi_host_template ahci_platform_sht = {
 
 static int ahci_mvebu_probe(struct platform_device *pdev)
 {
+	const struct ahci_mvebu_plat_data *pdata;
 	struct ahci_host_priv *hpriv;
-	const struct mbus_dram_target_info *dram;
 	int rc;
 
+	pdata = of_device_get_match_data(&pdev->dev);
+	if (!pdata)
+		return -EINVAL;
+
 	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
+	hpriv->flags |= pdata->flags;
+	hpriv->plat_data = (void *)pdata;
+
 	rc = ahci_platform_enable_resources(hpriv);
 	if (rc)
 		return rc;
 
 	hpriv->stop_engine = ahci_mvebu_stop_engine;
 
-	if (of_device_is_compatible(pdev->dev.of_node,
-				    "marvell,armada-380-ahci")) {
-		dram = mv_mbus_dram_info();
-		if (!dram)
-			return -ENODEV;
-
-		ahci_mvebu_mbus_config(hpriv, dram);
-		ahci_mvebu_regret_option(hpriv);
-	}
+	rc = pdata->plat_config(hpriv);
+	if (rc)
+		goto disable_resources;
 
 	rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info,
 				     &ahci_platform_sht);
@@ -190,18 +221,28 @@ disable_resources:
 	return rc;
 }
 
+static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = {
+	.plat_config = ahci_mvebu_armada_380_config,
+};
+
+static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = {
+	.plat_config = ahci_mvebu_armada_3700_config,
+	.flags = AHCI_HFLAG_SUSPEND_PHYS,
+};
+
 static const struct of_device_id ahci_mvebu_of_match[] = {
-	{ .compatible = "marvell,armada-380-ahci", },
-	{ .compatible = "marvell,armada-3700-ahci", },
+	{
+		.compatible = "marvell,armada-380-ahci",
+		.data = &ahci_mvebu_armada_380_plat_data,
+	},
+	{
+		.compatible = "marvell,armada-3700-ahci",
+		.data = &ahci_mvebu_armada_3700_plat_data,
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match);
 
-/*
- * We currently don't provide power management related operations,
- * since there is no suspend/resume support at the platform level for
- * Armada 38x for the moment.
- */
 static struct platform_driver ahci_mvebu_driver = {
 	.probe = ahci_mvebu_probe,
 	.remove = ata_platform_remove_one,
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 4b900fc659f7..81b1a3332ed6 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -56,6 +56,12 @@ static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
 		if (rc)
 			goto disable_phys;
 
+		rc = phy_set_mode(hpriv->phys[i], PHY_MODE_SATA);
+		if (rc) {
+			phy_exit(hpriv->phys[i]);
+			goto disable_phys;
+		}
+
 		rc = phy_power_on(hpriv->phys[i]);
 		if (rc) {
 			phy_exit(hpriv->phys[i]);
@@ -738,6 +744,9 @@ int ahci_platform_suspend_host(struct device *dev)
 	writel(ctl, mmio + HOST_CTL);
 	readl(mmio + HOST_CTL); /* flush */
 
+	if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS)
+		ahci_platform_disable_phys(hpriv);
+
 	return ata_host_suspend(host, PMSG_SUSPEND);
 }
 EXPORT_SYMBOL_GPL(ahci_platform_suspend_host);
@@ -756,6 +765,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend_host);
 int ahci_platform_resume_host(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
+	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
 	if (dev->power.power_state.event == PM_EVENT_SUSPEND) {
@@ -766,6 +776,9 @@ int ahci_platform_resume_host(struct device *dev)
 		ahci_init_controller(host);
 	}
 
+	if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS)
+		ahci_platform_enable_phys(hpriv);
+
 	ata_host_resume(host);
 
 	return 0;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b8a0720d3653..cf5538942834 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1190,6 +1190,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 		goto out_unlock;
 	}
 
+	if (lo->lo_offset != info->lo_offset ||
+	    lo->lo_sizelimit != info->lo_sizelimit) {
+		sync_blockdev(lo->lo_device);
+		kill_bdev(lo->lo_device);
+	}
+
 	/* I/O need to be drained during transfer transition */
 	blk_mq_freeze_queue(lo->lo_queue);
 
@@ -1218,6 +1224,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 
 	if (lo->lo_offset != info->lo_offset ||
 	    lo->lo_sizelimit != info->lo_sizelimit) {
+		/* kill_bdev should have truncated all the pages */
+		if (lo->lo_device->bd_inode->i_mapping->nrpages) {
+			err = -EAGAIN;
+			pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+				__func__, lo->lo_number, lo->lo_file_name,
+				lo->lo_device->bd_inode->i_mapping->nrpages);
+			goto out_unfreeze;
+		}
 		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
 			err = -EFBIG;
 			goto out_unfreeze;
@@ -1443,22 +1457,39 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
 
 static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
 {
+	int err = 0;
+
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
 
 	if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
 		return -EINVAL;
 
+	if (lo->lo_queue->limits.logical_block_size != arg) {
+		sync_blockdev(lo->lo_device);
+		kill_bdev(lo->lo_device);
+	}
+
 	blk_mq_freeze_queue(lo->lo_queue);
 
+	/* kill_bdev should have truncated all the pages */
+	if (lo->lo_queue->limits.logical_block_size != arg &&
+	    lo->lo_device->bd_inode->i_mapping->nrpages) {
+		err = -EAGAIN;
+		pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+			__func__, lo->lo_number, lo->lo_file_name,
+			lo->lo_device->bd_inode->i_mapping->nrpages);
+		goto out_unfreeze;
+	}
+
 	blk_queue_logical_block_size(lo->lo_queue, arg);
 	blk_queue_physical_block_size(lo->lo_queue, arg);
 	blk_queue_io_min(lo->lo_queue, arg);
 	loop_update_dio(lo);
-
+out_unfreeze:
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
-	return 0;
+	return err;
 }
 
 static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
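
The loop_set_block_size() change above is reached through the LOOP_SET_BLOCK_SIZE ioctl; with this patch the call can now fail with -EAGAIN if dirty pages remain after kill_bdev(). A minimal userspace sketch, assuming /dev/loop0 is already bound to a backing file (the device name and the 4096-byte size are illustrative):

#include <fcntl.h>
#include <linux/loop.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/dev/loop0", O_RDWR);

        if (fd < 0) {
                perror("open /dev/loop0");
                return 1;
        }
        /* Ask for a 4096-byte logical block size; EAGAIN now indicates
         * that the page cache still held dirty pages for the device. */
        if (ioctl(fd, LOOP_SET_BLOCK_SIZE, 4096UL) < 0)
                perror("LOOP_SET_BLOCK_SIZE");
        close(fd);
        return 0;
}
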
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index b3df2793e7cd..34b22d6523ba 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -97,6 +97,7 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
 #else
 static inline int null_zone_init(struct nullb_device *dev)
 {
+	pr_err("null_blk: CONFIG_BLK_DEV_ZONED not enabled\n");
 	return -EINVAL;
 }
 static inline void null_zone_exit(struct nullb_device *dev) {}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 08f2c92602f4..150e49723c15 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2173,18 +2173,20 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
 	size_t nqnlen;
 	int off;
 
-	nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
-	if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
-		strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
-		return;
-	}
+	if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) {
+		nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
+		if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
+			strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE);
+			return;
+		}
 
-	if (ctrl->vs >= NVME_VS(1, 2, 1))
-		dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+		if (ctrl->vs >= NVME_VS(1, 2, 1))
+			dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
+	}
 
 	/* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
 	off = snprintf(subsys->subnqn, NVMF_NQN_SIZE,
-			"nqn.2014.08.org.nvmexpress:%4x%4x",
+			"nqn.2014.08.org.nvmexpress:%04x%04x",
 			le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
 	memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn));
 	off += sizeof(id->sn);
@@ -2500,7 +2502,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->oaes = le32_to_cpu(id->oaes);
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
-	ctrl->cntlid = le16_to_cpup(&id->cntlid);
 	if (id->mdts)
 		max_hw_sectors = 1 << (id->mdts + page_shift - 9);
 	else
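
The "%4x" to "%04x" change in the fake-NQN snprintf() above matters because "%4x" pads with spaces while "%04x" pads with zeros, so small vendor or subsystem-vendor IDs previously produced an NQN with embedded spaces. A standalone illustration (the vid/ssvid values are made up for the example):

#include <stdio.h>

int main(void)
{
        unsigned int vid = 0x8086, ssvid = 0x1;
        char old_style[64], new_style[64];

        snprintf(old_style, sizeof(old_style),
                 "nqn.2014.08.org.nvmexpress:%4x%4x", vid, ssvid);
        snprintf(new_style, sizeof(new_style),
                 "nqn.2014.08.org.nvmexpress:%04x%04x", vid, ssvid);

        printf("old: \"%s\"\n", old_style);     /* "...:8086   1" - embedded spaces */
        printf("new: \"%s\"\n", new_style);     /* "...:80860001" */
        return 0;
}
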
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index b2ab213f43de..3eb908c50e1a 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -874,6 +874,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	if (opts->discovery_nqn) {
 		opts->kato = 0;
 		opts->nr_io_queues = 0;
+		opts->nr_write_queues = 0;
+		opts->nr_poll_queues = 0;
 		opts->duplicate_connect = true;
 	}
 	if (ctrl_loss_tmo < 0)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 183ec17ba067..df4b3a6db51b 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -570,6 +570,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	return 0;
 out_free_ana_log_buf:
 	kfree(ctrl->ana_log_buf);
+	ctrl->ana_log_buf = NULL;
 out:
 	return error;
 }
@@ -577,5 +578,6 @@ out:
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
 {
 	kfree(ctrl->ana_log_buf);
+	ctrl->ana_log_buf = NULL;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2b36ac922596..ab961bdeea89 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -90,6 +90,11 @@ enum nvme_quirks {
 	 * Set MEDIUM priority on SQ creation
 	 */
 	NVME_QUIRK_MEDIUM_PRIO_SQ		= (1 << 7),
+
+	/*
+	 * Ignore device provided subnqn.
+	 */
+	NVME_QUIRK_IGNORE_DEV_SUBNQN		= (1 << 8),
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e8d0942c9c92..deb1a66bf117 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -95,6 +95,7 @@ struct nvme_dev;
 struct nvme_queue;
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
 
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
@@ -1019,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-	if (++nvmeq->cq_head == nvmeq->q_depth) {
+	if (nvmeq->cq_head == nvmeq->q_depth - 1) {
 		nvmeq->cq_head = 0;
 		nvmeq->cq_phase = !nvmeq->cq_phase;
+	} else {
+		nvmeq->cq_head++;
 	}
 }
 
@@ -1420,6 +1423,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	return 0;
 }
 
+static void nvme_suspend_io_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
+		nvme_suspend_queue(&dev->queues[i]);
+}
+
 static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
 	struct nvme_queue *nvmeq = &dev->queues[0];
@@ -1885,8 +1896,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 		struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
 		size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
 
-		dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
-				le64_to_cpu(desc->addr));
+		dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
+			       le64_to_cpu(desc->addr),
+			       DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
 	}
 
 	kfree(dev->host_mem_desc_bufs);
@@ -1952,8 +1964,9 @@ out_free_bufs:
 	while (--i >= 0) {
 		size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
 
-		dma_free_coherent(dev->dev, size, bufs[i],
-				le64_to_cpu(descs[i].addr));
+		dma_free_attrs(dev->dev, size, bufs[i],
+			       le64_to_cpu(descs[i].addr),
+			       DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
 	}
 
 	kfree(bufs);
@@ -2132,6 +2145,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
 	return result;
 }
 
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+	if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
+		__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+}
+
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = &dev->queues[0];
@@ -2168,6 +2187,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	} while (1);
 	adminq->q_db = dev->dbs;
 
+ retry:
 	/* Deregister the admin queue's interrupt */
 	pci_free_irq(pdev, 0, adminq);
 
@@ -2185,25 +2205,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	result = max(result - 1, 1);
 	dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
 
-	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
-					dev->io_queues[HCTX_TYPE_DEFAULT],
-					dev->io_queues[HCTX_TYPE_READ],
-					dev->io_queues[HCTX_TYPE_POLL]);
-
 	/*
 	 * Should investigate if there's a performance win from allocating
 	 * more queues than interrupt vectors; it might allow the submission
 	 * path to scale better, even if the receive path is limited by the
 	 * number of interrupts.
 	 */
-
 	result = queue_request_irq(adminq);
 	if (result) {
 		adminq->cq_vector = -1;
 		return result;
 	}
 	set_bit(NVMEQ_ENABLED, &adminq->flags);
-	return nvme_create_io_queues(dev);
+
+	result = nvme_create_io_queues(dev);
+	if (result || dev->online_queues < 2)
+		return result;
+
+	if (dev->online_queues - 1 < dev->max_qid) {
+		nr_io_queues = dev->online_queues - 1;
+		nvme_disable_io_queues(dev);
+		nvme_suspend_io_queues(dev);
+		goto retry;
+	}
+	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
+					dev->io_queues[HCTX_TYPE_DEFAULT],
+					dev->io_queues[HCTX_TYPE_READ],
+					dev->io_queues[HCTX_TYPE_POLL]);
+	return 0;
 }
 
 static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2248,7 +2277,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 	return 0;
 }
 
-static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
 {
 	int nr_queues = dev->online_queues - 1, sent = 0;
 	unsigned long timeout;
@@ -2294,7 +2323,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dev->tagset.nr_maps = 2; /* default + read */
 	if (dev->io_queues[HCTX_TYPE_POLL])
 		dev->tagset.nr_maps++;
-	dev->tagset.nr_maps = HCTX_MAX_TYPES;
 	dev->tagset.timeout = NVME_IO_TIMEOUT;
 	dev->tagset.numa_node = dev_to_node(dev->dev);
 	dev->tagset.queue_depth =
@@ -2410,7 +2438,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-	int i;
 	bool dead = true;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -2437,13 +2464,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_stop_queues(&dev->ctrl);
 
 	if (!dead && dev->ctrl.queue_count > 0) {
-		if (nvme_disable_io_queues(dev, nvme_admin_delete_sq))
-			nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
-	for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
-		nvme_suspend_queue(&dev->queues[i]);
-
+	nvme_suspend_io_queues(dev);
+	nvme_suspend_queue(&dev->queues[0]);
 	nvme_pci_disable(dev);
 
 	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2946,6 +2971,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
 				NVME_QUIRK_MEDIUM_PRIO_SQ },
+	{ PCI_VDEVICE(INTEL, 0xf1a6),	/* Intel 760p/Pro 7600p */
+		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
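
Among the pci.c changes above, nvme_update_cq_head() switches from increment-then-wrap to test-then-increment, so cq_head never holds the out-of-range value q_depth, even transiently. A simplified sketch of the two patterns outside the driver's own structures (the struct and function names are illustrative):

struct cq {
        unsigned int head;
        unsigned int q_depth;
};

/* old: ->head transiently holds q_depth before the wrap is applied */
static void advance_head_old(struct cq *cq)
{
        if (++cq->head == cq->q_depth)
                cq->head = 0;
}

/* new: ->head only ever stores values in [0, q_depth - 1] */
static void advance_head_new(struct cq *cq)
{
        if (cq->head == cq->q_depth - 1)
                cq->head = 0;
        else
                cq->head++;
}
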
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index de174912445e..265a0543b381 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1565,8 +1565,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
 {
 	nvme_tcp_stop_io_queues(ctrl);
 	if (remove) {
-		if (ctrl->ops->flags & NVME_F_FABRICS)
-			blk_cleanup_queue(ctrl->connect_q);
+		blk_cleanup_queue(ctrl->connect_q);
 		blk_mq_free_tag_set(ctrl->tagset);
 	}
 	nvme_tcp_free_io_queues(ctrl);
@@ -1587,12 +1586,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 			goto out_free_io_queues;
 		}
 
-		if (ctrl->ops->flags & NVME_F_FABRICS) {
-			ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
-			if (IS_ERR(ctrl->connect_q)) {
-				ret = PTR_ERR(ctrl->connect_q);
-				goto out_free_tag_set;
-			}
+		ctrl->connect_q = blk_mq_init_queue(ctrl->tagset);
+		if (IS_ERR(ctrl->connect_q)) {
+			ret = PTR_ERR(ctrl->connect_q);
+			goto out_free_tag_set;
 		}
 	} else {
 		blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1606,7 +1603,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 	return 0;
 
 out_cleanup_connect_q:
-	if (new && (ctrl->ops->flags & NVME_F_FABRICS))
+	if (new)
 		blk_cleanup_queue(ctrl->connect_q);
 out_free_tag_set:
 	if (new)
@@ -1620,7 +1617,6 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
 {
 	nvme_tcp_stop_queue(ctrl, 0);
 	if (remove) {
-		free_opal_dev(ctrl->opal_dev);
 		blk_cleanup_queue(ctrl->admin_q);
 		blk_mq_free_tag_set(ctrl->admin_tagset);
 	}