about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author    Linus Torvalds <torvalds@linux-foundation.org>  2016-12-14 20:21:53 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-12-14 20:21:53 -0500
commit cf1b3341afab9d3ad02a76b3a619ea027dcf4e28 (patch)
tree   99d5e2df998556c6ba1e9022eaa361aee74bf4a8
parent 80eabba70260dcb55b05098f6c1fecbe5c0e518b (diff)
parent d1b1cea1e58477dad88ff769f54c0d2dfa56d923 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe:
 "A few fixes that I collected as post-merge. I was going to wait a bit
  with sending this out, but the O_DIRECT fix should really go in sooner
  rather than later"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: Fix failed allocation path when mapping queues
  blk-mq: Avoid memory reclaim when remapping queues
  block_dev: don't update file access position for sync direct IO
  nvme/pci: Log PCI_STATUS when the controller dies
  block_dev: don't test bdev->bd_contains when it is not stable
-rw-r--r--  block/blk-mq.c           | 32
-rw-r--r--  drivers/nvme/host/pci.c  | 22
-rw-r--r--  fs/block_dev.c           |  7
3 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d79fdc11b1ee..4bf850e8d6b5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1605,7 +1605,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1605 INIT_LIST_HEAD(&tags->page_list); 1605 INIT_LIST_HEAD(&tags->page_list);
1606 1606
1607 tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), 1607 tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
1608 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, 1608 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
1609 set->numa_node); 1609 set->numa_node);
1610 if (!tags->rqs) { 1610 if (!tags->rqs) {
1611 blk_mq_free_tags(tags); 1611 blk_mq_free_tags(tags);
@@ -1631,7 +1631,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1631 1631
1632 do { 1632 do {
1633 page = alloc_pages_node(set->numa_node, 1633 page = alloc_pages_node(set->numa_node,
1634 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, 1634 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
1635 this_order); 1635 this_order);
1636 if (page) 1636 if (page)
1637 break; 1637 break;
@@ -1652,7 +1652,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1652 * Allow kmemleak to scan these pages as they contain pointers 1652 * Allow kmemleak to scan these pages as they contain pointers
1653 * to additional allocations like via ops->init_request(). 1653 * to additional allocations like via ops->init_request().
1654 */ 1654 */
1655 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL); 1655 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
1656 entries_per_page = order_to_size(this_order) / rq_size; 1656 entries_per_page = order_to_size(this_order) / rq_size;
1657 to_do = min(entries_per_page, set->queue_depth - i); 1657 to_do = min(entries_per_page, set->queue_depth - i);
1658 left -= to_do * rq_size; 1658 left -= to_do * rq_size;
@@ -1870,7 +1870,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
1870static void blk_mq_map_swqueue(struct request_queue *q, 1870static void blk_mq_map_swqueue(struct request_queue *q,
1871 const struct cpumask *online_mask) 1871 const struct cpumask *online_mask)
1872{ 1872{
1873 unsigned int i; 1873 unsigned int i, hctx_idx;
1874 struct blk_mq_hw_ctx *hctx; 1874 struct blk_mq_hw_ctx *hctx;
1875 struct blk_mq_ctx *ctx; 1875 struct blk_mq_ctx *ctx;
1876 struct blk_mq_tag_set *set = q->tag_set; 1876 struct blk_mq_tag_set *set = q->tag_set;
@@ -1893,6 +1893,21 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1893 if (!cpumask_test_cpu(i, online_mask)) 1893 if (!cpumask_test_cpu(i, online_mask))
1894 continue; 1894 continue;
1895 1895
1896 hctx_idx = q->mq_map[i];
1897 /* unmapped hw queue can be remapped after CPU topo changed */
1898 if (!set->tags[hctx_idx]) {
1899 set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx);
1900
1901 /*
1902 * If tags initialization fail for some hctx,
1903 * that hctx won't be brought online. In this
1904 * case, remap the current ctx to hctx[0] which
1905 * is guaranteed to always have tags allocated
1906 */
1907 if (!set->tags[hctx_idx])
1908 q->mq_map[i] = 0;
1909 }
1910
1896 ctx = per_cpu_ptr(q->queue_ctx, i); 1911 ctx = per_cpu_ptr(q->queue_ctx, i);
1897 hctx = blk_mq_map_queue(q, i); 1912 hctx = blk_mq_map_queue(q, i);
1898 1913
@@ -1909,7 +1924,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1909 * disable it and free the request entries. 1924 * disable it and free the request entries.
1910 */ 1925 */
1911 if (!hctx->nr_ctx) { 1926 if (!hctx->nr_ctx) {
1912 if (set->tags[i]) { 1927 /* Never unmap queue 0. We need it as a
1928 * fallback in case of a new remap fails
1929 * allocation
1930 */
1931 if (i && set->tags[i]) {
1913 blk_mq_free_rq_map(set, set->tags[i], i); 1932 blk_mq_free_rq_map(set, set->tags[i], i);
1914 set->tags[i] = NULL; 1933 set->tags[i] = NULL;
1915 } 1934 }
@@ -1917,9 +1936,6 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1917 continue; 1936 continue;
1918 } 1937 }
1919 1938
1920 /* unmapped hw queue can be remapped after CPU topo changed */
1921 if (!set->tags[i])
1922 set->tags[i] = blk_mq_init_rq_map(set, i);
1923 hctx->tags = set->tags[i]; 1939 hctx->tags = set->tags[i];
1924 WARN_ON(!hctx->tags); 1940 WARN_ON(!hctx->tags);
1925 1941
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d6e6bce93d0c..2fd7dc2e8fc4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1282,6 +1282,24 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
1282 return true; 1282 return true;
1283} 1283}
1284 1284
1285static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
1286{
1287 /* Read a config register to help see what died. */
1288 u16 pci_status;
1289 int result;
1290
1291 result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
1292 &pci_status);
1293 if (result == PCIBIOS_SUCCESSFUL)
1294 dev_warn(dev->dev,
1295 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
1296 csts, pci_status);
1297 else
1298 dev_warn(dev->dev,
1299 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
1300 csts, result);
1301}
1302
1285static void nvme_watchdog_timer(unsigned long data) 1303static void nvme_watchdog_timer(unsigned long data)
1286{ 1304{
1287 struct nvme_dev *dev = (struct nvme_dev *)data; 1305 struct nvme_dev *dev = (struct nvme_dev *)data;
@@ -1290,9 +1308,7 @@ static void nvme_watchdog_timer(unsigned long data)
1290 /* Skip controllers under certain specific conditions. */ 1308 /* Skip controllers under certain specific conditions. */
1291 if (nvme_should_reset(dev, csts)) { 1309 if (nvme_should_reset(dev, csts)) {
1292 if (!nvme_reset(dev)) 1310 if (!nvme_reset(dev))
1293 dev_warn(dev->dev, 1311 nvme_warn_reset(dev, csts);
1294 "Failed status: 0x%x, reset controller.\n",
1295 csts);
1296 return; 1312 return;
1297 } 1313 }
1298 1314
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95acbd2ebc5d..7c4507224ed6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -264,7 +264,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
264 264
265 if (unlikely(bio.bi_error)) 265 if (unlikely(bio.bi_error))
266 return bio.bi_error; 266 return bio.bi_error;
267 iocb->ki_pos += ret;
268 return ret; 267 return ret;
269} 268}
270 269
@@ -411,10 +410,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
411 __set_current_state(TASK_RUNNING); 410 __set_current_state(TASK_RUNNING);
412 411
413 ret = dio->bio.bi_error; 412 ret = dio->bio.bi_error;
414 if (likely(!ret)) { 413 if (likely(!ret))
415 ret = dio->size; 414 ret = dio->size;
416 iocb->ki_pos += ret;
417 }
418 415
419 bio_put(&dio->bio); 416 bio_put(&dio->bio);
420 return ret; 417 return ret;
@@ -1089,7 +1086,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
1089 return true; /* already a holder */ 1086 return true; /* already a holder */
1090 else if (bdev->bd_holder != NULL) 1087 else if (bdev->bd_holder != NULL)
1091 return false; /* held by someone else */ 1088 return false; /* held by someone else */
1092 else if (bdev->bd_contains == bdev) 1089 else if (whole == bdev)
1093 return true; /* is a whole device which isn't held */ 1090 return true; /* is a whole device which isn't held */
1094 1091
1095 else if (whole->bd_holder == bd_may_claim) 1092 else if (whole->bd_holder == bd_may_claim)