author     Linus Torvalds <torvalds@linux-foundation.org>   2016-12-14 20:21:53 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-12-14 20:21:53 -0500
commit     cf1b3341afab9d3ad02a76b3a619ea027dcf4e28
tree       99d5e2df998556c6ba1e9022eaa361aee74bf4a8
parent     80eabba70260dcb55b05098f6c1fecbe5c0e518b
parent     d1b1cea1e58477dad88ff769f54c0d2dfa56d923
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe:
"A few fixes that I collected as post-merge.
I was going to wait a bit with sending this out, but the O_DIRECT fix
should really go in sooner rather than later"
* 'for-linus' of git://git.kernel.dk/linux-block:
blk-mq: Fix failed allocation path when mapping queues
blk-mq: Avoid memory reclaim when remapping queues
block_dev: don't update file access position for sync direct IO
nvme/pci: Log PCI_STATUS when the controller dies
block_dev: don't test bdev->bd_contains when it is not stable
-rw-r--r--   block/blk-mq.c            | 32
-rw-r--r--   drivers/nvme/host/pci.c   | 22
-rw-r--r--   fs/block_dev.c            |  7
3 files changed, 45 insertions, 16 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d79fdc11b1ee..4bf850e8d6b5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1605,7 +1605,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
         INIT_LIST_HEAD(&tags->page_list);
 
         tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
-                                 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                                 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
                                  set->numa_node);
         if (!tags->rqs) {
                 blk_mq_free_tags(tags);
@@ -1631,7 +1631,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
                 do {
                         page = alloc_pages_node(set->numa_node,
-                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
+                                GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
                                 this_order);
                         if (page)
                                 break;
@@ -1652,7 +1652,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                  * Allow kmemleak to scan these pages as they contain pointers
                  * to additional allocations like via ops->init_request().
                  */
-                kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
+                kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
                 entries_per_page = order_to_size(this_order) / rq_size;
                 to_do = min(entries_per_page, set->queue_depth - i);
                 left -= to_do * rq_size;
@@ -1870,7 +1870,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 static void blk_mq_map_swqueue(struct request_queue *q,
                                const struct cpumask *online_mask)
 {
-        unsigned int i;
+        unsigned int i, hctx_idx;
         struct blk_mq_hw_ctx *hctx;
         struct blk_mq_ctx *ctx;
         struct blk_mq_tag_set *set = q->tag_set;
@@ -1893,6 +1893,21 @@ static void blk_mq_map_swqueue(struct request_queue *q,
                 if (!cpumask_test_cpu(i, online_mask))
                         continue;
 
+                hctx_idx = q->mq_map[i];
+                /* unmapped hw queue can be remapped after CPU topo changed */
+                if (!set->tags[hctx_idx]) {
+                        set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx);
+
+                        /*
+                         * If tags initialization fail for some hctx,
+                         * that hctx won't be brought online. In this
+                         * case, remap the current ctx to hctx[0] which
+                         * is guaranteed to always have tags allocated
+                         */
+                        if (!set->tags[hctx_idx])
+                                q->mq_map[i] = 0;
+                }
+
                 ctx = per_cpu_ptr(q->queue_ctx, i);
                 hctx = blk_mq_map_queue(q, i);
 
@@ -1909,7 +1924,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
                  * disable it and free the request entries.
                  */
                 if (!hctx->nr_ctx) {
-                        if (set->tags[i]) {
+                        /* Never unmap queue 0. We need it as a
+                         * fallback in case of a new remap fails
+                         * allocation
+                         */
+                        if (i && set->tags[i]) {
                                 blk_mq_free_rq_map(set, set->tags[i], i);
                                 set->tags[i] = NULL;
                         }
@@ -1917,9 +1936,6 @@ static void blk_mq_map_swqueue(struct request_queue *q,
                         continue;
                 }
 
-                /* unmapped hw queue can be remapped after CPU topo changed */
-                if (!set->tags[i])
-                        set->tags[i] = blk_mq_init_rq_map(set, i);
                 hctx->tags = set->tags[i];
                 WARN_ON(!hctx->tags);
 
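Background on the GFP_NOIO switch above: blk_mq_init_rq_map() can now be called from blk_mq_map_swqueue() while hardware queues are being remapped after a CPU topology change, and a GFP_KERNEL allocation there could enter direct reclaim and issue I/O against the very queues being reconfigured. GFP_NOIO keeps reclaim from generating new I/O. As a rough, illustrative sketch (not part of this patch), the same guarantee could also be obtained with the kernel's scoped memalloc_noio_save()/memalloc_noio_restore() helpers; the wrapper name below is hypothetical:

/* Illustrative only -- hypothetical wrapper, not in the patch above. */
static struct blk_mq_tags *example_init_rq_map_noio(struct blk_mq_tag_set *set,
                                                    unsigned int hctx_idx)
{
        unsigned int noio_flags;
        struct blk_mq_tags *tags;

        /* Allocations made by this task now behave as if GFP_NOIO were passed. */
        noio_flags = memalloc_noio_save();
        tags = blk_mq_init_rq_map(set, hctx_idx);
        memalloc_noio_restore(noio_flags);

        return tags;
}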
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d6e6bce93d0c..2fd7dc2e8fc4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1282,6 +1282,24 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
         return true;
 }
 
+static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
+{
+        /* Read a config register to help see what died. */
+        u16 pci_status;
+        int result;
+
+        result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
+                                      &pci_status);
+        if (result == PCIBIOS_SUCCESSFUL)
+                dev_warn(dev->dev,
+                         "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
+                         csts, pci_status);
+        else
+                dev_warn(dev->dev,
+                         "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
+                         csts, result);
+}
+
 static void nvme_watchdog_timer(unsigned long data)
 {
         struct nvme_dev *dev = (struct nvme_dev *)data;
@@ -1290,9 +1308,7 @@ static void nvme_watchdog_timer(unsigned long data)
         /* Skip controllers under certain specific conditions. */
         if (nvme_should_reset(dev, csts)) {
                 if (!nvme_reset(dev))
-                        dev_warn(dev->dev,
-                                "Failed status: 0x%x, reset controller.\n",
-                                csts);
+                        nvme_warn_reset(dev, csts);
                 return;
         }
 
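For reference, the PCI_STATUS word that nvme_warn_reset() now logs is the standard PCI status register; a value of 0xffff generally means config space reads are returning all-ones because the device has dropped off the bus. The decoder below is an illustrative sketch only (the helper name is hypothetical); the bit masks are the usual ones from <linux/pci_regs.h>:

#include <linux/pci_regs.h>
#include <linux/types.h>

/* Illustrative only -- maps a logged PCI_STATUS value to a human-readable hint. */
static const char *example_pci_status_hint(u16 pci_status)
{
        if (pci_status == 0xffff)
                return "config reads return all-ones; device likely fell off the bus";
        if (pci_status & PCI_STATUS_DETECTED_PARITY)
                return "parity error detected on the bus";
        if (pci_status & PCI_STATUS_SIG_SYSTEM_ERROR)
                return "device signalled SERR#";
        if (pci_status & PCI_STATUS_REC_MASTER_ABORT)
                return "device received a master abort";
        if (pci_status & PCI_STATUS_REC_TARGET_ABORT)
                return "device received a target abort";
        return "no error bits set in PCI_STATUS";
}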
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95acbd2ebc5d..7c4507224ed6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -264,7 +264,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 
         if (unlikely(bio.bi_error))
                 return bio.bi_error;
-        iocb->ki_pos += ret;
         return ret;
 }
 
@@ -411,10 +410,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
         __set_current_state(TASK_RUNNING);
 
         ret = dio->bio.bi_error;
-        if (likely(!ret)) {
+        if (likely(!ret))
                 ret = dio->size;
-                iocb->ki_pos += ret;
-        }
 
         bio_put(&dio->bio);
         return ret;
@@ -1089,7 +1086,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
                 return true;     /* already a holder */
         else if (bdev->bd_holder != NULL)
                 return false;    /* held by someone else */
-        else if (bdev->bd_contains == bdev)
+        else if (whole == bdev)
                 return true;     /* is a whole device which isn't held */
 
         else if (whole->bd_holder == bd_may_claim)
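The two iocb->ki_pos removals above matter because, for synchronous direct I/O, the generic read/write paths already advance ki_pos by the returned byte count, so updating it again in the block-device ->direct_IO helpers moved the file offset twice per call. A rough userspace sketch of the symptom follows; the device path and the 4096-byte alignment are assumptions for illustration, not part of the patch:

#define _GNU_SOURCE             /* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        const char *dev = "/dev/sda";   /* example device; needs read permission */
        void *buf;
        ssize_t n;
        off_t pos;
        int fd = open(dev, O_RDONLY | O_DIRECT);

        if (fd < 0 || posix_memalign(&buf, 4096, 4096))
                return 1;

        n = read(fd, buf, 4096);        /* one O_DIRECT read; should advance f_pos once */
        pos = lseek(fd, 0, SEEK_CUR);

        /* Expected: pos == n. With the regression, pos came back as 2 * n. */
        printf("read %zd bytes, file offset is now %lld\n", n, (long long)pos);

        free(buf);
        close(fd);
        return 0;
}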