author	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-16 00:20:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-16 00:20:52 -0400
commit	9637d517347e80ee2fe1c5d8ce45ba1b88d8b5cd (patch)
tree	3cee2a1d8b3c6ea466924517307a1f98ada1e92f
parent	273cbf61c3ddee9574ef1f4959b9bc6db5b24271 (diff)
parent	787c79d6393fc028887cc1b6066915f0b094e92f (diff)
Merge tag 'for-linus-20190715' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe:
"A later pull request with some followup items. I had some vacation
coming up to the merge window, so certain things were delayed a
bit. This pull request also contains fixes that came in within the
last few days of the merge window, which I didn't want to push right
before sending you a pull request.
This contains:
- NVMe pull request, mostly fixes, but also a few minor items on the
feature side that were timing constrained (Christoph et al)
- Report zones fixes (Damien)
- Removal of dead code (Damien)
- Turn on cgroup psi memstall (Josef)
- block cgroup MAINTAINERS entry (Konstantin)
- Flush init fix (Josef)
- blk-throttle low iops timing fix (Konstantin)
- nbd resize fixes (Mike)
- nbd 0 blocksize crash fix (Xiubo)
- block integrity error leak fix (Wenwen)
- blk-cgroup writeback and priority inheritance fixes (Tejun)"
* tag 'for-linus-20190715' of git://git.kernel.dk/linux-block: (42 commits)
MAINTAINERS: add entry for block io cgroup
null_blk: fixup ->report_zones() for !CONFIG_BLK_DEV_ZONED
block: Limit zone array allocation size
sd_zbc: Fix report zones buffer allocation
block: Kill gfp_t argument of blkdev_report_zones()
block: Allow mapping of vmalloc-ed buffers
block/bio-integrity: fix a memory leak bug
nvme: fix NULL deref for fabrics options
nbd: add netlink reconfigure resize support
nbd: fix crash when the blksize is zero
block: Disable write plugging for zoned block devices
block: Fix elevator name declaration
block: Remove unused definitions
nvme: fix regression upon hot device removal and insertion
blk-throttle: fix zero wait time for iops throttled group
block: Fix potential overflow in blk_report_zones()
blkcg: implement REQ_CGROUP_PUNT
blkcg, writeback: Implement wbc_blkcg_css()
blkcg, writeback: Add wbc->no_cgroup_owner
blkcg, writeback: Rename wbc_account_io() to wbc_account_cgroup_owner()
...
50 files changed, 660 insertions(+), 210 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index a9548de56ac9..8269e869cb1e 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2124,7 +2124,7 @@ following two functions.
 	a queue (device) has been associated with the bio and
 	before submission.
 
-  wbc_account_io(@wbc, @page, @bytes)
+  wbc_account_cgroup_owner(@wbc, @page, @bytes)
 	Should be called for each data segment being written out.
 	While this function doesn't care exactly when it's called
 	during the writeback session, it's the easiest and most
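The renamed hook keeps the calling convention of wbc_account_io(). As a rough sketch of a caller (hypothetical helper name, and it assumes the bio has room for the page), a filesystem's writeback path would charge each segment like this:

	/* Hypothetical sketch: attribute each page written back to the cgroup
	 * that owns the inode's dirty data, so the IO charge follows the
	 * dirtier rather than the flusher thread. */
	static void fs_add_page_to_bio(struct writeback_control *wbc,
				       struct bio *bio, struct page *page)
	{
		bio_add_page(bio, page, PAGE_SIZE, 0);	/* assumed to fit */
		wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
	}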
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 31c177663ed5..5a4a799fe61b 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -843,11 +843,6 @@ elevator_latter_req_fn		These return the request before or after the
 
 elevator_completed_req_fn	called when a request is completed.
 
-elevator_may_queue_fn		returns true if the scheduler wants to allow the
-				current context to queue a new request even if
-				it is over the queue limit. This must be used
-				very carefully!!
-
 elevator_set_req_fn
 elevator_put_req_fn		Must be used to allocate and free any elevator
 				specific storage for a request.
diff --git a/MAINTAINERS b/MAINTAINERS
index 4aee3a1de331..6debe6829716 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4183,6 +4183,19 @@ S:	Maintained
 F:	mm/memcontrol.c
 F:	mm/swap_cgroup.c
 
+CONTROL GROUP - BLOCK IO CONTROLLER (BLKIO)
+M:	Tejun Heo <tj@kernel.org>
+M:	Jens Axboe <axboe@kernel.dk>
+L:	cgroups@vger.kernel.org
+L:	linux-block@vger.kernel.org
+T:	git git://git.kernel.dk/linux-block
+F:	Documentation/cgroup-v1/blkio-controller.rst
+F:	block/blk-cgroup.c
+F:	include/linux/blk-cgroup.h
+F:	block/blk-throttle.c
+F:	block/blk-iolatency.c
+F:	block/bfq-cgroup.c
+
 CORETEMP HARDWARE MONITORING DRIVER
 M:	Fenghua Yu <fenghua.yu@intel.com>
 L:	linux-hwmon@vger.kernel.org
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4db620849515..fb95dbb21dd8 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -276,8 +276,12 @@ bool bio_integrity_prep(struct bio *bio)
 		ret = bio_integrity_add_page(bio, virt_to_page(buf),
 					     bytes, offset);
 
-		if (ret == 0)
-			return false;
+		if (ret == 0) {
+			printk(KERN_ERR "could not attach integrity payload\n");
+			kfree(buf);
+			status = BLK_STS_RESOURCE;
+			goto err_end_io;
+		}
 
 		if (ret < bytes)
 			break;
diff --git a/block/bio.c b/block/bio.c
index 29cd6cf4da51..299a0e7651ec 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -16,6 +16,7 @@
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <linux/blk-cgroup.h>
+#include <linux/highmem.h>
 
 #include <trace/events/block.h>
 #include "blk.h"
@@ -1441,8 +1442,22 @@ void bio_unmap_user(struct bio *bio)
 	bio_put(bio);
 }
 
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+	if (bio->bi_private && !op_is_write(bio_op(bio))) {
+		unsigned long i, len = 0;
+
+		for (i = 0; i < bio->bi_vcnt; i++)
+			len += bio->bi_io_vec[i].bv_len;
+		invalidate_kernel_vmap_range(bio->bi_private, len);
+	}
+#endif
+}
+
 static void bio_map_kern_endio(struct bio *bio)
 {
+	bio_invalidate_vmalloc_pages(bio);
 	bio_put(bio);
 }
 
@@ -1463,6 +1478,8 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	unsigned long start = kaddr >> PAGE_SHIFT;
 	const int nr_pages = end - start;
+	bool is_vmalloc = is_vmalloc_addr(data);
+	struct page *page;
 	int offset, i;
 	struct bio *bio;
 
@@ -1470,6 +1487,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
+	if (is_vmalloc) {
+		flush_kernel_vmap_range(data, len);
+		bio->bi_private = data;
+	}
+
 	offset = offset_in_page(kaddr);
 	for (i = 0; i < nr_pages; i++) {
 		unsigned int bytes = PAGE_SIZE - offset;
@@ -1480,7 +1502,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 		if (bytes > len)
 			bytes = len;
 
-		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
+		if (!is_vmalloc)
+			page = virt_to_page(data);
+		else
+			page = vmalloc_to_page(data);
+		if (bio_add_pc_page(q, bio, page, bytes,
 				    offset) < bytes) {
 			/* we don't support partial mappings */
 			bio_put(bio);
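The core of the change is the address-type dispatch: linear-map addresses translate to pages with plain arithmetic, while a vmalloc range is only virtually contiguous, so each page must be resolved through the page tables. A minimal sketch of that dispatch (the helper name is illustrative):

	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	/* Illustrative helper: pick the right virtual-to-page translation.
	 * virt_to_page() is pure offset arithmetic on the linear map and
	 * returns garbage for vmalloc addresses; vmalloc_to_page() walks
	 * the page tables instead. */
	static struct page *data_to_page(void *data)
	{
		if (is_vmalloc_addr(data))
			return vmalloc_to_page(data);
		return virt_to_page(data);
	}

The flush_kernel_vmap_range()/invalidate_kernel_vmap_range() calls in the patch handle the other half of the problem: on architectures with aliasing caches, the kernel's vmap alias must be flushed before the device reads the buffer and invalidated before the CPU reads data the device wrote.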
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 53b7bd4c7000..24ed26957367 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -29,6 +29,7 @@
 #include <linux/ctype.h>
 #include <linux/blk-cgroup.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include "blk.h"
 
 #define MAX_KEY_LEN 100
@@ -47,12 +48,14 @@ struct blkcg blkcg_root;
 EXPORT_SYMBOL_GPL(blkcg_root);
 
 struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
+EXPORT_SYMBOL_GPL(blkcg_root_css);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
 static bool blkcg_debug_stats = false;
+static struct workqueue_struct *blkcg_punt_bio_wq;
 
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
@@ -87,6 +90,8 @@ static void __blkg_release(struct rcu_head *rcu)
 {
 	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
 
+	WARN_ON(!bio_list_empty(&blkg->async_bios));
+
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
 	if (blkg->parent)
@@ -112,6 +117,23 @@ static void blkg_release(struct percpu_ref *ref)
 	call_rcu(&blkg->rcu_head, __blkg_release);
 }
 
+static void blkg_async_bio_workfn(struct work_struct *work)
+{
+	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
+					     async_bio_work);
+	struct bio_list bios = BIO_EMPTY_LIST;
+	struct bio *bio;
+
+	/* as long as there are pending bios, @blkg can't go away */
+	spin_lock_bh(&blkg->async_bio_lock);
+	bio_list_merge(&bios, &blkg->async_bios);
+	bio_list_init(&blkg->async_bios);
+	spin_unlock_bh(&blkg->async_bio_lock);
+
+	while ((bio = bio_list_pop(&bios)))
+		submit_bio(bio);
+}
+
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -140,6 +162,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
+	spin_lock_init(&blkg->async_bio_lock);
+	bio_list_init(&blkg->async_bios);
+	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
 	blkg->blkcg = blkcg;
 
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
@@ -1526,6 +1551,25 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
+bool __blkcg_punt_bio_submit(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+
+	/* consume the flag first */
+	bio->bi_opf &= ~REQ_CGROUP_PUNT;
+
+	/* never bounce for the root cgroup */
+	if (!blkg->parent)
+		return false;
+
+	spin_lock_bh(&blkg->async_bio_lock);
+	bio_list_add(&blkg->async_bios, bio);
+	spin_unlock_bh(&blkg->async_bio_lock);
+
+	queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+	return true;
+}
+
 /*
  * Scale the accumulated delay based on how long it has been since we updated
  * the delay.  We only call this when we are adding delay, in case it's been a
@@ -1587,6 +1631,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
  */
 static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 {
+	unsigned long pflags;
 	u64 now = ktime_to_ns(ktime_get());
 	u64 exp;
 	u64 delay_nsec = 0;
@@ -1613,11 +1658,8 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 	 */
 	delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
 
-	/*
-	 * TODO: the use_memdelay flag is going to be for the upcoming psi stuff
-	 * that hasn't landed upstream yet.  Once that stuff is in place we need
-	 * to do a psi_memstall_enter/leave if memdelay is set.
-	 */
+	if (use_memdelay)
+		psi_memstall_enter(&pflags);
 
 	exp = ktime_add_ns(now, delay_nsec);
 	tok = io_schedule_prepare();
@@ -1627,6 +1669,9 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 			break;
 	} while (!fatal_signal_pending(current));
 	io_schedule_finish(tok);
+
+	if (use_memdelay)
+		psi_memstall_leave(&pflags);
 }
 
 /**
@@ -1726,5 +1771,16 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
 	atomic64_add(delta, &blkg->delay_nsec);
 }
 
+static int __init blkcg_init(void)
+{
+	blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+					    WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					    WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!blkcg_punt_bio_wq)
+		return -ENOMEM;
+	return 0;
+}
+subsys_initcall(blkcg_init);
+
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
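On the submission side, opting into the punt mechanism is a one-flag affair. A hedged sketch of a caller (not part of this series) that wants its IO issued from the owning cgroup's context rather than its own, for instance to avoid a shared worker being throttled on another group's behalf:

	static void submit_from_owning_cgroup(struct bio *bio)
	{
		/* submit_bio() consumes the flag: if the bio belongs to a
		 * non-root blkcg it is queued on blkg->async_bios and later
		 * resubmitted by the blkcg_punt_bio workqueue; otherwise it
		 * is issued inline as usual. */
		bio->bi_opf |= REQ_CGROUP_PUNT;
		submit_bio(bio);
	}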
diff --git a/block/blk-core.c b/block/blk-core.c
index 5d1fc8e17dd1..d0cc6e14d2f0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -117,6 +117,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->internal_tag = -1;
 	rq->start_time_ns = ktime_get_ns();
 	rq->part = NULL;
+	refcount_set(&rq->ref, 1);
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -687,7 +688,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	struct request *rq;
 	struct list_head *plug_list;
 
-	plug = current->plug;
+	plug = blk_mq_plug(q, bio);
 	if (!plug)
 		return false;
 
@@ -1127,6 +1128,9 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+	if (blkcg_punt_bio_submit(bio))
+		return BLK_QC_T_NONE;
+
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e5ef40c603ca..b038ec680e84 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1973,7 +1973,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_mq_bio_to_request(rq, bio, nr_segs);
 
-	plug = current->plug;
+	plug = blk_mq_plug(q, bio);
 	if (unlikely(is_flush_fua)) {
 		/* bypass scheduler for flush rq */
 		blk_insert_flush(rq);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f4bf5161333e..32c62c64e6c2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -233,4 +233,36 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
 		qmap->mq_map[cpu] = 0;
 }
 
+/*
+ * blk_mq_plug() - Get caller context plug
+ * @q: request queue
+ * @bio : the bio being submitted by the caller context
+ *
+ * Plugging, by design, may delay the insertion of BIOs into the elevator in
+ * order to increase BIO merging opportunities. This however can cause BIO
+ * insertion order to change from the order in which submit_bio() is being
+ * executed in the case of multiple contexts concurrently issuing BIOs to a
+ * device, even if these context are synchronized to tightly control BIO issuing
+ * order. While this is not a problem with regular block devices, this ordering
+ * change can cause write BIO failures with zoned block devices as these
+ * require sequential write patterns to zones. Prevent this from happening by
+ * ignoring the plug state of a BIO issuing context if the target request queue
+ * is for a zoned block device and the BIO to plug is a write operation.
+ *
+ * Return current->plug if the bio can be plugged and NULL otherwise
+ */
+static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
+					   struct bio *bio)
+{
+	/*
+	 * For regular block devices or read operations, use the context plug
+	 * which may be NULL if blk_start_plug() was not executed.
+	 */
+	if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio)))
+		return current->plug;
+
+	/* Zoned block device write operation case: do not plug the BIO */
+	return NULL;
+}
+
 #endif
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9ea7c0ecad10..8ab6c8153223 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -881,13 +881,10 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 	u64 tmp;
 
-	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
-
-	/* Slice has just started. Consider one slice interval */
-	if (!jiffy_elapsed)
-		jiffy_elapsed_rnd = tg->td->throtl_slice;
-
-	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
+	jiffy_elapsed = jiffies - tg->slice_start[rw];
+
+	/* Round up to the next throttle slice, wait time must be nonzero */
+	jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
 
 	/*
 	 * jiffy_elapsed_rnd should not be a big value as minimum iops can be
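The bug is easiest to see with concrete numbers: when jiffy_elapsed lands exactly on a slice boundary, roundup() returns it unchanged and the computed wait collapses to zero. A minimal standalone sketch, assuming a 20-jiffy slice and the kernel-style roundup() macro:

	#include <stdio.h>

	#define THROTL_SLICE 20UL			/* assumed slice length, jiffies */
	#define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

	int main(void)
	{
		unsigned long jiffy_elapsed = 40;	/* exact multiple of the slice */

		/* Old: exact multiples survive roundup() unchanged, so the wait
		 * time (rnd - elapsed) could be zero for an iops-throttled group. */
		unsigned long old_rnd = roundup(jiffy_elapsed, THROTL_SLICE);

		/* New: rounding elapsed + 1 always lands in the next slice,
		 * keeping the wait time nonzero. */
		unsigned long new_rnd = roundup(jiffy_elapsed + 1, THROTL_SLICE);

		printf("old rnd=%lu (wait %lu), new rnd=%lu (wait %lu)\n",
		       old_rnd, old_rnd - jiffy_elapsed,
		       new_rnd, new_rnd - jiffy_elapsed);
		return 0;
	}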
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index ae7e91bd0618..6c503824ba3f 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -14,6 +14,9 @@
 #include <linux/rbtree.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
 
 #include "blk.h"
 
@@ -70,7 +73,7 @@ EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
 static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
 					     sector_t nr_sectors)
 {
-	unsigned long zone_sectors = blk_queue_zone_sectors(q);
+	sector_t zone_sectors = blk_queue_zone_sectors(q);
 
 	return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
 }
@@ -117,8 +120,7 @@ static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
 }
 
 static int blk_report_zones(struct gendisk *disk, sector_t sector,
-			    struct blk_zone *zones, unsigned int *nr_zones,
-			    gfp_t gfp_mask)
+			    struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct request_queue *q = disk->queue;
 	unsigned int z = 0, n, nrz = *nr_zones;
@@ -127,8 +129,7 @@ static int blk_report_zones(struct gendisk *disk, sector_t sector,
 
 	while (z < nrz && sector < capacity) {
 		n = nrz - z;
-		ret = disk->fops->report_zones(disk, sector, &zones[z], &n,
-					       gfp_mask);
+		ret = disk->fops->report_zones(disk, sector, &zones[z], &n);
 		if (ret)
 			return ret;
 		if (!n)
@@ -149,17 +150,18 @@ static int blk_report_zones(struct gendisk *disk, sector_t sector,
  * @sector: Sector from which to report zones
  * @zones: Array of zone structures where to return the zones information
  * @nr_zones: Number of zone structures in the zone array
- * @gfp_mask: Memory allocation flags (for bio_alloc)
  *
  * Description:
  *    Get zone information starting from the zone containing @sector.
  *    The number of zone information reported may be less than the number
  *    requested by @nr_zones. The number of zones actually reported is
  *    returned in @nr_zones.
+ *    The caller must use memalloc_noXX_save/restore() calls to control
+ *    memory allocations done within this function (zone array and command
+ *    buffer allocation by the device driver).
  */
 int blkdev_report_zones(struct block_device *bdev, sector_t sector,
-			struct blk_zone *zones, unsigned int *nr_zones,
-			gfp_t gfp_mask)
+			struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
 	unsigned int i, nrz;
@@ -184,7 +186,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 	nrz = min(*nr_zones,
 		  __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
 	ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
-			       zones, &nrz, gfp_mask);
+			       zones, &nrz);
 	if (ret)
 		return ret;
 
@@ -305,9 +307,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!zones)
 		return -ENOMEM;
 
-	ret = blkdev_report_zones(bdev, rep.sector,
-				  zones, &rep.nr_zones,
-				  GFP_KERNEL);
+	ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
 	if (ret)
 		goto out;
 
@@ -373,22 +373,25 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
 * Allocate an array of struct blk_zone to get nr_zones zone information.
 * The allocated array may be smaller than nr_zones.
 */
-static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
+static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
 {
-	size_t size = *nr_zones * sizeof(struct blk_zone);
-	struct page *page;
-	int order;
-
-	for (order = get_order(size); order >= 0; order--) {
-		page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
-		if (page) {
-			*nr_zones = min_t(unsigned int, *nr_zones,
-					  (PAGE_SIZE << order) / sizeof(struct blk_zone));
-			return page_address(page);
-		}
+	struct blk_zone *zones;
+	size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
+
+	/*
+	 * GFP_KERNEL here is meaningless as the caller task context has
+	 * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
+	 * with memalloc_noio_save().
+	 */
+	zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
+	if (!zones) {
+		*nr_zones = 0;
+		return NULL;
 	}
 
-	return NULL;
+	*nr_zones = nrz;
+
+	return zones;
 }
 
 void blk_queue_free_zone_bitmaps(struct request_queue *q)
@@ -415,6 +418,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
 	unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
 	unsigned int i, rep_nr_zones = 0, z = 0, nrz;
 	struct blk_zone *zones = NULL;
+	unsigned int noio_flag;
 	sector_t sector = 0;
 	int ret = 0;
 
@@ -427,6 +431,12 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
 		return 0;
 	}
 
+	/*
+	 * Ensure that all memory allocations in this context are done as
+	 * if GFP_NOIO was specified.
+	 */
+	noio_flag = memalloc_noio_save();
+
 	if (!blk_queue_is_zoned(q) || !nr_zones) {
 		nr_zones = 0;
 		goto update;
@@ -443,13 +453,13 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
 
 	/* Get zone information and initialize seq_zones_bitmap */
 	rep_nr_zones = nr_zones;
-	zones = blk_alloc_zones(q->node, &rep_nr_zones);
+	zones = blk_alloc_zones(&rep_nr_zones);
 	if (!zones)
 		goto out;
 
 	while (z < nr_zones) {
 		nrz = min(nr_zones - z, rep_nr_zones);
-		ret = blk_report_zones(disk, sector, zones, &nrz, GFP_NOIO);
+		ret = blk_report_zones(disk, sector, zones, &nrz);
 		if (ret)
 			goto out;
 		if (!nrz)
@@ -480,8 +490,9 @@ update:
 	blk_mq_unfreeze_queue(q);
 
 out:
-	free_pages((unsigned long)zones,
-		   get_order(rep_nr_zones * sizeof(struct blk_zone)));
+	memalloc_noio_restore(noio_flag);
+
+	kvfree(zones);
 	kfree(seq_zones_wlock);
 	kfree(seq_zones_bitmap);
 
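The pattern replacing the gfp_t plumbing is worth spelling out: instead of threading allocation flags through every report_zones() callee, the caller scopes its whole task. A minimal sketch of that scoped-GFP idiom:

	#include <linux/sched/mm.h>
	#include <linux/slab.h>

	/* Sketch: with PF_MEMALLOC_NOIO set on the task, every allocation
	 * below -- even one that asks for GFP_KERNEL, such as the kvcalloc()
	 * above -- implicitly behaves as GFP_NOIO and cannot recurse into
	 * the IO path being revalidated. */
	static void *alloc_in_noio_scope(size_t size)
	{
		unsigned int noio_flag = memalloc_noio_save();
		void *p = kmalloc(size, GFP_KERNEL);	/* effectively GFP_NOIO */

		memalloc_noio_restore(noio_flag);
		return p;
	}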
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3a9bca3aa093..9bcde2325893 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -134,6 +134,8 @@ static struct dentry *nbd_dbg_dir;
 
 #define NBD_MAGIC 0x68797548
 
+#define NBD_DEF_BLKSIZE 1024
+
 static unsigned int nbds_max = 16;
 static int max_part = 16;
 static struct workqueue_struct *recv_workqueue;
@@ -1236,6 +1238,14 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
 	nbd_config_put(nbd);
 }
 
+static bool nbd_is_valid_blksize(unsigned long blksize)
+{
+	if (!blksize || !is_power_of_2(blksize) || blksize < 512 ||
+	    blksize > PAGE_SIZE)
+		return false;
+	return true;
+}
+
 /* Must be called with config_lock held */
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
@@ -1251,8 +1261,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_SET_SOCK:
 		return nbd_add_socket(nbd, arg, false);
 	case NBD_SET_BLKSIZE:
-		if (!arg || !is_power_of_2(arg) || arg < 512 ||
-		    arg > PAGE_SIZE)
+		if (!arg)
+			arg = NBD_DEF_BLKSIZE;
+		if (!nbd_is_valid_blksize(arg))
 			return -EINVAL;
 		nbd_size_set(nbd, arg,
 			     div_s64(config->bytesize, arg));
@@ -1332,7 +1343,7 @@ static struct nbd_config *nbd_alloc_config(void)
 	atomic_set(&config->recv_threads, 0);
 	init_waitqueue_head(&config->recv_wq);
 	init_waitqueue_head(&config->conn_wait);
-	config->blksize = 1024;
+	config->blksize = NBD_DEF_BLKSIZE;
 	atomic_set(&config->live_connections, 0);
 	try_module_get(THIS_MODULE);
 	return config;
@@ -1673,6 +1684,30 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
 	[NBD_DEVICE_CONNECTED]		=	{ .type = NLA_U8 },
 };
 
+static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
+{
+	struct nbd_config *config = nbd->config;
+	u64 bsize = config->blksize;
+	u64 bytes = config->bytesize;
+
+	if (info->attrs[NBD_ATTR_SIZE_BYTES])
+		bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
+
+	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
+		bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
+		if (!bsize)
+			bsize = NBD_DEF_BLKSIZE;
+		if (!nbd_is_valid_blksize(bsize)) {
+			printk(KERN_ERR "Invalid block size %llu\n", bsize);
+			return -EINVAL;
+		}
+	}
+
+	if (bytes != config->bytesize || bsize != config->blksize)
+		nbd_size_set(nbd, bsize, div64_u64(bytes, bsize));
+	return 0;
+}
+
 static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nbd_device *nbd = NULL;
@@ -1760,16 +1795,10 @@ again:
 	refcount_set(&nbd->config_refs, 1);
 	set_bit(NBD_BOUND, &config->runtime_flags);
 
-	if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
-		u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
-		nbd_size_set(nbd, config->blksize,
-			     div64_u64(bytes, config->blksize));
-	}
-	if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
-		u64 bsize =
-			nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
-		nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
-	}
+	ret = nbd_genl_size_set(info, nbd);
+	if (ret)
+		goto out;
+
 	if (info->attrs[NBD_ATTR_TIMEOUT]) {
 		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
 		nbd->tag_set.timeout = timeout * HZ;
@@ -1938,6 +1967,10 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
+	ret = nbd_genl_size_set(info, nbd);
+	if (ret)
+		goto out;
+
 	if (info->attrs[NBD_ATTR_TIMEOUT]) {
 		u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
 		nbd->tag_set.timeout = timeout * HZ;
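The accepted block sizes are therefore the powers of two between 512 and PAGE_SIZE, with 0 now mapping to the 1024-byte default instead of crashing. A quick standalone check of the predicate (assuming 4 KiB pages):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096UL	/* assumed */
	#define NBD_DEF_BLKSIZE 1024UL

	static bool is_power_of_2(unsigned long n)
	{
		return n != 0 && (n & (n - 1)) == 0;
	}

	/* Mirrors nbd_is_valid_blksize(): power of two in [512, PAGE_SIZE]. */
	static bool nbd_is_valid_blksize(unsigned long blksize)
	{
		return is_power_of_2(blksize) && blksize >= 512 &&
		       blksize <= PAGE_SIZE;
	}

	int main(void)
	{
		unsigned long sizes[] = { 0, 256, 512, 1000, 1024, 4096, 8192 };

		for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
			/* 0 falls back to the default, as in the patch */
			unsigned long bs = sizes[i] ? sizes[i] : NBD_DEF_BLKSIZE;

			printf("%5lu -> %s\n", sizes[i],
			       nbd_is_valid_blksize(bs) ? "valid" : "-EINVAL");
		}
		return 0;
	}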
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34b22d6523ba..a1b9929bd911 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -89,8 +89,7 @@ struct nullb {
 int null_zone_init(struct nullb_device *dev);
 void null_zone_exit(struct nullb_device *dev);
 int null_zone_report(struct gendisk *disk, sector_t sector,
-		     struct blk_zone *zones, unsigned int *nr_zones,
-		     gfp_t gfp_mask);
+		     struct blk_zone *zones, unsigned int *nr_zones);
 void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 		     unsigned int nr_sectors);
 void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
@@ -103,7 +102,7 @@ static inline int null_zone_init(struct nullb_device *dev)
 static inline void null_zone_exit(struct nullb_device *dev) {}
 static inline int null_zone_report(struct gendisk *disk, sector_t sector,
 				   struct blk_zone *zones,
-				   unsigned int *nr_zones, gfp_t gfp_mask)
+				   unsigned int *nr_zones)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index fca0c97ff1aa..cb28d93f2bd1 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -67,8 +67,7 @@ void null_zone_exit(struct nullb_device *dev)
 }
 
 int null_zone_report(struct gendisk *disk, sector_t sector,
-		     struct blk_zone *zones, unsigned int *nr_zones,
-		     gfp_t gfp_mask)
+		     struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct nullb *nullb = disk->private_data;
 	struct nullb_device *dev = nullb->dev;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index a9bc518156f2..2900fbde89b3 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -461,15 +461,14 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static int flakey_report_zones(struct dm_target *ti, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones,
-			       gfp_t gfp_mask)
+			       struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct flakey_c *fc = ti->private;
 	int ret;
 
 	/* Do report and remap it */
 	ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
-				  zones, nr_zones, gfp_mask);
+				  zones, nr_zones);
 	if (ret != 0)
 		return ret;
 
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..ecefe6703736 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -137,15 +137,14 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static int linear_report_zones(struct dm_target *ti, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones,
-			       gfp_t gfp_mask)
+			       struct blk_zone *zones, unsigned int *nr_zones)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
 	int ret;
 
 	/* Do report and remap it */
 	ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
-				  zones, nr_zones, gfp_mask);
+				  zones, nr_zones);
 	if (ret != 0)
 		return ret;
 
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index d8334cd45d7c..9faf3e49c7af 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/crc32.h>
+#include <linux/sched/mm.h>
 
 #define	DM_MSG_PREFIX		"zoned metadata"
 
@@ -1162,8 +1163,7 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
 	while (sector < dev->capacity) {
 		/* Get zone information */
 		nr_blkz = DMZ_REPORT_NR_ZONES;
-		ret = blkdev_report_zones(dev->bdev, sector, blkz,
-					  &nr_blkz, GFP_KERNEL);
+		ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz);
 		if (ret) {
 			dmz_dev_err(dev, "Report zones failed %d", ret);
 			goto out;
@@ -1201,12 +1201,20 @@ out:
 static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
 {
 	unsigned int nr_blkz = 1;
+	unsigned int noio_flag;
 	struct blk_zone blkz;
 	int ret;
 
-	/* Get zone information from disk */
+	/*
+	 * Get zone information from disk. Since blkdev_report_zones() uses
+	 * GFP_KERNEL by default for memory allocations, set the per-task
+	 * PF_MEMALLOC_NOIO flag so that all allocations are done as if
+	 * GFP_NOIO was specified.
+	 */
+	noio_flag = memalloc_noio_save();
 	ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
-				  &blkz, &nr_blkz, GFP_NOIO);
+				  &blkz, &nr_blkz);
+	memalloc_noio_restore(noio_flag);
 	if (!nr_blkz)
 		ret = -EIO;
 	if (ret) {
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5475081dcbd6..61f1152b74e9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -441,8 +441,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
-			       struct blk_zone *zones, unsigned int *nr_zones,
-			       gfp_t gfp_mask)
+			       struct blk_zone *zones, unsigned int *nr_zones)
 {
 #ifdef CONFIG_BLK_DEV_ZONED
 	struct mapped_device *md = disk->private_data;
@@ -480,8 +479,7 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
 	 * So there is no need to loop here trying to fill the entire array
 	 * of zones.
 	 */
-	ret = tgt->type->report_zones(tgt, sector, zones,
-				      nr_zones, gfp_mask);
+	ret = tgt->type->report_zones(tgt, sector, zones, nr_zones);
 
 out:
 	dm_put_live_table(md, srcu_idx);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b2dd4e391f5c..cc09b81fc7f4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -11,6 +11,7 @@
 #include <linux/hdreg.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/backing-dev.h>
 #include <linux/list_sort.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -1626,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 {
 	sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
 	unsigned short bs = 1 << ns->lba_shift;
+	u32 atomic_bs, phys_bs, io_opt;
 
 	if (ns->lba_shift > PAGE_SHIFT) {
 		/* unsupported block size, set capacity to 0 later */
@@ -1634,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
 
+	if (id->nabo == 0) {
+		/*
+		 * Bit 1 indicates whether NAWUPF is defined for this namespace
+		 * and whether it should be used instead of AWUPF. If NAWUPF ==
+		 * 0 then AWUPF must be used instead.
+		 */
+		if (id->nsfeat & (1 << 1) && id->nawupf)
+			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+		else
+			atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+	} else {
+		atomic_bs = bs;
+	}
+	phys_bs = bs;
+	io_opt = bs;
+	if (id->nsfeat & (1 << 4)) {
+		/* NPWG = Namespace Preferred Write Granularity */
+		phys_bs *= 1 + le16_to_cpu(id->npwg);
+		/* NOWS = Namespace Optimal Write Size */
+		io_opt *= 1 + le16_to_cpu(id->nows);
+	}
+
 	blk_queue_logical_block_size(disk->queue, bs);
-	blk_queue_physical_block_size(disk->queue, bs);
-	blk_queue_io_min(disk->queue, bs);
+	/*
+	 * Linux filesystems assume writing a single physical block is
+	 * an atomic operation. Hence limit the physical block size to the
+	 * value of the Atomic Write Unit Power Fail parameter.
+	 */
+	blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
+	blk_queue_io_min(disk->queue, phys_bs);
+	blk_queue_io_opt(disk->queue, io_opt);
 
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
@@ -2386,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
 	lockdep_assert_held(&nvme_subsystems_lock);
 
 	list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
-		if (ctrl->state == NVME_CTRL_DELETING ||
-		    ctrl->state == NVME_CTRL_DEAD)
+		if (tmp->state == NVME_CTRL_DELETING ||
+		    tmp->state == NVME_CTRL_DEAD)
 			continue;
 
 		if (tmp->cntlid == ctrl->cntlid) {
@@ -2433,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
 	subsys->vendor_id = le16_to_cpu(id->vid);
 	subsys->cmic = id->cmic;
+	subsys->awupf = le16_to_cpu(id->awupf);
 #ifdef CONFIG_NVME_MULTIPATH
 	subsys->iopolicy = NVME_IOPOLICY_NUMA;
 #endif
@@ -3274,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		goto out_free_ns;
 	}
 
+	if (ctrl->opts && ctrl->opts->data_digest)
+		ns->queue->backing_dev_info->capabilities
+			|= BDI_CAP_STABLE_WRITES;
+
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
 	if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
 		blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
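All of the new namespace fields are 0's based, so each is incremented before scaling by the logical block size. A worked standalone example with assumed field values:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t bs = 512;	/* logical block size, 1 << lba_shift */
		uint16_t nawupf = 7;	/* assumed: 0's based -> 8 LBAs atomic */
		uint16_t npwg = 7;	/* assumed preferred write granularity */
		uint16_t nows = 63;	/* assumed optimal write size */

		uint32_t atomic_bs = (1 + nawupf) * bs;	/* 4096 */
		uint32_t phys_bs   = (1 + npwg) * bs;	/* 4096 */
		uint32_t io_opt    = (1 + nows) * bs;	/* 32768 */

		/* The reported physical block size is capped at atomic_bs so
		 * filesystems may keep assuming that single-physical-block
		 * writes are power-fail atomic. */
		uint32_t rep_phys = phys_bs < atomic_bs ? phys_bs : atomic_bs;

		printf("phys=%u io_min=%u io_opt=%u\n", rep_phys, phys_bs, io_opt);
		return 0;
	}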
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index dcb2b799966f..232d8094091b 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
 
 static struct workqueue_struct *nvme_fc_wq;
 
+static bool nvme_fc_waiting_to_unload;
+static DECLARE_COMPLETION(nvme_fc_unload_proceed);
+
 /*
  * These items are short-term. They will eventually be moved into
  * a generic FC class. See comments in module init.
@@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref)
 	/* remove from transport list */
 	spin_lock_irqsave(&nvme_fc_lock, flags);
 	list_del(&lport->port_list);
+	if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
+		complete(&nvme_fc_unload_proceed);
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
 	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -3457,11 +3462,51 @@ out_destroy_wq:
 	return ret;
 }
 
+static void
+nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
+{
+	struct nvme_fc_ctrl *ctrl;
+
+	spin_lock(&rport->lock);
+	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+		dev_warn(ctrl->ctrl.device,
+			"NVME-FC{%d}: transport unloading: deleting ctrl\n",
+			ctrl->cnum);
+		nvme_delete_ctrl(&ctrl->ctrl);
+	}
+	spin_unlock(&rport->lock);
+}
+
+static void
+nvme_fc_cleanup_for_unload(void)
+{
+	struct nvme_fc_lport *lport;
+	struct nvme_fc_rport *rport;
+
+	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
+		list_for_each_entry(rport, &lport->endp_list, endp_list) {
+			nvme_fc_delete_controllers(rport);
+		}
+	}
+}
+
 static void __exit nvme_fc_exit_module(void)
 {
-	/* sanity check - all lports should be removed */
-	if (!list_empty(&nvme_fc_lport_list))
-		pr_warn("%s: localport list not empty\n", __func__);
+	unsigned long flags;
+	bool need_cleanup = false;
+
+	spin_lock_irqsave(&nvme_fc_lock, flags);
+	nvme_fc_waiting_to_unload = true;
+	if (!list_empty(&nvme_fc_lport_list)) {
+		need_cleanup = true;
+		nvme_fc_cleanup_for_unload();
+	}
+	spin_unlock_irqrestore(&nvme_fc_lock, flags);
+	if (need_cleanup) {
+		pr_info("%s: waiting for ctlr deletes\n", __func__);
+		wait_for_completion(&nvme_fc_unload_proceed);
+		pr_info("%s: ctrl deletes complete\n", __func__);
+	}
 
 	nvmf_unregister_transport(&nvme_fc_transport);
 
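The shutdown handshake follows a common kernel pattern: flag the unload, kick asynchronous teardown of whatever is still alive, then block on a completion that the last release path fires. A condensed sketch of the pattern with hypothetical names:

	#include <linux/completion.h>
	#include <linux/list.h>
	#include <linux/spinlock.h>

	static LIST_HEAD(live_objects);		/* hypothetical live-object list */
	static DEFINE_SPINLOCK(live_lock);
	static bool unloading;
	static DECLARE_COMPLETION(unload_proceed);

	/* Release path: the last object to go away wakes the exiting module. */
	static void object_release(struct list_head *entry)
	{
		spin_lock(&live_lock);
		list_del(entry);
		if (unloading && list_empty(&live_objects))
			complete(&unload_proceed);
		spin_unlock(&live_lock);
	}

	static void demo_exit(void)
	{
		bool wait;

		spin_lock(&live_lock);
		unloading = true;
		wait = !list_empty(&live_objects);
		/* ...kick asynchronous deletion of the survivors here... */
		spin_unlock(&live_lock);

		if (wait)
			wait_for_completion(&unload_proceed);
	}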
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 499acf07d61a..a9a927677970 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c | |||
@@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns) | |||
123 | } | 123 | } |
124 | } | 124 | } |
125 | 125 | ||
126 | static bool nvme_path_is_disabled(struct nvme_ns *ns) | ||
127 | { | ||
128 | return ns->ctrl->state != NVME_CTRL_LIVE || | ||
129 | test_bit(NVME_NS_ANA_PENDING, &ns->flags) || | ||
130 | test_bit(NVME_NS_REMOVING, &ns->flags); | ||
131 | } | ||
132 | |||
126 | static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) | 133 | static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) |
127 | { | 134 | { |
128 | int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; | 135 | int found_distance = INT_MAX, fallback_distance = INT_MAX, distance; |
129 | struct nvme_ns *found = NULL, *fallback = NULL, *ns; | 136 | struct nvme_ns *found = NULL, *fallback = NULL, *ns; |
130 | 137 | ||
131 | list_for_each_entry_rcu(ns, &head->list, siblings) { | 138 | list_for_each_entry_rcu(ns, &head->list, siblings) { |
132 | if (ns->ctrl->state != NVME_CTRL_LIVE || | 139 | if (nvme_path_is_disabled(ns)) |
133 | test_bit(NVME_NS_ANA_PENDING, &ns->flags)) | ||
134 | continue; | 140 | continue; |
135 | 141 | ||
136 | if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) | 142 | if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) |
@@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, | |||
178 | { | 184 | { |
179 | struct nvme_ns *ns, *found, *fallback = NULL; | 185 | struct nvme_ns *ns, *found, *fallback = NULL; |
180 | 186 | ||
181 | if (list_is_singular(&head->list)) | 187 | if (list_is_singular(&head->list)) { |
188 | if (nvme_path_is_disabled(old)) | ||
189 | return NULL; | ||
182 | return old; | 190 | return old; |
191 | } | ||
183 | 192 | ||
184 | for (ns = nvme_next_ns(head, old); | 193 | for (ns = nvme_next_ns(head, old); |
185 | ns != old; | 194 | ns != old; |
186 | ns = nvme_next_ns(head, ns)) { | 195 | ns = nvme_next_ns(head, ns)) { |
187 | if (ns->ctrl->state != NVME_CTRL_LIVE || | 196 | if (nvme_path_is_disabled(ns)) |
188 | test_bit(NVME_NS_ANA_PENDING, &ns->flags)) | ||
189 | continue; | 197 | continue; |
190 | 198 | ||
191 | if (ns->ana_state == NVME_ANA_OPTIMIZED) { | 199 | if (ns->ana_state == NVME_ANA_OPTIMIZED) { |
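Factoring the eligibility test into nvme_path_is_disabled() keeps the NUMA and round-robin selectors in agreement, and the singular-list case now re-checks the lone path instead of returning it even while it is being torn down. A standalone model of the round-robin walk, simplified to an array where the kernel iterates a circular siblings list:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct path {
    const char *name;
    bool disabled;     /* models nvme_path_is_disabled() */
    bool optimized;    /* models ana_state == NVME_ANA_OPTIMIZED */
};

/* Start just after @old and walk the ring once: prefer an optimized
 * path, remember the first merely-usable one as a fallback, and
 * return NULL when nothing qualifies. */
static struct path *rr_next(struct path *p, int n, int old)
{
    struct path *fallback = NULL;
    int i;

    if (n == 1)    /* list_is_singular(): still re-check the path */
        return p[old].disabled ? NULL : &p[old];

    for (i = (old + 1) % n; i != old; i = (i + 1) % n) {
        if (p[i].disabled)
            continue;
        if (p[i].optimized)
            return &p[i];
        if (!fallback)
            fallback = &p[i];
    }
    return fallback;
}

int main(void)
{
    struct path paths[] = {
        { "path0", true,  false },    /* ANA pending: skipped */
        { "path1", false, false },    /* usable, non-optimized */
        { "path2", false, true  },    /* optimized: preferred */
    };
    struct path *next = rr_next(paths, 3, 0);

    printf("next: %s\n", next ? next->name : "(none)");    /* path2 */
    return 0;
}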
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ea45d7d393ad..716a876119c8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h | |||
@@ -283,6 +283,7 @@ struct nvme_subsystem { | |||
283 | char firmware_rev[8]; | 283 | char firmware_rev[8]; |
284 | u8 cmic; | 284 | u8 cmic; |
285 | u16 vendor_id; | 285 | u16 vendor_id; |
286 | u16 awupf; /* 0's based awupf value. */ | ||
286 | struct ida ns_ida; | 287 | struct ida ns_ida; |
287 | #ifdef CONFIG_NVME_MULTIPATH | 288 | #ifdef CONFIG_NVME_MULTIPATH |
288 | enum nvme_iopolicy iopolicy; | 289 | enum nvme_iopolicy iopolicy; |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 189352081994..bb970ca82517 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -1439,11 +1439,15 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, | |||
1439 | 1439 | ||
1440 | if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { | 1440 | if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { |
1441 | nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); | 1441 | nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth)); |
1442 | nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, | 1442 | if (nvmeq->sq_cmds) { |
1443 | nvmeq->sq_cmds); | 1443 | nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, |
1444 | if (nvmeq->sq_dma_addr) { | 1444 | nvmeq->sq_cmds); |
1445 | set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); | 1445 | if (nvmeq->sq_dma_addr) { |
1446 | return 0; | 1446 | set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); |
1447 | return 0; | ||
1448 | } | ||
1449 | |||
1450 | pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth)); | ||
1447 | } | 1451 | } |
1448 | } | 1452 | } |
1449 | 1453 | ||
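The reordering matters because the old code computed a peer-to-peer bus address from a possibly-NULL sq_cmds pointer, and when the address translation failed it fell through to the regular allocation while leaking the CMB chunk. A hedged standalone sketch of the corrected try-then-fallback shape, with malloc standing in for pci_alloc_p2pmem():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the p2p allocator and its bus-address
 * translation; the translation is made to fail to show the fallback. */
static void *fast_alloc(size_t len) { return malloc(len); }
static uintptr_t fast_virt_to_bus(void *p) { (void)p; return 0; }
static void fast_free(void *p) { free(p); }

static void *alloc_queue(size_t len, uintptr_t *bus, int *in_fast_mem)
{
    void *buf = fast_alloc(len);

    if (buf) {
        *bus = fast_virt_to_bus(buf);
        if (*bus) {
            *in_fast_mem = 1;
            return buf;
        }
        fast_free(buf);    /* the fix: no leak on the way out */
    }

    /* Fallback, dma_alloc_coherent() in the real driver. */
    *in_fast_mem = 0;
    *bus = 1;    /* pretend the ordinary path always translates */
    return calloc(1, len);
}

int main(void)
{
    uintptr_t bus;
    int fast;
    void *q = alloc_queue(4096, &bus, &fast);

    printf("queue in %s memory\n", fast ? "CMB" : "host");
    free(q);
    return 0;
}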
@@ -2250,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev) | |||
2250 | if (!dev->ctrl.tagset) { | 2254 | if (!dev->ctrl.tagset) { |
2251 | dev->tagset.ops = &nvme_mq_ops; | 2255 | dev->tagset.ops = &nvme_mq_ops; |
2252 | dev->tagset.nr_hw_queues = dev->online_queues - 1; | 2256 | dev->tagset.nr_hw_queues = dev->online_queues - 1; |
2253 | dev->tagset.nr_maps = 2; /* default + read */ | 2257 | dev->tagset.nr_maps = 1; /* default */ |
2258 | if (dev->io_queues[HCTX_TYPE_READ]) | ||
2259 | dev->tagset.nr_maps++; | ||
2254 | if (dev->io_queues[HCTX_TYPE_POLL]) | 2260 | if (dev->io_queues[HCTX_TYPE_POLL]) |
2255 | dev->tagset.nr_maps++; | 2261 | dev->tagset.nr_maps++; |
2256 | dev->tagset.timeout = NVME_IO_TIMEOUT; | 2262 | dev->tagset.timeout = NVME_IO_TIMEOUT; |
@@ -2289,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) | |||
2289 | 2295 | ||
2290 | pci_set_master(pdev); | 2296 | pci_set_master(pdev); |
2291 | 2297 | ||
2292 | if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && | 2298 | if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64))) |
2293 | dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) | ||
2294 | goto disable; | 2299 | goto disable; |
2295 | 2300 | ||
2296 | if (readl(dev->bar + NVME_REG_CSTS) == -1) { | 2301 | if (readl(dev->bar + NVME_REG_CSTS) == -1) { |
@@ -2498,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work) | |||
2498 | * Limit the max command size to prevent iod->sg allocations going | 2503 | * Limit the max command size to prevent iod->sg allocations going |
2499 | * over a single page. | 2504 | * over a single page. |
2500 | */ | 2505 | */ |
2501 | dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; | 2506 | dev->ctrl.max_hw_sectors = min_t(u32, |
2507 | NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9); | ||
2502 | dev->ctrl.max_segments = NVME_MAX_SEGS; | 2508 | dev->ctrl.max_segments = NVME_MAX_SEGS; |
2503 | 2509 | ||
2504 | /* | 2510 | /* |
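max_hw_sectors is now additionally clamped by dma_max_mapping_size(), so a DMA layer that can only map, say, 256 KB at a time (a typical swiotlb bounce-buffer limit) caps the command size even though the driver itself would allow 4 MB. The unit conversions — KB << 1 yields 512-byte sectors, bytes >> 9 likewise — recomputed with assumed sample values:

#include <stdio.h>

#define NVME_MAX_KB_SZ 4096u    /* driver cap: 4 MB per command */
#define SECTOR_SHIFT 9

static unsigned int min_u32(unsigned int a, unsigned int b)
{
    return a < b ? a : b;
}

int main(void)
{
    /* Assumed: the platform maps at most 256 KB per DMA mapping. */
    unsigned long long dma_max = 256 * 1024;

    unsigned int max_hw_sectors = min_u32(NVME_MAX_KB_SZ << 1,
            (unsigned int)(dma_max >> SECTOR_SHIFT));

    /* min(8192, 512) sectors -> 512 sectors = 256 KB */
    printf("max_hw_sectors = %u (%u KB)\n",
           max_hw_sectors, max_hw_sectors / 2);
    return 0;
}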
@@ -2923,7 +2929,7 @@ static int nvme_simple_resume(struct device *dev) | |||
2923 | return 0; | 2929 | return 0; |
2924 | } | 2930 | } |
2925 | 2931 | ||
2926 | const struct dev_pm_ops nvme_dev_pm_ops = { | 2932 | static const struct dev_pm_ops nvme_dev_pm_ops = { |
2927 | .suspend = nvme_suspend, | 2933 | .suspend = nvme_suspend, |
2928 | .resume = nvme_resume, | 2934 | .resume = nvme_resume, |
2929 | .freeze = nvme_simple_suspend, | 2935 | .freeze = nvme_simple_suspend, |
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 08a2501b9357..606b13d35d16 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c | |||
@@ -860,7 +860,14 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) | |||
860 | else | 860 | else |
861 | flags |= MSG_MORE; | 861 | flags |= MSG_MORE; |
862 | 862 | ||
863 | ret = kernel_sendpage(queue->sock, page, offset, len, flags); | 863 | /* can't zcopy slab pages */ |
864 | if (unlikely(PageSlab(page))) { | ||
865 | ret = sock_no_sendpage(queue->sock, page, offset, len, | ||
866 | flags); | ||
867 | } else { | ||
868 | ret = kernel_sendpage(queue->sock, page, offset, len, | ||
869 | flags); | ||
870 | } | ||
864 | if (ret <= 0) | 871 | if (ret <= 0) |
865 | return ret; | 872 | return ret; |
866 | 873 | ||
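The branch exists because kernel_sendpage() takes a reference on the page and may leave it queued in the socket long after the caller returns; that is unsafe for slab-backed pages, which can be freed and recycled underneath the socket, so those are routed through the copying sock_no_sendpage() path instead. As a kernel-context sketch (not a standalone program), the check could be centralized in a wrapper; the helper name here is made up:

/* Hedged sketch: copy for slab pages, zero-copy for the rest.
 * Mirrors the branch added to nvme_tcp_try_send_data(). */
static inline int sendpage_or_copy(struct socket *sock, struct page *page,
                                   int offset, size_t len, int flags)
{
    if (unlikely(PageSlab(page)))
        return sock_no_sendpage(sock, page, offset, len, flags);
    return kernel_sendpage(sock, page, offset, len, flags);
}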
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index f01ad0fd60bb..9778eb0406b3 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c | |||
@@ -7,6 +7,17 @@ | |||
7 | #include <asm/unaligned.h> | 7 | #include <asm/unaligned.h> |
8 | #include "trace.h" | 8 | #include "trace.h" |
9 | 9 | ||
10 | static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10) | ||
11 | { | ||
12 | const char *ret = trace_seq_buffer_ptr(p); | ||
13 | u16 sqid = get_unaligned_le16(cdw10); | ||
14 | |||
15 | trace_seq_printf(p, "sqid=%u", sqid); | ||
16 | trace_seq_putc(p, 0); | ||
17 | |||
18 | return ret; | ||
19 | } | ||
20 | |||
10 | static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10) | 21 | static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10) |
11 | { | 22 | { |
12 | const char *ret = trace_seq_buffer_ptr(p); | 23 | const char *ret = trace_seq_buffer_ptr(p); |
@@ -23,6 +34,17 @@ static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10) | |||
23 | return ret; | 34 | return ret; |
24 | } | 35 | } |
25 | 36 | ||
37 | static const char *nvme_trace_delete_cq(struct trace_seq *p, u8 *cdw10) | ||
38 | { | ||
39 | const char *ret = trace_seq_buffer_ptr(p); | ||
40 | u16 cqid = get_unaligned_le16(cdw10); | ||
41 | |||
42 | trace_seq_printf(p, "cqid=%u", cqid); | ||
43 | trace_seq_putc(p, 0); | ||
44 | |||
45 | return ret; | ||
46 | } | ||
47 | |||
26 | static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10) | 48 | static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10) |
27 | { | 49 | { |
28 | const char *ret = trace_seq_buffer_ptr(p); | 50 | const char *ret = trace_seq_buffer_ptr(p); |
@@ -107,8 +129,12 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, | |||
107 | u8 opcode, u8 *cdw10) | 129 | u8 opcode, u8 *cdw10) |
108 | { | 130 | { |
109 | switch (opcode) { | 131 | switch (opcode) { |
132 | case nvme_admin_delete_sq: | ||
133 | return nvme_trace_delete_sq(p, cdw10); | ||
110 | case nvme_admin_create_sq: | 134 | case nvme_admin_create_sq: |
111 | return nvme_trace_create_sq(p, cdw10); | 135 | return nvme_trace_create_sq(p, cdw10); |
136 | case nvme_admin_delete_cq: | ||
137 | return nvme_trace_delete_cq(p, cdw10); | ||
112 | case nvme_admin_create_cq: | 138 | case nvme_admin_create_cq: |
113 | return nvme_trace_create_cq(p, cdw10); | 139 | return nvme_trace_create_cq(p, cdw10); |
114 | case nvme_admin_identify: | 140 | case nvme_admin_identify: |
@@ -178,7 +204,7 @@ static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc) | |||
178 | { | 204 | { |
179 | const char *ret = trace_seq_buffer_ptr(p); | 205 | const char *ret = trace_seq_buffer_ptr(p); |
180 | 206 | ||
181 | trace_seq_printf(p, "spcecific=%*ph", 24, spc); | 207 | trace_seq_printf(p, "specific=%*ph", 24, spc); |
182 | trace_seq_putc(p, 0); | 208 | trace_seq_putc(p, 0); |
183 | return ret; | 209 | return ret; |
184 | } | 210 | } |
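The two new decoders pull the 16-bit queue identifier out of the first two bytes of CDW10, which is where the NVMe Delete I/O Submission/Completion Queue commands carry it. The byte handling, recomputed as standalone C with get_unaligned_le16() hand-rolled for illustration:

#include <stdint.h>
#include <stdio.h>

/* Hand-rolled little-endian 16-bit load, standing in for the
 * kernel's get_unaligned_le16(). */
static uint16_t le16_load(const uint8_t *p)
{
    return (uint16_t)(p[0] | (p[1] << 8));
}

int main(void)
{
    /* CDW10 of a Delete I/O Submission Queue command: bits 15:0
     * hold the SQID (here 5), the upper bits are reserved. */
    uint8_t cdw10[4] = { 0x05, 0x00, 0x00, 0x00 };

    printf("sqid=%u\n", le16_load(cdw10));    /* sqid=5 */
    return 0;
}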
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 9f72d515fc4b..4dc12ea52f23 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c | |||
@@ -442,6 +442,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) | |||
442 | break; | 442 | break; |
443 | } | 443 | } |
444 | 444 | ||
445 | if (ns->bdev) | ||
446 | nvmet_bdev_set_limits(ns->bdev, id); | ||
447 | |||
445 | /* | 448 | /* |
446 | * We just provide a single LBA format that matches what the | 449 | * We just provide a single LBA format that matches what the |
447 | * underlying device reports. | 450 | * underlying device reports. |
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 08dd5af357f7..cd52b9f15376 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c | |||
@@ -588,8 +588,10 @@ static struct config_group *nvmet_ns_make(struct config_group *group, | |||
588 | goto out; | 588 | goto out; |
589 | 589 | ||
590 | ret = -EINVAL; | 590 | ret = -EINVAL; |
591 | if (nsid == 0 || nsid == NVME_NSID_ALL) | 591 | if (nsid == 0 || nsid == NVME_NSID_ALL) { |
592 | pr_err("invalid nsid %#x\n", nsid); | ||
592 | goto out; | 593 | goto out; |
594 | } | ||
593 | 595 | ||
594 | ret = -ENOMEM; | 596 | ret = -ENOMEM; |
595 | ns = nvmet_ns_alloc(subsys, nsid); | 597 | ns = nvmet_ns_alloc(subsys, nsid); |
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index b8c1cc54a0db..b50b53db3746 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c | |||
@@ -434,7 +434,7 @@ fcloop_fcp_recv_work(struct work_struct *work) | |||
434 | int ret = 0; | 434 | int ret = 0; |
435 | bool aborted = false; | 435 | bool aborted = false; |
436 | 436 | ||
437 | spin_lock(&tfcp_req->reqlock); | 437 | spin_lock_irq(&tfcp_req->reqlock); |
438 | switch (tfcp_req->inistate) { | 438 | switch (tfcp_req->inistate) { |
439 | case INI_IO_START: | 439 | case INI_IO_START: |
440 | tfcp_req->inistate = INI_IO_ACTIVE; | 440 | tfcp_req->inistate = INI_IO_ACTIVE; |
@@ -443,11 +443,11 @@ fcloop_fcp_recv_work(struct work_struct *work) | |||
443 | aborted = true; | 443 | aborted = true; |
444 | break; | 444 | break; |
445 | default: | 445 | default: |
446 | spin_unlock(&tfcp_req->reqlock); | 446 | spin_unlock_irq(&tfcp_req->reqlock); |
447 | WARN_ON(1); | 447 | WARN_ON(1); |
448 | return; | 448 | return; |
449 | } | 449 | } |
450 | spin_unlock(&tfcp_req->reqlock); | 450 | spin_unlock_irq(&tfcp_req->reqlock); |
451 | 451 | ||
452 | if (unlikely(aborted)) | 452 | if (unlikely(aborted)) |
453 | ret = -ECANCELED; | 453 | ret = -ECANCELED; |
@@ -469,7 +469,7 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) | |||
469 | struct nvmefc_fcp_req *fcpreq; | 469 | struct nvmefc_fcp_req *fcpreq; |
470 | bool completed = false; | 470 | bool completed = false; |
471 | 471 | ||
472 | spin_lock(&tfcp_req->reqlock); | 472 | spin_lock_irq(&tfcp_req->reqlock); |
473 | fcpreq = tfcp_req->fcpreq; | 473 | fcpreq = tfcp_req->fcpreq; |
474 | switch (tfcp_req->inistate) { | 474 | switch (tfcp_req->inistate) { |
475 | case INI_IO_ABORTED: | 475 | case INI_IO_ABORTED: |
@@ -478,11 +478,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) | |||
478 | completed = true; | 478 | completed = true; |
479 | break; | 479 | break; |
480 | default: | 480 | default: |
481 | spin_unlock(&tfcp_req->reqlock); | 481 | spin_unlock_irq(&tfcp_req->reqlock); |
482 | WARN_ON(1); | 482 | WARN_ON(1); |
483 | return; | 483 | return; |
484 | } | 484 | } |
485 | spin_unlock(&tfcp_req->reqlock); | 485 | spin_unlock_irq(&tfcp_req->reqlock); |
486 | 486 | ||
487 | if (unlikely(completed)) { | 487 | if (unlikely(completed)) { |
488 | /* remove reference taken in original abort downcall */ | 488 | /* remove reference taken in original abort downcall */ |
@@ -494,9 +494,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) | |||
494 | nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, | 494 | nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, |
495 | &tfcp_req->tgt_fcp_req); | 495 | &tfcp_req->tgt_fcp_req); |
496 | 496 | ||
497 | spin_lock(&tfcp_req->reqlock); | 497 | spin_lock_irq(&tfcp_req->reqlock); |
498 | tfcp_req->fcpreq = NULL; | 498 | tfcp_req->fcpreq = NULL; |
499 | spin_unlock(&tfcp_req->reqlock); | 499 | spin_unlock_irq(&tfcp_req->reqlock); |
500 | 500 | ||
501 | fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); | 501 | fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); |
502 | /* call_host_done releases reference for abort downcall */ | 502 | /* call_host_done releases reference for abort downcall */ |
@@ -513,10 +513,10 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) | |||
513 | container_of(work, struct fcloop_fcpreq, tio_done_work); | 513 | container_of(work, struct fcloop_fcpreq, tio_done_work); |
514 | struct nvmefc_fcp_req *fcpreq; | 514 | struct nvmefc_fcp_req *fcpreq; |
515 | 515 | ||
516 | spin_lock(&tfcp_req->reqlock); | 516 | spin_lock_irq(&tfcp_req->reqlock); |
517 | fcpreq = tfcp_req->fcpreq; | 517 | fcpreq = tfcp_req->fcpreq; |
518 | tfcp_req->inistate = INI_IO_COMPLETED; | 518 | tfcp_req->inistate = INI_IO_COMPLETED; |
519 | spin_unlock(&tfcp_req->reqlock); | 519 | spin_unlock_irq(&tfcp_req->reqlock); |
520 | 520 | ||
521 | fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); | 521 | fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); |
522 | } | 522 | } |
@@ -535,7 +535,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, | |||
535 | if (!rport->targetport) | 535 | if (!rport->targetport) |
536 | return -ECONNREFUSED; | 536 | return -ECONNREFUSED; |
537 | 537 | ||
538 | tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL); | 538 | tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_ATOMIC); |
539 | if (!tfcp_req) | 539 | if (!tfcp_req) |
540 | return -ENOMEM; | 540 | return -ENOMEM; |
541 | 541 | ||
@@ -621,12 +621,12 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, | |||
621 | int fcp_err = 0, active, aborted; | 621 | int fcp_err = 0, active, aborted; |
622 | u8 op = tgt_fcpreq->op; | 622 | u8 op = tgt_fcpreq->op; |
623 | 623 | ||
624 | spin_lock(&tfcp_req->reqlock); | 624 | spin_lock_irq(&tfcp_req->reqlock); |
625 | fcpreq = tfcp_req->fcpreq; | 625 | fcpreq = tfcp_req->fcpreq; |
626 | active = tfcp_req->active; | 626 | active = tfcp_req->active; |
627 | aborted = tfcp_req->aborted; | 627 | aborted = tfcp_req->aborted; |
628 | tfcp_req->active = true; | 628 | tfcp_req->active = true; |
629 | spin_unlock(&tfcp_req->reqlock); | 629 | spin_unlock_irq(&tfcp_req->reqlock); |
630 | 630 | ||
631 | if (unlikely(active)) | 631 | if (unlikely(active)) |
632 | /* illegal - call while i/o active */ | 632 | /* illegal - call while i/o active */ |
@@ -634,9 +634,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, | |||
634 | 634 | ||
635 | if (unlikely(aborted)) { | 635 | if (unlikely(aborted)) { |
636 | /* target transport has aborted i/o prior */ | 636 | /* target transport has aborted i/o prior */ |
637 | spin_lock(&tfcp_req->reqlock); | 637 | spin_lock_irq(&tfcp_req->reqlock); |
638 | tfcp_req->active = false; | 638 | tfcp_req->active = false; |
639 | spin_unlock(&tfcp_req->reqlock); | 639 | spin_unlock_irq(&tfcp_req->reqlock); |
640 | tgt_fcpreq->transferred_length = 0; | 640 | tgt_fcpreq->transferred_length = 0; |
641 | tgt_fcpreq->fcp_error = -ECANCELED; | 641 | tgt_fcpreq->fcp_error = -ECANCELED; |
642 | tgt_fcpreq->done(tgt_fcpreq); | 642 | tgt_fcpreq->done(tgt_fcpreq); |
@@ -693,9 +693,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, | |||
693 | break; | 693 | break; |
694 | } | 694 | } |
695 | 695 | ||
696 | spin_lock(&tfcp_req->reqlock); | 696 | spin_lock_irq(&tfcp_req->reqlock); |
697 | tfcp_req->active = false; | 697 | tfcp_req->active = false; |
698 | spin_unlock(&tfcp_req->reqlock); | 698 | spin_unlock_irq(&tfcp_req->reqlock); |
699 | 699 | ||
700 | tgt_fcpreq->transferred_length = xfrlen; | 700 | tgt_fcpreq->transferred_length = xfrlen; |
701 | tgt_fcpreq->fcp_error = fcp_err; | 701 | tgt_fcpreq->fcp_error = fcp_err; |
@@ -715,9 +715,9 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, | |||
715 | * (one doing io, other doing abort) and only kills ops posted | 715 | * (one doing io, other doing abort) and only kills ops posted |
716 | * after the abort request | 716 | * after the abort request |
717 | */ | 717 | */ |
718 | spin_lock(&tfcp_req->reqlock); | 718 | spin_lock_irq(&tfcp_req->reqlock); |
719 | tfcp_req->aborted = true; | 719 | tfcp_req->aborted = true; |
720 | spin_unlock(&tfcp_req->reqlock); | 720 | spin_unlock_irq(&tfcp_req->reqlock); |
721 | 721 | ||
722 | tfcp_req->status = NVME_SC_INTERNAL; | 722 | tfcp_req->status = NVME_SC_INTERNAL; |
723 | 723 | ||
@@ -765,7 +765,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, | |||
765 | return; | 765 | return; |
766 | 766 | ||
767 | /* break initiator/target relationship for io */ | 767 | /* break initiator/target relationship for io */ |
768 | spin_lock(&tfcp_req->reqlock); | 768 | spin_lock_irq(&tfcp_req->reqlock); |
769 | switch (tfcp_req->inistate) { | 769 | switch (tfcp_req->inistate) { |
770 | case INI_IO_START: | 770 | case INI_IO_START: |
771 | case INI_IO_ACTIVE: | 771 | case INI_IO_ACTIVE: |
@@ -775,11 +775,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, | |||
775 | abortio = false; | 775 | abortio = false; |
776 | break; | 776 | break; |
777 | default: | 777 | default: |
778 | spin_unlock(&tfcp_req->reqlock); | 778 | spin_unlock_irq(&tfcp_req->reqlock); |
779 | WARN_ON(1); | 779 | WARN_ON(1); |
780 | return; | 780 | return; |
781 | } | 781 | } |
782 | spin_unlock(&tfcp_req->reqlock); | 782 | spin_unlock_irq(&tfcp_req->reqlock); |
783 | 783 | ||
784 | if (abortio) | 784 | if (abortio) |
785 | /* leave the reference while the work item is scheduled */ | 785 | /* leave the reference while the work item is scheduled */ |
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 7a1cf6437a6a..de0bff70ebb6 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c | |||
@@ -8,6 +8,45 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include "nvmet.h" | 9 | #include "nvmet.h" |
10 | 10 | ||
11 | void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) | ||
12 | { | ||
13 | const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; | ||
14 | /* Number of logical blocks per physical block. */ | ||
15 | const u32 ppl = ql->physical_block_size / ql->logical_block_size; | ||
16 | /* Logical blocks per physical block, 0's based. */ | ||
17 | const __le16 ppl0b = to0based(ppl); | ||
18 | |||
19 | /* | ||
20 | * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, | ||
21 | * NAWUPF, and NACWU are defined for this namespace and should be | ||
22 | * used by the host for this namespace instead of the AWUN, AWUPF, | ||
23 | * and ACWU fields in the Identify Controller data structure. If | ||
24 | * any of these fields are zero that means that the corresponding | ||
25 | * field from the identify controller data structure should be used. | ||
26 | */ | ||
27 | id->nsfeat |= 1 << 1; | ||
28 | id->nawun = ppl0b; | ||
29 | id->nawupf = ppl0b; | ||
30 | id->nacwu = ppl0b; | ||
31 | |||
32 | /* | ||
33 | * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and | ||
34 | * NOWS are defined for this namespace and should be used by | ||
35 | * the host for I/O optimization. | ||
36 | */ | ||
37 | id->nsfeat |= 1 << 4; | ||
38 | /* NPWG = Namespace Preferred Write Granularity. 0's based */ | ||
39 | id->npwg = ppl0b; | ||
40 | /* NPWA = Namespace Preferred Write Alignment. 0's based */ | ||
41 | id->npwa = id->npwg; | ||
42 | /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ | ||
43 | id->npdg = to0based(ql->discard_granularity / ql->logical_block_size); | ||
44 | /* NPDA = Namespace Preferred Deallocate Alignment. 0's based */ | ||
45 | id->npda = id->npdg; | ||
46 | /* NOWS = Namespace Optimal Write Size */ | ||
47 | id->nows = to0based(ql->io_opt / ql->logical_block_size); | ||
48 | } | ||
49 | |||
11 | int nvmet_bdev_ns_enable(struct nvmet_ns *ns) | 50 | int nvmet_bdev_ns_enable(struct nvmet_ns *ns) |
12 | { | 51 | { |
13 | int ret; | 52 | int ret; |
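Concretely: for a device with 512-byte logical and 4096-byte physical blocks, a 4 KB discard granularity and a 128 KB optimal I/O size, the function reports NAWUN/NAWUPF/NACWU/NPWG/NPWA of 7 (eight logical blocks, 0's based), NPDG/NPDA of 7, and NOWS of 255 (256 logical blocks). A standalone recomputation under those assumed queue limits, with the kernel's to0based() clamp reimplemented and byte order ignored:

#include <stdint.h>
#include <stdio.h>

/* min(1U << 16, a), then max(1U, .), then 0's based — the same clamp
 * as the kernel's to0based(), minus the cpu_to_le16(). */
static uint16_t to0based(uint32_t a)
{
    uint32_t v = a > (1u << 16) ? 1u << 16 : a;
    v = v < 1 ? 1 : v;
    return (uint16_t)(v - 1);
}

int main(void)
{
    /* Assumed queue limits for the example. */
    uint32_t logical = 512, physical = 4096;
    uint32_t discard_gran = 4096, io_opt = 128 * 1024;

    uint32_t ppl = physical / logical;  /* 8 logical blks per phys blk */

    printf("nawupf/npwg = %u\n", to0based(ppl));                    /* 7 */
    printf("npdg        = %u\n", to0based(discard_gran / logical)); /* 7 */
    printf("nows        = %u\n", to0based(io_opt / logical));     /* 255 */
    return 0;
}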
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index dc270944bb25..6ee66c610739 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h | |||
@@ -365,6 +365,7 @@ u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask); | |||
365 | void nvmet_execute_async_event(struct nvmet_req *req); | 365 | void nvmet_execute_async_event(struct nvmet_req *req); |
366 | 366 | ||
367 | u16 nvmet_parse_connect_cmd(struct nvmet_req *req); | 367 | u16 nvmet_parse_connect_cmd(struct nvmet_req *req); |
368 | void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id); | ||
368 | u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); | 369 | u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req); |
369 | u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); | 370 | u16 nvmet_file_parse_io_cmd(struct nvmet_req *req); |
370 | u16 nvmet_parse_admin_cmd(struct nvmet_req *req); | 371 | u16 nvmet_parse_admin_cmd(struct nvmet_req *req); |
@@ -492,4 +493,11 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req) | |||
492 | } | 493 | } |
493 | 494 | ||
494 | u16 errno_to_nvme_status(struct nvmet_req *req, int errno); | 495 | u16 errno_to_nvme_status(struct nvmet_req *req, int errno); |
496 | |||
497 | /* Convert a 32-bit number to a 16-bit 0's based number */ | ||
498 | static inline __le16 to0based(u32 a) | ||
499 | { | ||
500 | return cpu_to_le16(max(1U, min(1U << 16, a)) - 1); | ||
501 | } | ||
502 | |||
495 | #endif /* _NVMET_H */ | 503 | #endif /* _NVMET_H */ |
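The clamp in to0based() pins degenerate inputs into the representable 16-bit 0's based range: a zero (e.g. a device with no discard granularity) becomes 0 rather than wrapping, and anything above 65536 saturates at 0xffff. A quick standalone check of the boundaries, byte order again ignored:

#include <stdint.h>
#include <stdio.h>

static uint16_t to0based(uint32_t a)
{
    uint32_t v = a > (1u << 16) ? 1u << 16 : a;    /* min(1U << 16, a) */
    v = v < 1 ? 1 : v;                             /* max(1U, .) */
    return (uint16_t)(v - 1);
}

int main(void)
{
    printf("%u %u %u %u\n",
           to0based(0),        /* 0: clamped up, not wrapped to 0xffff */
           to0based(1),        /* 0 */
           to0based(65536),    /* 65535: largest representable */
           to0based(100000));  /* 65535: saturates */
    return 0;
}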
diff --git a/drivers/nvme/target/trace.c b/drivers/nvme/target/trace.c index cdcdd14c6408..6af11d493271 100644 --- a/drivers/nvme/target/trace.c +++ b/drivers/nvme/target/trace.c | |||
@@ -146,7 +146,7 @@ static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc) | |||
146 | { | 146 | { |
147 | const char *ret = trace_seq_buffer_ptr(p); | 147 | const char *ret = trace_seq_buffer_ptr(p); |
148 | 148 | ||
149 | trace_seq_printf(p, "spcecific=%*ph", 24, spc); | 149 | trace_seq_printf(p, "specific=%*ph", 24, spc); |
150 | trace_seq_putc(p, 0); | 150 | trace_seq_putc(p, 0); |
151 | return ret; | 151 | return ret; |
152 | } | 152 | } |
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 5796ace76225..38c50946fc42 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h | |||
@@ -213,8 +213,7 @@ extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd); | |||
213 | extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, | 213 | extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, |
214 | struct scsi_sense_hdr *sshdr); | 214 | struct scsi_sense_hdr *sshdr); |
215 | extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, | 215 | extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, |
216 | struct blk_zone *zones, unsigned int *nr_zones, | 216 | struct blk_zone *zones, unsigned int *nr_zones); |
217 | gfp_t gfp_mask); | ||
218 | 217 | ||
219 | #else /* CONFIG_BLK_DEV_ZONED */ | 218 | #else /* CONFIG_BLK_DEV_ZONED */ |
220 | 219 | ||
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 7334024b64f1..db16c19e05c4 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c | |||
@@ -9,6 +9,8 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/blkdev.h> | 11 | #include <linux/blkdev.h> |
12 | #include <linux/vmalloc.h> | ||
13 | #include <linux/sched/mm.h> | ||
12 | 14 | ||
13 | #include <asm/unaligned.h> | 15 | #include <asm/unaligned.h> |
14 | 16 | ||
@@ -50,7 +52,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, | |||
50 | /** | 52 | /** |
51 | * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. | 53 | * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. |
52 | * @sdkp: The target disk | 54 | * @sdkp: The target disk |
53 | * @buf: Buffer to use for the reply | 55 | * @buf: vmalloc-ed buffer to use for the reply |
54 | * @buflen: the buffer size | 56 | * @buflen: the buffer size |
55 | * @lba: Start LBA of the report | 57 | * @lba: Start LBA of the report |
56 | * @partial: Do partial report | 58 | * @partial: Do partial report |
@@ -79,7 +81,6 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, | |||
79 | put_unaligned_be32(buflen, &cmd[10]); | 81 | put_unaligned_be32(buflen, &cmd[10]); |
80 | if (partial) | 82 | if (partial) |
81 | cmd[14] = ZBC_REPORT_ZONE_PARTIAL; | 83 | cmd[14] = ZBC_REPORT_ZONE_PARTIAL; |
82 | memset(buf, 0, buflen); | ||
83 | 84 | ||
84 | result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, | 85 | result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, |
85 | buf, buflen, &sshdr, | 86 | buf, buflen, &sshdr, |
@@ -103,45 +104,83 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, | |||
103 | return 0; | 104 | return 0; |
104 | } | 105 | } |
105 | 106 | ||
107 | /* | ||
108 | * Maximum number of zones to get with one report zones command. | ||
109 | */ | ||
110 | #define SD_ZBC_REPORT_MAX_ZONES 8192U | ||
111 | |||
112 | /** | ||
113 | * Allocate a buffer for report zones reply. | ||
114 | * @sdkp: The target disk | ||
115 | * @nr_zones: Maximum number of zones to report | ||
116 | * @buflen: Size of the buffer allocated | ||
117 | * | ||
118 | * Try to allocate a reply buffer for the number of requested zones. | ||
119 | * The size of the buffer allocated may be smaller than requested to | ||
120 | * satisfy the device constraints (max_hw_sectors, max_segments, etc). | ||
121 | * | ||
122 | * Return the address of the allocated buffer and update @buflen with | ||
123 | * the size of the allocated buffer. | ||
124 | */ | ||
125 | static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, | ||
126 | unsigned int nr_zones, size_t *buflen) | ||
127 | { | ||
128 | struct request_queue *q = sdkp->disk->queue; | ||
129 | size_t bufsize; | ||
130 | void *buf; | ||
131 | |||
132 | /* | ||
133 | * Report zone buffer size should be at most 64B times the number of | ||
134 | * zones requested plus the 64B reply header, but should be at least | ||
135 | * SECTOR_SIZE for ATA devices. | ||
136 | * Make sure that this size does not exceed the hardware capabilities. | ||
137 | * Furthermore, since the report zone command cannot be split, make | ||
138 | * sure that the allocated buffer can always be mapped by limiting the | ||
139 | * number of pages allocated to the HBA max segments limit. | ||
140 | */ | ||
141 | nr_zones = min(nr_zones, SD_ZBC_REPORT_MAX_ZONES); | ||
142 | bufsize = roundup((nr_zones + 1) * 64, 512); | ||
143 | bufsize = min_t(size_t, bufsize, | ||
144 | queue_max_hw_sectors(q) << SECTOR_SHIFT); | ||
145 | bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); | ||
146 | |||
147 | buf = vzalloc(bufsize); | ||
148 | if (buf) | ||
149 | *buflen = bufsize; | ||
150 | |||
151 | return buf; | ||
152 | } | ||
153 | |||
106 | /** | 154 | /** |
107 | * sd_zbc_report_zones - Disk report zones operation. | 155 | * sd_zbc_report_zones - Disk report zones operation. |
108 | * @disk: The target disk | 156 | * @disk: The target disk |
109 | * @sector: Start 512B sector of the report | 157 | * @sector: Start 512B sector of the report |
110 | * @zones: Array of zone descriptors | 158 | * @zones: Array of zone descriptors |
111 | * @nr_zones: Number of descriptors in the array | 159 | * @nr_zones: Number of descriptors in the array |
112 | * @gfp_mask: Memory allocation mask | ||
113 | * | 160 | * |
114 | * Execute a report zones command on the target disk. | 161 | * Execute a report zones command on the target disk. |
115 | */ | 162 | */ |
116 | int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, | 163 | int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, |
117 | struct blk_zone *zones, unsigned int *nr_zones, | 164 | struct blk_zone *zones, unsigned int *nr_zones) |
118 | gfp_t gfp_mask) | ||
119 | { | 165 | { |
120 | struct scsi_disk *sdkp = scsi_disk(disk); | 166 | struct scsi_disk *sdkp = scsi_disk(disk); |
121 | unsigned int i, buflen, nrz = *nr_zones; | 167 | unsigned int i, nrz = *nr_zones; |
122 | unsigned char *buf; | 168 | unsigned char *buf; |
123 | size_t offset = 0; | 169 | size_t buflen = 0, offset = 0; |
124 | int ret = 0; | 170 | int ret = 0; |
125 | 171 | ||
126 | if (!sd_is_zoned(sdkp)) | 172 | if (!sd_is_zoned(sdkp)) |
127 | /* Not a zoned device */ | 173 | /* Not a zoned device */ |
128 | return -EOPNOTSUPP; | 174 | return -EOPNOTSUPP; |
129 | 175 | ||
130 | /* | 176 | buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen); |
131 | * Get a reply buffer for the number of requested zones plus a header, | ||
132 | * without exceeding the device maximum command size. For ATA disks, | ||
133 | * buffers must be aligned to 512B. | ||
134 | */ | ||
135 | buflen = min(queue_max_hw_sectors(disk->queue) << 9, | ||
136 | roundup((nrz + 1) * 64, 512)); | ||
137 | buf = kmalloc(buflen, gfp_mask); | ||
138 | if (!buf) | 177 | if (!buf) |
139 | return -ENOMEM; | 178 | return -ENOMEM; |
140 | 179 | ||
141 | ret = sd_zbc_do_report_zones(sdkp, buf, buflen, | 180 | ret = sd_zbc_do_report_zones(sdkp, buf, buflen, |
142 | sectors_to_logical(sdkp->device, sector), true); | 181 | sectors_to_logical(sdkp->device, sector), true); |
143 | if (ret) | 182 | if (ret) |
144 | goto out_free_buf; | 183 | goto out; |
145 | 184 | ||
146 | nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64); | 185 | nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64); |
147 | for (i = 0; i < nrz; i++) { | 186 | for (i = 0; i < nrz; i++) { |
@@ -152,8 +191,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, | |||
152 | 191 | ||
153 | *nr_zones = nrz; | 192 | *nr_zones = nrz; |
154 | 193 | ||
155 | out_free_buf: | 194 | out: |
156 | kfree(buf); | 195 | kvfree(buf); |
157 | 196 | ||
158 | return ret; | 197 | return ret; |
159 | } | 198 | } |
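sd_zbc_alloc_report_buffer() bounds the reply three ways: 64 B per zone plus the 64 B header rounded up to 512 B (the ATA alignment requirement), then max_hw_sectors converted to bytes, then max_segments whole pages so the unsplittable command can always be mapped. With assumed HBA limits of 1024 sectors and 128 segments, a full-size request shrinks as follows (standalone recomputation):

#include <stddef.h>
#include <stdio.h>

#define SECTOR_SHIFT 9
#define PAGE_SHIFT 12
#define SD_ZBC_REPORT_MAX_ZONES 8192u

static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }
static size_t roundup_512(size_t x) { return (x + 511) & ~(size_t)511; }

int main(void)
{
    /* Assumed HBA limits for the example. */
    size_t max_hw_sectors = 1024;    /* 512 KB per command */
    size_t max_segments = 128;       /* 128 pages mappable */
    unsigned int nr_zones = 100000;  /* oversized request */

    if (nr_zones > SD_ZBC_REPORT_MAX_ZONES)
        nr_zones = SD_ZBC_REPORT_MAX_ZONES;

    size_t bufsize = roundup_512(((size_t)nr_zones + 1) * 64);
    bufsize = min_sz(bufsize, max_hw_sectors << SECTOR_SHIFT);
    bufsize = min_sz(bufsize, max_segments << PAGE_SHIFT);

    /* 8193 * 64 = 524352 -> 524800 rounded; both caps are 524288,
     * so the buffer holds 8191 zones plus the header. */
    printf("bufsize = %zu bytes (%zu zones)\n",
           bufsize, bufsize / 64 - 1);
    return 0;
}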
@@ -287,8 +326,6 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, | |||
287 | return 0; | 326 | return 0; |
288 | } | 327 | } |
289 | 328 | ||
290 | #define SD_ZBC_BUF_SIZE 131072U | ||
291 | |||
292 | /** | 329 | /** |
293 | * sd_zbc_check_zones - Check the device capacity and zone sizes | 330 | * sd_zbc_check_zones - Check the device capacity and zone sizes |
294 | * @sdkp: Target disk | 331 | * @sdkp: Target disk |
@@ -304,22 +341,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, | |||
304 | */ | 341 | */ |
305 | static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) | 342 | static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) |
306 | { | 343 | { |
344 | size_t bufsize, buflen; | ||
345 | unsigned int noio_flag; | ||
307 | u64 zone_blocks = 0; | 346 | u64 zone_blocks = 0; |
308 | sector_t max_lba, block = 0; | 347 | sector_t max_lba, block = 0; |
309 | unsigned char *buf; | 348 | unsigned char *buf; |
310 | unsigned char *rec; | 349 | unsigned char *rec; |
311 | unsigned int buf_len; | ||
312 | unsigned int list_length; | ||
313 | int ret; | 350 | int ret; |
314 | u8 same; | 351 | u8 same; |
315 | 352 | ||
353 | /* Do all memory allocations as if GFP_NOIO was specified */ | ||
354 | noio_flag = memalloc_noio_save(); | ||
355 | |||
316 | /* Get a buffer */ | 356 | /* Get a buffer */ |
317 | buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); | 357 | buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES, |
318 | if (!buf) | 358 | &bufsize); |
319 | return -ENOMEM; | 359 | if (!buf) { |
360 | ret = -ENOMEM; | ||
361 | goto out; | ||
362 | } | ||
320 | 363 | ||
321 | /* Do a report zone to get max_lba and the same field */ | 364 | /* Do a report zone to get max_lba and the same field */ |
322 | ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false); | 365 | ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, 0, false); |
323 | if (ret) | 366 | if (ret) |
324 | goto out_free; | 367 | goto out_free; |
325 | 368 | ||
@@ -355,12 +398,12 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) | |||
355 | do { | 398 | do { |
356 | 399 | ||
357 | /* Parse REPORT ZONES header */ | 400 | /* Parse REPORT ZONES header */ |
358 | list_length = get_unaligned_be32(&buf[0]) + 64; | 401 | buflen = min_t(size_t, get_unaligned_be32(&buf[0]) + 64, |
402 | bufsize); | ||
359 | rec = buf + 64; | 403 | rec = buf + 64; |
360 | buf_len = min(list_length, SD_ZBC_BUF_SIZE); | ||
361 | 404 | ||
362 | /* Parse zone descriptors */ | 405 | /* Parse zone descriptors */ |
363 | while (rec < buf + buf_len) { | 406 | while (rec < buf + buflen) { |
364 | u64 this_zone_blocks = get_unaligned_be64(&rec[8]); | 407 | u64 this_zone_blocks = get_unaligned_be64(&rec[8]); |
365 | 408 | ||
366 | if (zone_blocks == 0) { | 409 | if (zone_blocks == 0) { |
@@ -376,8 +419,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) | |||
376 | } | 419 | } |
377 | 420 | ||
378 | if (block < sdkp->capacity) { | 421 | if (block < sdkp->capacity) { |
379 | ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, | 422 | ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, block, |
380 | block, true); | 423 | true); |
381 | if (ret) | 424 | if (ret) |
382 | goto out_free; | 425 | goto out_free; |
383 | } | 426 | } |
@@ -408,7 +451,8 @@ out: | |||
408 | } | 451 | } |
409 | 452 | ||
410 | out_free: | 453 | out_free: |
411 | kfree(buf); | 454 | memalloc_noio_restore(noio_flag); |
455 | kvfree(buf); | ||
412 | 456 | ||
413 | return ret; | 457 | return ret; |
414 | } | 458 | } |
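The memalloc_noio_save()/restore() bracket is what lets the gfp_t argument disappear from the report-zones path: any allocation made inside the bracket, including the vzalloc() in sd_zbc_alloc_report_buffer(), implicitly behaves as GFP_NOIO, which matters because zone revalidation can run from contexts where recursing into the I/O path would deadlock. The scoping pattern as a kernel-context sketch (probe_zones_noio is an illustrative name):

/* Everything allocated between save and restore drops __GFP_IO. */
static int probe_zones_noio(struct scsi_disk *sdkp)
{
    unsigned int noio_flag = memalloc_noio_save();
    size_t bufsize;
    void *buf;
    int ret = 0;

    buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES,
                                     &bufsize);
    if (!buf)
        ret = -ENOMEM;

    /* ... issue and parse REPORT ZONES here ... */

    kvfree(buf);                        /* kvfree handles NULL */
    memalloc_noio_restore(noio_flag);   /* always pair with save */
    return ret;
}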
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index db337e53aab3..5106008f5e28 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2911,7 +2911,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, | |||
2911 | bio = NULL; | 2911 | bio = NULL; |
2912 | } else { | 2912 | } else { |
2913 | if (wbc) | 2913 | if (wbc) |
2914 | wbc_account_io(wbc, page, page_size); | 2914 | wbc_account_cgroup_owner(wbc, page, page_size); |
2915 | return 0; | 2915 | return 0; |
2916 | } | 2916 | } |
2917 | } | 2917 | } |
@@ -2924,7 +2924,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, | |||
2924 | bio->bi_opf = opf; | 2924 | bio->bi_opf = opf; |
2925 | if (wbc) { | 2925 | if (wbc) { |
2926 | wbc_init_bio(wbc, bio); | 2926 | wbc_init_bio(wbc, bio); |
2927 | wbc_account_io(wbc, page, page_size); | 2927 | wbc_account_cgroup_owner(wbc, page, page_size); |
2928 | } | 2928 | } |
2929 | 2929 | ||
2930 | *bio_ret = bio; | 2930 | *bio_ret = bio; |
diff --git a/fs/buffer.c b/fs/buffer.c index 49a871570092..86a38b979323 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3089,7 +3089,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | |||
3089 | 3089 | ||
3090 | if (wbc) { | 3090 | if (wbc) { |
3091 | wbc_init_bio(wbc, bio); | 3091 | wbc_init_bio(wbc, bio); |
3092 | wbc_account_io(wbc, bh->b_page, bh->b_size); | 3092 | wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); |
3093 | } | 3093 | } |
3094 | 3094 | ||
3095 | submit_bio(bio); | 3095 | submit_bio(bio); |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index a18a47a2a1d1..12ceadef32c5 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -396,7 +396,7 @@ submit_and_retry: | |||
396 | ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); | 396 | ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); |
397 | if (ret != bh->b_size) | 397 | if (ret != bh->b_size) |
398 | goto submit_and_retry; | 398 | goto submit_and_retry; |
399 | wbc_account_io(io->io_wbc, page, bh->b_size); | 399 | wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size); |
400 | io->io_next_block++; | 400 | io->io_next_block++; |
401 | return 0; | 401 | return 0; |
402 | } | 402 | } |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0ca530afc684..4eb2f3920140 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -470,7 +470,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) | |||
470 | } | 470 | } |
471 | 471 | ||
472 | if (fio->io_wbc && !is_read_io(fio->op)) | 472 | if (fio->io_wbc && !is_read_io(fio->op)) |
473 | wbc_account_io(fio->io_wbc, page, PAGE_SIZE); | 473 | wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); |
474 | 474 | ||
475 | bio_set_op_attrs(bio, fio->op, fio->op_flags); | 475 | bio_set_op_attrs(bio, fio->op, fio->op_flags); |
476 | 476 | ||
@@ -513,7 +513,7 @@ alloc_new: | |||
513 | } | 513 | } |
514 | 514 | ||
515 | if (fio->io_wbc) | 515 | if (fio->io_wbc) |
516 | wbc_account_io(fio->io_wbc, page, PAGE_SIZE); | 516 | wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); |
517 | 517 | ||
518 | inc_page_count(fio->sbi, WB_DATA_TYPE(page)); | 518 | inc_page_count(fio->sbi, WB_DATA_TYPE(page)); |
519 | 519 | ||
@@ -592,7 +592,7 @@ alloc_new: | |||
592 | } | 592 | } |
593 | 593 | ||
594 | if (fio->io_wbc) | 594 | if (fio->io_wbc) |
595 | wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE); | 595 | wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE); |
596 | 596 | ||
597 | io->last_block_in_bio = fio->new_blkaddr; | 597 | io->last_block_in_bio = fio->new_blkaddr; |
598 | f2fs_trace_ios(fio, 0); | 598 | f2fs_trace_ios(fio, 0); |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d95a681ef7c9..6de6cda44031 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -2818,9 +2818,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) | |||
2818 | while (zones && sector < nr_sectors) { | 2818 | while (zones && sector < nr_sectors) { |
2819 | 2819 | ||
2820 | nr_zones = F2FS_REPORT_NR_ZONES; | 2820 | nr_zones = F2FS_REPORT_NR_ZONES; |
2821 | err = blkdev_report_zones(bdev, sector, | 2821 | err = blkdev_report_zones(bdev, sector, zones, &nr_zones); |
2822 | zones, &nr_zones, | ||
2823 | GFP_KERNEL); | ||
2824 | if (err) | 2822 | if (err) |
2825 | break; | 2823 | break; |
2826 | if (!nr_zones) { | 2824 | if (!nr_zones) { |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 9ebfb1b28430..542b02d170f8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -270,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page) | |||
270 | if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) | 270 | if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) |
271 | wb_put(wb); | 271 | wb_put(wb); |
272 | } | 272 | } |
273 | EXPORT_SYMBOL_GPL(__inode_attach_wb); | ||
273 | 274 | ||
274 | /** | 275 | /** |
275 | * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it | 276 | * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it |
@@ -582,6 +583,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, | |||
582 | if (unlikely(wb_dying(wbc->wb))) | 583 | if (unlikely(wb_dying(wbc->wb))) |
583 | inode_switch_wbs(inode, wbc->wb_id); | 584 | inode_switch_wbs(inode, wbc->wb_id); |
584 | } | 585 | } |
586 | EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode); | ||
585 | 587 | ||
586 | /** | 588 | /** |
587 | * wbc_detach_inode - disassociate wbc from inode and perform foreign detection | 589 | * wbc_detach_inode - disassociate wbc from inode and perform foreign detection |
@@ -701,9 +703,10 @@ void wbc_detach_inode(struct writeback_control *wbc) | |||
701 | wb_put(wbc->wb); | 703 | wb_put(wbc->wb); |
702 | wbc->wb = NULL; | 704 | wbc->wb = NULL; |
703 | } | 705 | } |
706 | EXPORT_SYMBOL_GPL(wbc_detach_inode); | ||
704 | 707 | ||
705 | /** | 708 | /** |
706 | * wbc_account_io - account IO issued during writeback | 709 | * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership |
707 | * @wbc: writeback_control of the writeback in progress | 710 | * @wbc: writeback_control of the writeback in progress |
708 | * @page: page being written out | 711 | * @page: page being written out |
709 | * @bytes: number of bytes being written out | 712 | * @bytes: number of bytes being written out |
@@ -712,8 +715,8 @@ void wbc_detach_inode(struct writeback_control *wbc) | |||
712 | * controlled by @wbc. Keep the book for foreign inode detection. See | 715 | * controlled by @wbc. Keep the book for foreign inode detection. See |
713 | * wbc_detach_inode(). | 716 | * wbc_detach_inode(). |
714 | */ | 717 | */ |
715 | void wbc_account_io(struct writeback_control *wbc, struct page *page, | 718 | void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, |
716 | size_t bytes) | 719 | size_t bytes) |
717 | { | 720 | { |
718 | struct cgroup_subsys_state *css; | 721 | struct cgroup_subsys_state *css; |
719 | int id; | 722 | int id; |
@@ -724,7 +727,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, | |||
724 | * behind a slow cgroup. Ultimately, we want pageout() to kick off | 727 | * behind a slow cgroup. Ultimately, we want pageout() to kick off |
725 | * regular writeback instead of writing things out itself. | 728 | * regular writeback instead of writing things out itself. |
726 | */ | 729 | */ |
727 | if (!wbc->wb) | 730 | if (!wbc->wb || wbc->no_cgroup_owner) |
728 | return; | 731 | return; |
729 | 732 | ||
730 | css = mem_cgroup_css_from_page(page); | 733 | css = mem_cgroup_css_from_page(page); |
@@ -750,7 +753,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, | |||
750 | else | 753 | else |
751 | wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); | 754 | wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); |
752 | } | 755 | } |
753 | EXPORT_SYMBOL_GPL(wbc_account_io); | 756 | EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner); |
754 | 757 | ||
755 | /** | 758 | /** |
756 | * inode_congested - test whether an inode is congested | 759 | * inode_congested - test whether an inode is congested |
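The three new exports let modular filesystems drive cgroup writeback end to end: attach the wbc to the inode, tag each bio with wbc_init_bio(), report every data segment through wbc_account_cgroup_owner() so foreign-owner detection keeps its books, then detach. A condensed kernel-context sketch of that choreography, modeled on the ext4/xfs call sites converted above (fs_write_one_page is a made-up name, error handling omitted):

static void fs_write_one_page(struct writeback_control *wbc,
                              struct inode *inode, struct page *page,
                              struct bio *bio)
{
    /* Caller holds inode->i_lock; this drops it. */
    wbc_attach_and_unlock_inode(wbc, inode);

    wbc_init_bio(wbc, bio);    /* associate bio with the wb's blkcg */
    bio_add_page(bio, page, PAGE_SIZE, 0);
    wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
    submit_bio(bio);

    wbc_detach_inode(wbc);     /* runs foreign-owner arbitration */
}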
diff --git a/fs/mpage.c b/fs/mpage.c index 436a85260394..a63620cdb73a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -647,7 +647,7 @@ alloc_new: | |||
647 | * the confused fail path above (OOM) will be very confused when | 647 | * the confused fail path above (OOM) will be very confused when |
648 | * it finds all bh marked clean (i.e. it will not write anything) | 648 | * it finds all bh marked clean (i.e. it will not write anything) |
649 | */ | 649 | */ |
650 | wbc_account_io(wbc, page, PAGE_SIZE); | 650 | wbc_account_cgroup_owner(wbc, page, PAGE_SIZE); |
651 | length = first_unmapped << blkbits; | 651 | length = first_unmapped << blkbits; |
652 | if (bio_add_page(bio, page, length, 0) < length) { | 652 | if (bio_add_page(bio, page, length, 0) < length) { |
653 | bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); | 653 | bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 761248ee2778..f16d5f196c6b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -796,7 +796,7 @@ xfs_add_to_ioend( | |||
796 | } | 796 | } |
797 | 797 | ||
798 | wpc->ioend->io_size += len; | 798 | wpc->ioend->io_size += len; |
799 | wbc_account_io(wbc, page, len); | 799 | wbc_account_cgroup_owner(wbc, page, len); |
800 | } | 800 | } |
801 | 801 | ||
802 | STATIC void | 802 | STATIC void |
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f9b029180241..35b31d176f74 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
@@ -48,6 +48,7 @@ extern spinlock_t bdi_lock; | |||
48 | extern struct list_head bdi_list; | 48 | extern struct list_head bdi_list; |
49 | 49 | ||
50 | extern struct workqueue_struct *bdi_wq; | 50 | extern struct workqueue_struct *bdi_wq; |
51 | extern struct workqueue_struct *bdi_async_bio_wq; | ||
51 | 52 | ||
52 | static inline bool wb_has_dirty_io(struct bdi_writeback *wb) | 53 | static inline bool wb_has_dirty_io(struct bdi_writeback *wb) |
53 | { | 54 | { |
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 33f23a858438..689a58231288 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h | |||
@@ -132,13 +132,17 @@ struct blkcg_gq { | |||
132 | 132 | ||
133 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; | 133 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; |
134 | 134 | ||
135 | struct rcu_head rcu_head; | 135 | spinlock_t async_bio_lock; |
136 | struct bio_list async_bios; | ||
137 | struct work_struct async_bio_work; | ||
136 | 138 | ||
137 | atomic_t use_delay; | 139 | atomic_t use_delay; |
138 | atomic64_t delay_nsec; | 140 | atomic64_t delay_nsec; |
139 | atomic64_t delay_start; | 141 | atomic64_t delay_start; |
140 | u64 last_delay; | 142 | u64 last_delay; |
141 | int last_use; | 143 | int last_use; |
144 | |||
145 | struct rcu_head rcu_head; | ||
142 | }; | 146 | }; |
143 | 147 | ||
144 | typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); | 148 | typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); |
@@ -701,6 +705,15 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg | |||
701 | struct bio *bio) { return false; } | 705 | struct bio *bio) { return false; } |
702 | #endif | 706 | #endif |
703 | 707 | ||
708 | bool __blkcg_punt_bio_submit(struct bio *bio); | ||
709 | |||
710 | static inline bool blkcg_punt_bio_submit(struct bio *bio) | ||
711 | { | ||
712 | if (bio->bi_opf & REQ_CGROUP_PUNT) | ||
713 | return __blkcg_punt_bio_submit(bio); | ||
714 | else | ||
715 | return false; | ||
716 | } | ||
704 | 717 | ||
705 | static inline void blkcg_bio_issue_init(struct bio *bio) | 718 | static inline void blkcg_bio_issue_init(struct bio *bio) |
706 | { | 719 | { |
@@ -848,6 +861,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | |||
848 | static inline void blkg_get(struct blkcg_gq *blkg) { } | 861 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
849 | static inline void blkg_put(struct blkcg_gq *blkg) { } | 862 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
850 | 863 | ||
864 | static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; } | ||
851 | static inline void blkcg_bio_issue_init(struct bio *bio) { } | 865 | static inline void blkcg_bio_issue_init(struct bio *bio) { } |
852 | static inline bool blkcg_bio_issue_check(struct request_queue *q, | 866 | static inline bool blkcg_bio_issue_check(struct request_queue *q, |
853 | struct bio *bio) { return true; } | 867 | struct bio *bio) { return true; } |
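The fields added to blkcg_gq hint at how the punt is carried out: __blkcg_punt_bio_submit() presumably queues the bio on async_bios under async_bio_lock and kicks async_bio_work, whose worker reissues the bios from a dedicated per-cgroup context where blocking cannot invert priorities against the original submitter. A speculative sketch of that shape — the real body lives in block/blk-cgroup.c, which this page does not show, and blkcg_punt_bio_wq is an assumed workqueue name:

bool __blkcg_punt_bio_submit(struct bio *bio)
{
    struct blkcg_gq *blkg = bio->bi_blkg;

    /* Hand the bio to the per-blkcg worker instead of issuing it
     * from the shared, inversion-prone caller context. */
    spin_lock_bh(&blkg->async_bio_lock);
    bio_list_add(&blkg->async_bios, bio);
    spin_unlock_bh(&blkg->async_bio_lock);

    queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
    return true;
}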
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6a53799c3fe2..feff3fe4467e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -311,6 +311,14 @@ enum req_flag_bits { | |||
311 | __REQ_RAHEAD, /* read ahead, can fail anytime */ | 311 | __REQ_RAHEAD, /* read ahead, can fail anytime */ |
312 | __REQ_BACKGROUND, /* background IO */ | 312 | __REQ_BACKGROUND, /* background IO */ |
313 | __REQ_NOWAIT, /* Don't wait if request will block */ | 313 | __REQ_NOWAIT, /* Don't wait if request will block */ |
314 | /* | ||
315 | * When a shared kthread needs to issue a bio for a cgroup, doing | ||
316 | * so synchronously can lead to priority inversions as the kthread | ||
317 | * can be trapped waiting for that cgroup. CGROUP_PUNT flag makes | ||
318 | * submit_bio() punt the actual issuing to a dedicated per-blkcg | ||
319 | * work item to avoid such priority inversions. | ||
320 | */ | ||
321 | __REQ_CGROUP_PUNT, | ||
314 | 322 | ||
315 | /* command specific flags for REQ_OP_WRITE_ZEROES: */ | 323 | /* command specific flags for REQ_OP_WRITE_ZEROES: */ |
316 | __REQ_NOUNMAP, /* do not free blocks when zeroing */ | 324 | __REQ_NOUNMAP, /* do not free blocks when zeroing */ |
@@ -337,6 +345,8 @@ enum req_flag_bits { | |||
337 | #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) | 345 | #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) |
338 | #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) | 346 | #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) |
339 | #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) | 347 | #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) |
348 | #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) | ||
349 | |||
340 | #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) | 350 | #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) |
341 | #define REQ_HIPRI (1ULL << __REQ_HIPRI) | 351 | #define REQ_HIPRI (1ULL << __REQ_HIPRI) |
342 | 352 | ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c482371c8b3..1ef375dafb1c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -344,10 +344,15 @@ struct queue_limits { | |||
344 | 344 | ||
345 | #ifdef CONFIG_BLK_DEV_ZONED | 345 | #ifdef CONFIG_BLK_DEV_ZONED |
346 | 346 | ||
347 | /* | ||
348 | * Maximum number of zones to report with a single report zones command. | ||
349 | */ | ||
350 | #define BLK_ZONED_REPORT_MAX_ZONES 8192U | ||
351 | |||
347 | extern unsigned int blkdev_nr_zones(struct block_device *bdev); | 352 | extern unsigned int blkdev_nr_zones(struct block_device *bdev); |
348 | extern int blkdev_report_zones(struct block_device *bdev, | 353 | extern int blkdev_report_zones(struct block_device *bdev, |
349 | sector_t sector, struct blk_zone *zones, | 354 | sector_t sector, struct blk_zone *zones, |
350 | unsigned int *nr_zones, gfp_t gfp_mask); | 355 | unsigned int *nr_zones); |
351 | extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, | 356 | extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, |
352 | sector_t nr_sectors, gfp_t gfp_mask); | 357 | sector_t nr_sectors, gfp_t gfp_mask); |
353 | extern int blk_revalidate_disk_zones(struct gendisk *disk); | 358 | extern int blk_revalidate_disk_zones(struct gendisk *disk); |
@@ -681,7 +686,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) | |||
681 | } | 686 | } |
682 | } | 687 | } |
683 | 688 | ||
684 | static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) | 689 | static inline sector_t blk_queue_zone_sectors(struct request_queue *q) |
685 | { | 690 | { |
686 | return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; | 691 | return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; |
687 | } | 692 | } |
@@ -1418,7 +1423,7 @@ static inline bool bdev_is_zoned(struct block_device *bdev) | |||
1418 | return false; | 1423 | return false; |
1419 | } | 1424 | } |
1420 | 1425 | ||
1421 | static inline unsigned int bdev_zone_sectors(struct block_device *bdev) | 1426 | static inline sector_t bdev_zone_sectors(struct block_device *bdev) |
1422 | { | 1427 | { |
1423 | struct request_queue *q = bdev_get_queue(bdev); | 1428 | struct request_queue *q = bdev_get_queue(bdev); |
1424 | 1429 | ||
@@ -1673,8 +1678,7 @@ struct block_device_operations { | |||
1673 | /* this callback is with swap_lock and sometimes page table lock held */ | 1678 | /* this callback is with swap_lock and sometimes page table lock held */ |
1674 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); | 1679 | void (*swap_slot_free_notify) (struct block_device *, unsigned long); |
1675 | int (*report_zones)(struct gendisk *, sector_t sector, | 1680 | int (*report_zones)(struct gendisk *, sector_t sector, |
1676 | struct blk_zone *zones, unsigned int *nr_zones, | 1681 | struct blk_zone *zones, unsigned int *nr_zones); |
1677 | gfp_t gfp_mask); | ||
1678 | struct module *owner; | 1682 | struct module *owner; |
1679 | const struct pr_ops *pr_ops; | 1683 | const struct pr_ops *pr_ops; |
1680 | }; | 1684 | }; |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2af9b1b419f1..f6b048902d6c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -699,6 +699,7 @@ void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, | |||
699 | struct cgroup_subsys_state; | 699 | struct cgroup_subsys_state; |
700 | struct cgroup; | 700 | struct cgroup; |
701 | 701 | ||
702 | static inline void css_get(struct cgroup_subsys_state *css) {} | ||
702 | static inline void css_put(struct cgroup_subsys_state *css) {} | 703 | static inline void css_put(struct cgroup_subsys_state *css) {} |
703 | static inline int cgroup_attach_task_all(struct task_struct *from, | 704 | static inline int cgroup_attach_task_all(struct task_struct *from, |
704 | struct task_struct *t) { return 0; } | 705 | struct task_struct *t) { return 0; } |
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e1f51d607cc5..3b470cb03b66 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
@@ -95,8 +95,7 @@ typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device ** | |||
95 | 95 | ||
96 | typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector, | 96 | typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector, |
97 | struct blk_zone *zones, | 97 | struct blk_zone *zones, |
98 | unsigned int *nr_zones, | 98 | unsigned int *nr_zones); |
99 | gfp_t gfp_mask); | ||
100 | 99 | ||
101 | /* | 100 | /* |
102 | * These iteration functions are typically used to check (and combine) | 101 | * These iteration functions are typically used to check (and combine) |
diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 169bb2e02516..17cd0078377c 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h | |||
@@ -75,7 +75,7 @@ struct elevator_type | |||
75 | size_t icq_size; /* see iocontext.h */ | 75 | size_t icq_size; /* see iocontext.h */ |
76 | size_t icq_align; /* ditto */ | 76 | size_t icq_align; /* ditto */ |
77 | struct elv_fs_entry *elevator_attrs; | 77 | struct elv_fs_entry *elevator_attrs; |
78 | char elevator_name[ELV_NAME_MAX]; | 78 | const char *elevator_name; |
79 | const char *elevator_alias; | 79 | const char *elevator_alias; |
80 | struct module *elevator_owner; | 80 | struct module *elevator_owner; |
81 | #ifdef CONFIG_BLK_DEBUG_FS | 81 | #ifdef CONFIG_BLK_DEBUG_FS |
@@ -160,15 +160,6 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t); | |||
160 | #define ELEVATOR_INSERT_FLUSH 5 | 160 | #define ELEVATOR_INSERT_FLUSH 5 |
161 | #define ELEVATOR_INSERT_SORT_MERGE 6 | 161 | #define ELEVATOR_INSERT_SORT_MERGE 6 |
162 | 162 | ||
163 | /* | ||
164 | * return values from elevator_may_queue_fn | ||
165 | */ | ||
166 | enum { | ||
167 | ELV_MQUEUE_MAY, | ||
168 | ELV_MQUEUE_NO, | ||
169 | ELV_MQUEUE_MUST, | ||
170 | }; | ||
171 | |||
172 | #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) | 163 | #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) |
173 | #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) | 164 | #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) |
174 | 165 | ||
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index d98b2d8baf4e..01aa6a6c241d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -315,7 +315,7 @@ struct nvme_id_ns {
 	__u8			nmic;
 	__u8			rescap;
 	__u8			fpi;
-	__u8			rsvd33;
+	__u8			dlfeat;
 	__le16			nawun;
 	__le16			nawupf;
 	__le16			nacwu;
@@ -324,11 +324,17 @@ struct nvme_id_ns {
 	__le16			nabspf;
 	__le16			noiob;
 	__u8			nvmcap[16];
-	__u8			rsvd64[28];
+	__le16			npwg;
+	__le16			npwa;
+	__le16			npdg;
+	__le16			npda;
+	__le16			nows;
+	__u8			rsvd74[18];
 	__le32			anagrpid;
 	__u8			rsvd96[3];
 	__u8			nsattr;
-	__u8			rsvd100[4];
+	__le16			nvmsetid;
+	__le16			endgid;
 	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
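
[Editor's note] The previously reserved bytes now carry NVMe 1.4 Identify Namespace fields: the NPWG/NPWA/NPDG/NPDA/NOWS write and deallocate granularity hints, plus the NVM set and endurance group identifiers. The fields are little-endian, and the size hints are 0's based per the spec (a raw value of 0 means one unit), hence the +1 after byte-swapping. A minimal sketch of reading the two hints the driver can feed into queue limits; example_show_io_hints() is illustrative, and `id` is assumed to be an already-fetched Identify Namespace buffer:

	static void example_show_io_hints(struct nvme_id_ns *id, u32 lba_bytes)
	{
		/* Namespace Preferred Write Granularity, 0's based, in LBAs */
		u32 npwg = le16_to_cpu(id->npwg) + 1;
		/* Namespace Optimal Write Size, 0's based, in LBAs */
		u32 nows = le16_to_cpu(id->nows) + 1;

		pr_info("preferred write granularity: %u bytes\n",
			npwg * lba_bytes);
		pr_info("optimal write size: %u bytes\n", nows * lba_bytes);
	}
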
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c24874f..8945aac31392 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,6 +11,7 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/blk_types.h>
+#include <linux/blk-cgroup.h>
 
 struct bio;
 
@@ -68,6 +69,17 @@ struct writeback_control {
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
+
+	/*
+	 * When writeback IOs are bounced through async layers, only the
+	 * initial synchronous phase should be accounted towards inode
+	 * cgroup ownership arbitration to avoid confusion.  Later stages
+	 * can set the following flag to disable the accounting.
+	 */
+	unsigned no_cgroup_owner:1;
+
+	unsigned punt_to_cgroup:1;	/* cgrp punting, see __REQ_CGROUP_PUNT */
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback *wb;	/* wb this writeback is issued under */
 	struct inode *inode;		/* inode being written out */
@@ -84,12 +96,27 @@ struct writeback_control {
 
 static inline int wbc_to_write_flags(struct writeback_control *wbc)
 {
-	if (wbc->sync_mode == WB_SYNC_ALL)
-		return REQ_SYNC;
-	else if (wbc->for_kupdate || wbc->for_background)
-		return REQ_BACKGROUND;
-
-	return 0;
+	int flags = 0;
+
+	if (wbc->punt_to_cgroup)
+		flags = REQ_CGROUP_PUNT;
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		flags |= REQ_SYNC;
+	else if (wbc->for_kupdate || wbc->for_background)
+		flags |= REQ_BACKGROUND;
+
+	return flags;
+}
+
+static inline struct cgroup_subsys_state *
+wbc_blkcg_css(struct writeback_control *wbc)
+{
+#ifdef CONFIG_CGROUP_WRITEBACK
+	if (wbc->wb)
+		return wbc->wb->blkcg_css;
+#endif
+	return blkcg_root_css;
 }
 
 /*
@@ -188,8 +215,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 			      struct inode *inode)
 	__releases(&inode->i_lock);
 void wbc_detach_inode(struct writeback_control *wbc);
-void wbc_account_io(struct writeback_control *wbc, struct page *page,
-		    size_t bytes);
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+			      size_t bytes);
 void cgroup_writeback_umount(void);
 
 /**
@@ -291,8 +318,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
 }
 
-static inline void wbc_account_io(struct writeback_control *wbc,
-				  struct page *page, size_t bytes)
+static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
+					    struct page *page, size_t bytes)
 {
 }
 
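
[Editor's note] Taken together, the writeback.h changes give filesystems three hooks: setting punt_to_cgroup makes wbc_to_write_flags() add REQ_CGROUP_PUNT so issuing can be deferred to a per-cgroup worker, wbc_blkcg_css() exposes the blkcg a writeback is issued under (falling back to blkcg_root_css when cgroup writeback is compiled out), and wbc_account_io() is renamed to wbc_account_cgroup_owner() with unchanged semantics. A sketch of a filesystem submission path wired to these helpers; example_submit_page() is illustrative and not taken from any real filesystem:

	static void example_submit_page(struct writeback_control *wbc,
					struct bio *bio, struct page *page)
	{
		/* punt_to_cgroup, sync_mode, etc. become request flags here */
		bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);

		/* associate the bio with the cgroup that owns this writeback */
		wbc_init_bio(wbc, bio);

		/* formerly wbc_account_io(); accounting behavior is unchanged */
		wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);

		submit_bio(bio);
	}
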