diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 14:51:49 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 14:51:49 -0500 |
| commit | 0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch) | |
| tree | cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /include/linux/blkdev.h | |
| parent | 9697e9da84299d0d715d515dd2cc48f1eceb277d (diff) | |
| parent | 796baeeef85a40b3495a907fb7425086e7010102 (diff) | |
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block IO related changes for the
4.16 kernel. Nothing major in this pull request, but a good amount of
improvements and fixes all over the map. This contains:
- BFQ improvements, fixes, and cleanups from Angelo, Chiara, and
Paolo.
- Support for SMR zones for deadline and mq-deadline from Damien and
Christoph.
- Set of fixes for bcache by way of Michael Lyle, including fixes
from himself, Kent, Rui, Tang, and Coly.
- Series from Matias for lightnvm with fixes from Hans Holmberg,
Javier, and Matias. Mostly centered around pblk, and removing
rrpc 1.2 in preparation for supporting 2.0.
- A couple of NVMe pull requests from Christoph. Nothing major in
here, just fixes and cleanups, and support for command tracing from
Johannes.
- Support for blk-throttle for tracking reads and writes separately.
From Joseph Qi. A few cleanups/fixes also for blk-throttle from
Weiping.
- Series from Mike Snitzer that enables dm to register its queue more
logically, something that's always been problematic on dm since
it's a stacked device.
- Series from Ming cleaning up some of the bio accessor use, in
preparation for supporting multipage bvecs.
- Various fixes from Ming closing up holes around queue mapping and
quiescing.
- BSD partition fix from Richard Narron, fixing a problem where we
can't mount newer (10/11) FreeBSD partitions.
- Series from Tejun reworking blk-mq timeout handling. The previous
scheme relied on atomic bits, but it had races where we would think
a request had timed out if it got reused at the wrong time.
- null_blk now supports faking timeouts, to enable us to better
exercise and test that functionality separately. From me.
- Kill the separate atomic poll bit in the request struct. After
this, we don't use the atomic bits on blk-mq anymore at all. From
me.
- sgl_alloc/free helpers from Bart.
- Heavily contended tag case scalability improvement from me.
- Various little fixes and cleanups from Arnd, Bart, Corentin,
Douglas, Eryu, Goldwyn, and myself"
* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
block: remove smart1,2.h
nvme: add tracepoint for nvme_complete_rq
nvme: add tracepoint for nvme_setup_cmd
nvme-pci: introduce RECONNECTING state to mark initializing procedure
nvme-rdma: remove redundant boolean for inline_data
nvme: don't free uuid pointer before printing it
nvme-pci: Suspend queues after deleting them
bsg: use pr_debug instead of hand crafted macros
blk-mq-debugfs: don't allow write on attributes with seq_operations set
nvme-pci: Fix queue double allocations
block: Set BIO_TRACE_COMPLETION on new bio during split
blk-throttle: use queue_is_rq_based
block: Remove kblockd_schedule_delayed_work{,_on}()
blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
lib/scatterlist: Fix chaining support in sgl_alloc_order()
blk-throttle: track read and write request individually
block: add bdev_read_only() checks to common helpers
block: fail op_is_write() requests to read-only partitions
blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
...
Diffstat (limited to 'include/linux/blkdev.h')
| -rw-r--r-- | include/linux/blkdev.h | 172 |
1 files changed, 155 insertions, 17 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0ce8a372d506..4f3df807cf8f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -27,6 +27,8 @@ | |||
| 27 | #include <linux/percpu-refcount.h> | 27 | #include <linux/percpu-refcount.h> |
| 28 | #include <linux/scatterlist.h> | 28 | #include <linux/scatterlist.h> |
| 29 | #include <linux/blkzoned.h> | 29 | #include <linux/blkzoned.h> |
| 30 | #include <linux/seqlock.h> | ||
| 31 | #include <linux/u64_stats_sync.h> | ||
| 30 | 32 | ||
| 31 | struct module; | 33 | struct module; |
| 32 | struct scsi_ioctl_command; | 34 | struct scsi_ioctl_command; |
| @@ -121,6 +123,12 @@ typedef __u32 __bitwise req_flags_t; | |||
| 121 | /* Look at ->special_vec for the actual data payload instead of the | 123 | /* Look at ->special_vec for the actual data payload instead of the |
| 122 | bio chain. */ | 124 | bio chain. */ |
| 123 | #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) | 125 | #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) |
| 126 | /* The per-zone write lock is held for this request */ | ||
| 127 | #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) | ||
| 128 | /* timeout is expired */ | ||
| 129 | #define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20)) | ||
| 130 | /* already slept for hybrid poll */ | ||
| 131 | #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21)) | ||
| 124 | 132 | ||
| 125 | /* flags that prevent us from merging requests: */ | 133 | /* flags that prevent us from merging requests: */ |
| 126 | #define RQF_NOMERGE_FLAGS \ | 134 | #define RQF_NOMERGE_FLAGS \ |
| @@ -133,12 +141,6 @@ typedef __u32 __bitwise req_flags_t; | |||
| 133 | * especially blk_mq_rq_ctx_init() to take care of the added fields. | 141 | * especially blk_mq_rq_ctx_init() to take care of the added fields. |
| 134 | */ | 142 | */ |
| 135 | struct request { | 143 | struct request { |
| 136 | struct list_head queuelist; | ||
| 137 | union { | ||
| 138 | struct __call_single_data csd; | ||
| 139 | u64 fifo_time; | ||
| 140 | }; | ||
| 141 | |||
| 142 | struct request_queue *q; | 144 | struct request_queue *q; |
| 143 | struct blk_mq_ctx *mq_ctx; | 145 | struct blk_mq_ctx *mq_ctx; |
| 144 | 146 | ||
| @@ -148,8 +150,6 @@ struct request { | |||
| 148 | 150 | ||
| 149 | int internal_tag; | 151 | int internal_tag; |
| 150 | 152 | ||
| 151 | unsigned long atomic_flags; | ||
| 152 | |||
| 153 | /* the following two fields are internal, NEVER access directly */ | 153 | /* the following two fields are internal, NEVER access directly */ |
| 154 | unsigned int __data_len; /* total data len */ | 154 | unsigned int __data_len; /* total data len */ |
| 155 | int tag; | 155 | int tag; |
| @@ -158,6 +158,8 @@ struct request { | |||
| 158 | struct bio *bio; | 158 | struct bio *bio; |
| 159 | struct bio *biotail; | 159 | struct bio *biotail; |
| 160 | 160 | ||
| 161 | struct list_head queuelist; | ||
| 162 | |||
| 161 | /* | 163 | /* |
| 162 | * The hash is used inside the scheduler, and killed once the | 164 | * The hash is used inside the scheduler, and killed once the |
| 163 | * request reaches the dispatch list. The ipi_list is only used | 165 | * request reaches the dispatch list. The ipi_list is only used |
| @@ -205,19 +207,16 @@ struct request { | |||
| 205 | struct hd_struct *part; | 207 | struct hd_struct *part; |
| 206 | unsigned long start_time; | 208 | unsigned long start_time; |
| 207 | struct blk_issue_stat issue_stat; | 209 | struct blk_issue_stat issue_stat; |
| 208 | #ifdef CONFIG_BLK_CGROUP | ||
| 209 | struct request_list *rl; /* rl this rq is alloced from */ | ||
| 210 | unsigned long long start_time_ns; | ||
| 211 | unsigned long long io_start_time_ns; /* when passed to hardware */ | ||
| 212 | #endif | ||
| 213 | /* Number of scatter-gather DMA addr+len pairs after | 210 | /* Number of scatter-gather DMA addr+len pairs after |
| 214 | * physical address coalescing is performed. | 211 | * physical address coalescing is performed. |
| 215 | */ | 212 | */ |
| 216 | unsigned short nr_phys_segments; | 213 | unsigned short nr_phys_segments; |
| 214 | |||
| 217 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | 215 | #if defined(CONFIG_BLK_DEV_INTEGRITY) |
| 218 | unsigned short nr_integrity_segments; | 216 | unsigned short nr_integrity_segments; |
| 219 | #endif | 217 | #endif |
| 220 | 218 | ||
| 219 | unsigned short write_hint; | ||
| 221 | unsigned short ioprio; | 220 | unsigned short ioprio; |
| 222 | 221 | ||
| 223 | unsigned int timeout; | 222 | unsigned int timeout; |
| @@ -226,11 +225,37 @@ struct request { | |||
| 226 | 225 | ||
| 227 | unsigned int extra_len; /* length of alignment and padding */ | 226 | unsigned int extra_len; /* length of alignment and padding */ |
| 228 | 227 | ||
| 229 | unsigned short write_hint; | 228 | /* |
| 229 | * On blk-mq, the lower bits of ->gstate (generation number and | ||
| 230 | * state) carry the MQ_RQ_* state value and the upper bits the | ||
| 231 | * generation number which is monotonically incremented and used to | ||
| 232 | * distinguish the reuse instances. | ||
| 233 | * | ||
| 234 | * ->gstate_seq allows updates to ->gstate and other fields | ||
| 235 | * (currently ->deadline) during request start to be read | ||
| 236 | * atomically from the timeout path, so that it can operate on a | ||
| 237 | * coherent set of information. | ||
| 238 | */ | ||
| 239 | seqcount_t gstate_seq; | ||
| 240 | u64 gstate; | ||
| 241 | |||
| 242 | /* | ||
| 243 | * ->aborted_gstate is used by the timeout to claim a specific | ||
| 244 | * recycle instance of this request. See blk_mq_timeout_work(). | ||
| 245 | */ | ||
| 246 | struct u64_stats_sync aborted_gstate_sync; | ||
| 247 | u64 aborted_gstate; | ||
| 248 | |||
| 249 | /* access through blk_rq_set_deadline, blk_rq_deadline */ | ||
| 250 | unsigned long __deadline; | ||
| 230 | 251 | ||
| 231 | unsigned long deadline; | ||
| 232 | struct list_head timeout_list; | 252 | struct list_head timeout_list; |
| 233 | 253 | ||
| 254 | union { | ||
| 255 | struct __call_single_data csd; | ||
| 256 | u64 fifo_time; | ||
| 257 | }; | ||
| 258 | |||
| 234 | /* | 259 | /* |
| 235 | * completion callback. | 260 | * completion callback. |
| 236 | */ | 261 | */ |
| @@ -239,6 +264,12 @@ struct request { | |||
| 239 | 264 | ||
| 240 | /* for bidi */ | 265 | /* for bidi */ |
| 241 | struct request *next_rq; | 266 | struct request *next_rq; |
| 267 | |||
| 268 | #ifdef CONFIG_BLK_CGROUP | ||
| 269 | struct request_list *rl; /* rl this rq is alloced from */ | ||
| 270 | unsigned long long start_time_ns; | ||
| 271 | unsigned long long io_start_time_ns; /* when passed to hardware */ | ||
| 272 | #endif | ||
| 242 | }; | 273 | }; |
| 243 | 274 | ||
| 244 | static inline bool blk_op_is_scsi(unsigned int op) | 275 | static inline bool blk_op_is_scsi(unsigned int op) |
| @@ -564,6 +595,22 @@ struct request_queue { | |||
| 564 | struct queue_limits limits; | 595 | struct queue_limits limits; |
| 565 | 596 | ||
| 566 | /* | 597 | /* |
| 598 | * Zoned block device information for request dispatch control. | ||
| 599 | * nr_zones is the total number of zones of the device. This is always | ||
| 600 | * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones | ||
| 601 | * bits which indicates if a zone is conventional (bit clear) or | ||
| 602 | * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones | ||
| 603 | * bits which indicates if a zone is write locked, that is, if a write | ||
| 604 | * request targeting the zone was dispatched. All three fields are | ||
| 605 | * initialized by the low level device driver (e.g. scsi/sd.c). | ||
| 606 | * Stacking drivers (device mappers) may or may not initialize | ||
| 607 | * these fields. | ||
| 608 | */ | ||
| 609 | unsigned int nr_zones; | ||
| 610 | unsigned long *seq_zones_bitmap; | ||
| 611 | unsigned long *seq_zones_wlock; | ||
| 612 | |||
| 613 | /* | ||
| 567 | * sg stuff | 614 | * sg stuff |
| 568 | */ | 615 | */ |
| 569 | unsigned int sg_timeout; | 616 | unsigned int sg_timeout; |
| @@ -807,6 +854,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) | |||
| 807 | return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; | 854 | return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; |
| 808 | } | 855 | } |
| 809 | 856 | ||
| 857 | static inline unsigned int blk_queue_nr_zones(struct request_queue *q) | ||
| 858 | { | ||
| 859 | return q->nr_zones; | ||
| 860 | } | ||
| 861 | |||
| 862 | static inline unsigned int blk_queue_zone_no(struct request_queue *q, | ||
| 863 | sector_t sector) | ||
| 864 | { | ||
| 865 | if (!blk_queue_is_zoned(q)) | ||
| 866 | return 0; | ||
| 867 | return sector >> ilog2(q->limits.chunk_sectors); | ||
| 868 | } | ||
| 869 | |||
| 870 | static inline bool blk_queue_zone_is_seq(struct request_queue *q, | ||
| 871 | sector_t sector) | ||
| 872 | { | ||
| 873 | if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) | ||
| 874 | return false; | ||
| 875 | return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); | ||
| 876 | } | ||
| 877 | |||
| 810 | static inline bool rq_is_sync(struct request *rq) | 878 | static inline bool rq_is_sync(struct request *rq) |
| 811 | { | 879 | { |
| 812 | return op_is_sync(rq->cmd_flags); | 880 | return op_is_sync(rq->cmd_flags); |
| @@ -1046,6 +1114,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | |||
| 1046 | return blk_rq_cur_bytes(rq) >> 9; | 1114 | return blk_rq_cur_bytes(rq) >> 9; |
| 1047 | } | 1115 | } |
| 1048 | 1116 | ||
| 1117 | static inline unsigned int blk_rq_zone_no(struct request *rq) | ||
| 1118 | { | ||
| 1119 | return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | static inline unsigned int blk_rq_zone_is_seq(struct request *rq) | ||
| 1123 | { | ||
| 1124 | return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); | ||
| 1125 | } | ||
| 1126 | |||
| 1049 | /* | 1127 | /* |
| 1050 | * Some commands like WRITE SAME have a payload or data transfer size which | 1128 | * Some commands like WRITE SAME have a payload or data transfer size which |
| 1051 | * is different from the size of the request. Any driver that supports such | 1129 | * is different from the size of the request. Any driver that supports such |
| @@ -1595,7 +1673,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev) | |||
| 1595 | 1673 | ||
| 1596 | if (q) | 1674 | if (q) |
| 1597 | return blk_queue_zone_sectors(q); | 1675 | return blk_queue_zone_sectors(q); |
| 1676 | return 0; | ||
| 1677 | } | ||
| 1678 | |||
| 1679 | static inline unsigned int bdev_nr_zones(struct block_device *bdev) | ||
| 1680 | { | ||
| 1681 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 1598 | 1682 | ||
| 1683 | if (q) | ||
| 1684 | return blk_queue_nr_zones(q); | ||
| 1599 | return 0; | 1685 | return 0; |
| 1600 | } | 1686 | } |
| 1601 | 1687 | ||
| @@ -1731,8 +1817,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio) | |||
| 1731 | 1817 | ||
| 1732 | int kblockd_schedule_work(struct work_struct *work); | 1818 | int kblockd_schedule_work(struct work_struct *work); |
| 1733 | int kblockd_schedule_work_on(int cpu, struct work_struct *work); | 1819 | int kblockd_schedule_work_on(int cpu, struct work_struct *work); |
| 1734 | int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); | ||
| 1735 | int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); | ||
| 1736 | int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); | 1820 | int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); |
| 1737 | 1821 | ||
| 1738 | #ifdef CONFIG_BLK_CGROUP | 1822 | #ifdef CONFIG_BLK_CGROUP |
| @@ -1971,6 +2055,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | |||
| 1971 | extern int bdev_read_page(struct block_device *, sector_t, struct page *); | 2055 | extern int bdev_read_page(struct block_device *, sector_t, struct page *); |
| 1972 | extern int bdev_write_page(struct block_device *, sector_t, struct page *, | 2056 | extern int bdev_write_page(struct block_device *, sector_t, struct page *, |
| 1973 | struct writeback_control *); | 2057 | struct writeback_control *); |
| 2058 | |||
| 2059 | #ifdef CONFIG_BLK_DEV_ZONED | ||
| 2060 | bool blk_req_needs_zone_write_lock(struct request *rq); | ||
| 2061 | void __blk_req_zone_write_lock(struct request *rq); | ||
| 2062 | void __blk_req_zone_write_unlock(struct request *rq); | ||
| 2063 | |||
| 2064 | static inline void blk_req_zone_write_lock(struct request *rq) | ||
| 2065 | { | ||
| 2066 | if (blk_req_needs_zone_write_lock(rq)) | ||
| 2067 | __blk_req_zone_write_lock(rq); | ||
| 2068 | } | ||
| 2069 | |||
| 2070 | static inline void blk_req_zone_write_unlock(struct request *rq) | ||
| 2071 | { | ||
| 2072 | if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) | ||
| 2073 | __blk_req_zone_write_unlock(rq); | ||
| 2074 | } | ||
| 2075 | |||
| 2076 | static inline bool blk_req_zone_is_write_locked(struct request *rq) | ||
| 2077 | { | ||
| 2078 | return rq->q->seq_zones_wlock && | ||
| 2079 | test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); | ||
| 2080 | } | ||
| 2081 | |||
| 2082 | static inline bool blk_req_can_dispatch_to_zone(struct request *rq) | ||
| 2083 | { | ||
| 2084 | if (!blk_req_needs_zone_write_lock(rq)) | ||
| 2085 | return true; | ||
| 2086 | return !blk_req_zone_is_write_locked(rq); | ||
| 2087 | } | ||
| 2088 | #else | ||
| 2089 | static inline bool blk_req_needs_zone_write_lock(struct request *rq) | ||
| 2090 | { | ||
| 2091 | return false; | ||
| 2092 | } | ||
| 2093 | |||
| 2094 | static inline void blk_req_zone_write_lock(struct request *rq) | ||
| 2095 | { | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | static inline void blk_req_zone_write_unlock(struct request *rq) | ||
| 2099 | { | ||
| 2100 | } | ||
| 2101 | static inline bool blk_req_zone_is_write_locked(struct request *rq) | ||
| 2102 | { | ||
| 2103 | return false; | ||
| 2104 | } | ||
| 2105 | |||
| 2106 | static inline bool blk_req_can_dispatch_to_zone(struct request *rq) | ||
| 2107 | { | ||
| 2108 | return true; | ||
| 2109 | } | ||
| 2110 | #endif /* CONFIG_BLK_DEV_ZONED */ | ||
| 2111 | |||
| 1974 | #else /* CONFIG_BLOCK */ | 2112 | #else /* CONFIG_BLOCK */ |
| 1975 | 2113 | ||
| 1976 | struct block_device; | 2114 | struct block_device; |
