diff options
| author | Christoph Hellwig <hch@lst.de> | 2017-12-21 01:43:38 -0500 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2018-01-05 11:22:17 -0500 |
| commit | 6cc77e9cb08041627fe1d32ac3a743249deb8167 (patch) | |
| tree | 66303b9eb83c696b900fafc4f0c446acd6e2d72e /include/linux/blkdev.h | |
| parent | 882d4171a8950646413b1a3cbe0e4a6a612fe82e (diff) | |
block: introduce zoned block devices zone write locking
Components relying only on the request_queue structure for accessing
block devices (e.g. I/O schedulers) have limited knowledge of the
device characteristics. In particular, the device capacity cannot be
easily discovered, which for a zoned block device also results in the
inability to easily know the number of zones of the device (the zone
size is indicated by the chunk_sectors field of the queue limits).
Introduce the nr_zones field to the request_queue structure to simplify
access to this information. Also, add the bitmap seq_zones_bitmap which
indicates which zones of the device are sequential zones (write
preferred or write required) and the bitmap seq_zones_wlock which
indicates if a zone is write locked, that is, if a write request
targeting a zone was dispatched to the device. These fields are
initialized by the low level block device driver (sd.c for ZBC/ZAC
disks). They are not initialized by stacking drivers (device mappers)
handling zoned block devices (e.g. dm-linear).
Using this, I/O schedulers can introduce zone write locking to control
request dispatching to a zoned block device and avoid write request
reordering by limiting to at most a single write request per zone
outside of the scheduler at any time.
Based on previous patches from Damien Le Moal.
Signed-off-by: Christoph Hellwig <hch@lst.de>
[Damien]
* Fixed comments and indentation in blkdev.h
* Changed helper functions
* Fixed this commit message
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/linux/blkdev.h')
| -rw-r--r-- | include/linux/blkdev.h | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8089ca17db9a..46e606f5b44b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -121,6 +121,8 @@ typedef __u32 __bitwise req_flags_t; | |||
| 121 | /* Look at ->special_vec for the actual data payload instead of the | 121 | /* Look at ->special_vec for the actual data payload instead of the |
| 122 | bio chain. */ | 122 | bio chain. */ |
| 123 | #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) | 123 | #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) |
| 124 | /* The per-zone write lock is held for this request */ | ||
| 125 | #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) | ||
| 124 | 126 | ||
| 125 | /* flags that prevent us from merging requests: */ | 127 | /* flags that prevent us from merging requests: */ |
| 126 | #define RQF_NOMERGE_FLAGS \ | 128 | #define RQF_NOMERGE_FLAGS \ |
| @@ -547,6 +549,22 @@ struct request_queue { | |||
| 547 | struct queue_limits limits; | 549 | struct queue_limits limits; |
| 548 | 550 | ||
| 549 | /* | 551 | /* |
| 552 | * Zoned block device information for request dispatch control. | ||
| 553 | * nr_zones is the total number of zones of the device. This is always | ||
| 554 | * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones | ||
| 555 | * bits which indicates if a zone is conventional (bit clear) or | ||
| 556 | * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones | ||
| 557 | * bits which indicates if a zone is write locked, that is, if a write | ||
| 558 | * request targeting the zone was dispatched. All three fields are | ||
| 559 | * initialized by the low level device driver (e.g. scsi/sd.c). | ||
| 560 | * Stacking drivers (device mappers) may or may not initialize | ||
| 561 | * these fields. | ||
| 562 | */ | ||
| 563 | unsigned int nr_zones; | ||
| 564 | unsigned long *seq_zones_bitmap; | ||
| 565 | unsigned long *seq_zones_wlock; | ||
| 566 | |||
| 567 | /* | ||
| 550 | * sg stuff | 568 | * sg stuff |
| 551 | */ | 569 | */ |
| 552 | unsigned int sg_timeout; | 570 | unsigned int sg_timeout; |
| @@ -790,6 +808,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) | |||
| 790 | return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; | 808 | return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; |
| 791 | } | 809 | } |
| 792 | 810 | ||
| 811 | static inline unsigned int blk_queue_nr_zones(struct request_queue *q) | ||
| 812 | { | ||
| 813 | return q->nr_zones; | ||
| 814 | } | ||
| 815 | |||
| 816 | static inline unsigned int blk_queue_zone_no(struct request_queue *q, | ||
| 817 | sector_t sector) | ||
| 818 | { | ||
| 819 | if (!blk_queue_is_zoned(q)) | ||
| 820 | return 0; | ||
| 821 | return sector >> ilog2(q->limits.chunk_sectors); | ||
| 822 | } | ||
| 823 | |||
| 824 | static inline bool blk_queue_zone_is_seq(struct request_queue *q, | ||
| 825 | sector_t sector) | ||
| 826 | { | ||
| 827 | if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) | ||
| 828 | return false; | ||
| 829 | return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); | ||
| 830 | } | ||
| 831 | |||
| 793 | static inline bool rq_is_sync(struct request *rq) | 832 | static inline bool rq_is_sync(struct request *rq) |
| 794 | { | 833 | { |
| 795 | return op_is_sync(rq->cmd_flags); | 834 | return op_is_sync(rq->cmd_flags); |
| @@ -1029,6 +1068,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | |||
| 1029 | return blk_rq_cur_bytes(rq) >> 9; | 1068 | return blk_rq_cur_bytes(rq) >> 9; |
| 1030 | } | 1069 | } |
| 1031 | 1070 | ||
| 1071 | static inline unsigned int blk_rq_zone_no(struct request *rq) | ||
| 1072 | { | ||
| 1073 | return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | static inline unsigned int blk_rq_zone_is_seq(struct request *rq) | ||
| 1077 | { | ||
| 1078 | return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); | ||
| 1079 | } | ||
| 1080 | |||
| 1032 | /* | 1081 | /* |
| 1033 | * Some commands like WRITE SAME have a payload or data transfer size which | 1082 | * Some commands like WRITE SAME have a payload or data transfer size which |
| 1034 | * is different from the size of the request. Any driver that supports such | 1083 | * is different from the size of the request. Any driver that supports such |
| @@ -1578,7 +1627,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev) | |||
| 1578 | 1627 | ||
| 1579 | if (q) | 1628 | if (q) |
| 1580 | return blk_queue_zone_sectors(q); | 1629 | return blk_queue_zone_sectors(q); |
| 1630 | return 0; | ||
| 1631 | } | ||
| 1581 | 1632 | ||
| 1633 | static inline unsigned int bdev_nr_zones(struct block_device *bdev) | ||
| 1634 | { | ||
| 1635 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 1636 | |||
| 1637 | if (q) | ||
| 1638 | return blk_queue_nr_zones(q); | ||
| 1582 | return 0; | 1639 | return 0; |
| 1583 | } | 1640 | } |
| 1584 | 1641 | ||
| @@ -1954,6 +2011,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, | |||
| 1954 | extern int bdev_read_page(struct block_device *, sector_t, struct page *); | 2011 | extern int bdev_read_page(struct block_device *, sector_t, struct page *); |
| 1955 | extern int bdev_write_page(struct block_device *, sector_t, struct page *, | 2012 | extern int bdev_write_page(struct block_device *, sector_t, struct page *, |
| 1956 | struct writeback_control *); | 2013 | struct writeback_control *); |
| 2014 | |||
| 2015 | #ifdef CONFIG_BLK_DEV_ZONED | ||
| 2016 | bool blk_req_needs_zone_write_lock(struct request *rq); | ||
| 2017 | void __blk_req_zone_write_lock(struct request *rq); | ||
| 2018 | void __blk_req_zone_write_unlock(struct request *rq); | ||
| 2019 | |||
| 2020 | static inline void blk_req_zone_write_lock(struct request *rq) | ||
| 2021 | { | ||
| 2022 | if (blk_req_needs_zone_write_lock(rq)) | ||
| 2023 | __blk_req_zone_write_lock(rq); | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | static inline void blk_req_zone_write_unlock(struct request *rq) | ||
| 2027 | { | ||
| 2028 | if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) | ||
| 2029 | __blk_req_zone_write_unlock(rq); | ||
| 2030 | } | ||
| 2031 | |||
| 2032 | static inline bool blk_req_zone_is_write_locked(struct request *rq) | ||
| 2033 | { | ||
| 2034 | return rq->q->seq_zones_wlock && | ||
| 2035 | test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | static inline bool blk_req_can_dispatch_to_zone(struct request *rq) | ||
| 2039 | { | ||
| 2040 | if (!blk_req_needs_zone_write_lock(rq)) | ||
| 2041 | return true; | ||
| 2042 | return !blk_req_zone_is_write_locked(rq); | ||
| 2043 | } | ||
| 2044 | #else | ||
| 2045 | static inline bool blk_req_needs_zone_write_lock(struct request *rq) | ||
| 2046 | { | ||
| 2047 | return false; | ||
| 2048 | } | ||
| 2049 | |||
| 2050 | static inline void blk_req_zone_write_lock(struct request *rq) | ||
| 2051 | { | ||
| 2052 | } | ||
| 2053 | |||
| 2054 | static inline void blk_req_zone_write_unlock(struct request *rq) | ||
| 2055 | { | ||
| 2056 | } | ||
| 2057 | static inline bool blk_req_zone_is_write_locked(struct request *rq) | ||
| 2058 | { | ||
| 2059 | return false; | ||
| 2060 | } | ||
| 2061 | |||
| 2062 | static inline bool blk_req_can_dispatch_to_zone(struct request *rq) | ||
| 2063 | { | ||
| 2064 | return true; | ||
| 2065 | } | ||
| 2066 | #endif /* CONFIG_BLK_DEV_ZONED */ | ||
| 2067 | |||
| 1957 | #else /* CONFIG_BLOCK */ | 2068 | #else /* CONFIG_BLOCK */ |
| 1958 | 2069 | ||
| 1959 | struct block_device; | 2070 | struct block_device; |
