aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2017-12-21 01:43:38 -0500
committerJens Axboe <axboe@kernel.dk>2018-01-05 11:22:17 -0500
commit6cc77e9cb08041627fe1d32ac3a743249deb8167 (patch)
tree66303b9eb83c696b900fafc4f0c446acd6e2d72e /include/linux
parent882d4171a8950646413b1a3cbe0e4a6a612fe82e (diff)
block: introduce zoned block devices zone write locking
Components relying only on the request_queue structure for accessing block devices (e.g. I/O schedulers) have a limited knowledge of the device characteristics. In particular, the device capacity cannot be easily discovered, which for a zoned block device also results in the inability to easily know the number of zones of the device (the zone size is indicated by the chunk_sectors field of the queue limits). Introduce the nr_zones field to the request_queue structure to simplify access to this information. Also, add the bitmap seq_zones_bitmap which indicates which zones of the device are sequential zones (write preferred or write required) and the bitmap seq_zones_wlock which indicates if a zone is write locked, that is, if a write request targeting a zone was dispatched to the device. These fields are initialized by the low level block device driver (sd.c for ZBC/ZAC disks). They are not initialized by stacking drivers (device mappers) handling zoned block devices (e.g. dm-linear). Using this, I/O schedulers can introduce zone write locking to control request dispatching to a zoned block device and avoid write request reordering by limiting to at most a single write request per zone outside of the scheduler at any time. Based on previous patches from Damien Le Moal. Signed-off-by: Christoph Hellwig <hch@lst.de> [Damien] * Fixed comments and indentation in blkdev.h * Changed helper functions * Fixed this commit message Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/blkdev.h111
1 files changed, 111 insertions, 0 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..46e606f5b44b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -121,6 +121,8 @@ typedef __u32 __bitwise req_flags_t;
121/* Look at ->special_vec for the actual data payload instead of the 121/* Look at ->special_vec for the actual data payload instead of the
122 bio chain. */ 122 bio chain. */
123#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) 123#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
124/* The per-zone write lock is held for this request */
125#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
124 126
125/* flags that prevent us from merging requests: */ 127/* flags that prevent us from merging requests: */
126#define RQF_NOMERGE_FLAGS \ 128#define RQF_NOMERGE_FLAGS \
@@ -547,6 +549,22 @@ struct request_queue {
547 struct queue_limits limits; 549 struct queue_limits limits;
548 550
549 /* 551 /*
552 * Zoned block device information for request dispatch control.
553 * nr_zones is the total number of zones of the device. This is always
554 * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
555 * bits which indicates if a zone is conventional (bit clear) or
556 * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
557 * bits which indicates if a zone is write locked, that is, if a write
558 * request targeting the zone was dispatched. All three fields are
559 * initialized by the low level device driver (e.g. scsi/sd.c).
560 * Stacking drivers (device mappers) may or may not initialize
561 * these fields.
562 */
563 unsigned int nr_zones;
564 unsigned long *seq_zones_bitmap;
565 unsigned long *seq_zones_wlock;
566
567 /*
550 * sg stuff 568 * sg stuff
551 */ 569 */
552 unsigned int sg_timeout; 570 unsigned int sg_timeout;
@@ -790,6 +808,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
790 return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; 808 return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
791} 809}
792 810
811static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
812{
813 return q->nr_zones;
814}
815
816static inline unsigned int blk_queue_zone_no(struct request_queue *q,
817 sector_t sector)
818{
819 if (!blk_queue_is_zoned(q))
820 return 0;
821 return sector >> ilog2(q->limits.chunk_sectors);
822}
823
824static inline bool blk_queue_zone_is_seq(struct request_queue *q,
825 sector_t sector)
826{
827 if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
828 return false;
829 return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
830}
831
793static inline bool rq_is_sync(struct request *rq) 832static inline bool rq_is_sync(struct request *rq)
794{ 833{
795 return op_is_sync(rq->cmd_flags); 834 return op_is_sync(rq->cmd_flags);
@@ -1029,6 +1068,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
1029 return blk_rq_cur_bytes(rq) >> 9; 1068 return blk_rq_cur_bytes(rq) >> 9;
1030} 1069}
1031 1070
1071static inline unsigned int blk_rq_zone_no(struct request *rq)
1072{
1073 return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
1074}
1075
1076static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
1077{
1078 return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
1079}
1080
1032/* 1081/*
1033 * Some commands like WRITE SAME have a payload or data transfer size which 1082 * Some commands like WRITE SAME have a payload or data transfer size which
1034 * is different from the size of the request. Any driver that supports such 1083 * is different from the size of the request. Any driver that supports such
@@ -1578,7 +1627,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
1578 1627
1579 if (q) 1628 if (q)
1580 return blk_queue_zone_sectors(q); 1629 return blk_queue_zone_sectors(q);
1630 return 0;
1631}
1581 1632
1633static inline unsigned int bdev_nr_zones(struct block_device *bdev)
1634{
1635 struct request_queue *q = bdev_get_queue(bdev);
1636
1637 if (q)
1638 return blk_queue_nr_zones(q);
1582 return 0; 1639 return 0;
1583} 1640}
1584 1641
@@ -1954,6 +2011,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
1954extern int bdev_read_page(struct block_device *, sector_t, struct page *); 2011extern int bdev_read_page(struct block_device *, sector_t, struct page *);
1955extern int bdev_write_page(struct block_device *, sector_t, struct page *, 2012extern int bdev_write_page(struct block_device *, sector_t, struct page *,
1956 struct writeback_control *); 2013 struct writeback_control *);
2014
2015#ifdef CONFIG_BLK_DEV_ZONED
2016bool blk_req_needs_zone_write_lock(struct request *rq);
2017void __blk_req_zone_write_lock(struct request *rq);
2018void __blk_req_zone_write_unlock(struct request *rq);
2019
2020static inline void blk_req_zone_write_lock(struct request *rq)
2021{
2022 if (blk_req_needs_zone_write_lock(rq))
2023 __blk_req_zone_write_lock(rq);
2024}
2025
2026static inline void blk_req_zone_write_unlock(struct request *rq)
2027{
2028 if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
2029 __blk_req_zone_write_unlock(rq);
2030}
2031
2032static inline bool blk_req_zone_is_write_locked(struct request *rq)
2033{
2034 return rq->q->seq_zones_wlock &&
2035 test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
2036}
2037
2038static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
2039{
2040 if (!blk_req_needs_zone_write_lock(rq))
2041 return true;
2042 return !blk_req_zone_is_write_locked(rq);
2043}
2044#else
2045static inline bool blk_req_needs_zone_write_lock(struct request *rq)
2046{
2047 return false;
2048}
2049
2050static inline void blk_req_zone_write_lock(struct request *rq)
2051{
2052}
2053
2054static inline void blk_req_zone_write_unlock(struct request *rq)
2055{
2056}
2057static inline bool blk_req_zone_is_write_locked(struct request *rq)
2058{
2059 return false;
2060}
2061
2062static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
2063{
2064 return true;
2065}
2066#endif /* CONFIG_BLK_DEV_ZONED */
2067
1957#else /* CONFIG_BLOCK */ 2068#else /* CONFIG_BLOCK */
1958 2069
1959struct block_device; 2070struct block_device;