author:    Linus Torvalds <torvalds@linux-foundation.org>  2018-01-29 14:51:49 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>  2018-01-29 14:51:49 -0500
commit:    0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
tree:      cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /include/linux
parent:    9697e9da84299d0d715d515dd2cc48f1eceb277d (diff)
parent:    796baeeef85a40b3495a907fb7425086e7010102 (diff)
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "This is the main pull request for block IO related changes for the
  4.16 kernel. Nothing major in this pull request, but a good amount of
  improvements and fixes all over the map. This contains:

   - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and
     Paolo.

   - Support for SMR zones for deadline and mq-deadline from Damien and
     Christoph.

   - Set of fixes for bcache by way of Michael Lyle, including fixes
     from himself, Kent, Rui, Tang, and Coly.

   - Series from Matias for lightnvm with fixes from Hans Holmberg,
     Javier, and Matias. Mostly centered around pblk, and the removal
     of rrpc 1.2 in preparation for supporting 2.0.

   - A couple of NVMe pull requests from Christoph. Nothing major in
     here, just fixes and cleanups, and support for command tracing
     from Johannes.

   - Support in blk-throttle for tracking reads and writes separately.
     From Joseph Qi. A few cleanups/fixes also for blk-throttle from
     Weiping.

   - Series from Mike Snitzer that enables dm to register its queue
     more logically, something that's always been problematic on dm
     since it's a stacked device.

   - Series from Ming cleaning up some of the bio accessor use, in
     preparation for supporting multipage bvecs.

   - Various fixes from Ming closing up holes around queue mapping and
     quiescing.

   - BSD partition fix from Richard Narron, fixing a problem where we
     can't mount newer (10/11) FreeBSD partitions.

   - Series from Tejun reworking blk-mq timeout handling. The previous
     scheme relied on atomic bits, but it had races where we would
     think a request had timed out if it was reused at the wrong time.

   - null_blk now supports faking timeouts, to enable us to better
     exercise and test that functionality separately. From me.

   - Kill the separate atomic poll bit in the request struct. After
     this, we don't use the atomic bits on blk-mq anymore at all. From
     me.

   - sgl_alloc/free helpers from Bart.

   - Heavily contended tag case scalability improvement from me.

   - Various little fixes and cleanups from Arnd, Bart, Corentin,
     Douglas, Eryu, Goldwyn, and myself"

* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
  block: remove smart1,2.h
  nvme: add tracepoint for nvme_complete_rq
  nvme: add tracepoint for nvme_setup_cmd
  nvme-pci: introduce RECONNECTING state to mark initializing procedure
  nvme-rdma: remove redundant boolean for inline_data
  nvme: don't free uuid pointer before printing it
  nvme-pci: Suspend queues after deleting them
  bsg: use pr_debug instead of hand crafted macros
  blk-mq-debugfs: don't allow write on attributes with seq_operations set
  nvme-pci: Fix queue double allocations
  block: Set BIO_TRACE_COMPLETION on new bio during split
  blk-throttle: use queue_is_rq_based
  block: Remove kblockd_schedule_delayed_work{,_on}()
  blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
  blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
  lib/scatterlist: Fix chaining support in sgl_alloc_order()
  blk-throttle: track read and write request individually
  block: add bdev_read_only() checks to common helpers
  block: fail op_is_write() requests to read-only partitions
  blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
  ...
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/bio.h           24
-rw-r--r--  include/linux/blk-cgroup.h     8
-rw-r--r--  include/linux/blk-mq.h         3
-rw-r--r--  include/linux/blk_types.h     28
-rw-r--r--  include/linux/blkdev.h       172
-rw-r--r--  include/linux/bvec.h           9
-rw-r--r--  include/linux/elevator.h       2
-rw-r--r--  include/linux/genhd.h          5
-rw-r--r--  include/linux/lightnvm.h     125
-rw-r--r--  include/linux/nvme.h          22
-rw-r--r--  include/linux/scatterlist.h   11
11 files changed, 290 insertions(+), 119 deletions(-)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 23d29b39f71e..d0eb659fa733 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -300,6 +300,29 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
 	bv->bv_len = iter.bi_bvec_done;
 }
 
+static inline unsigned bio_pages_all(struct bio *bio)
+{
+	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+	return bio->bi_vcnt;
+}
+
+static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
+{
+	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+	return bio->bi_io_vec;
+}
+
+static inline struct page *bio_first_page_all(struct bio *bio)
+{
+	return bio_first_bvec_all(bio)->bv_page;
+}
+
+static inline struct bio_vec *bio_last_bvec_all(struct bio *bio)
+{
+	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+	return &bio->bi_io_vec[bio->bi_vcnt - 1];
+}
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
@@ -477,7 +500,6 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
 #endif
 
 extern void bio_copy_data(struct bio *dst, struct bio *src);
-extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
 extern void bio_free_pages(struct bio *bio);
 
 extern struct bio *bio_copy_user_iov(struct request_queue *,
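
The new bio_*_all() accessors are only valid on a non-cloned bio that owns its whole vector table, which is what the WARN_ON_ONCE checks enforce. A minimal usage sketch, assuming a freshly built, unsubmitted bio (the function name is illustrative, not part of this patch):

static void my_zero_edge_segments(struct bio *bio)
{
	struct bio_vec *first = bio_first_bvec_all(bio);
	struct bio_vec *last = bio_last_bvec_all(bio);

	/* Legal only while the bio is not cloned and not yet submitted. */
	zero_user(first->bv_page, first->bv_offset, first->bv_len);
	if (last != first)
		zero_user(last->bv_page, last->bv_offset, last->bv_len);
}

Callers that previously poked bio->bi_io_vec directly can switch to these helpers, which is part of what makes the later multipage-bvec conversion possible.
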
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index e9825ff57b15..69bea82ebeb1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -660,12 +660,14 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
 static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
 				       struct blkg_rwstat *from)
 {
-	struct blkg_rwstat v = blkg_rwstat_read(from);
+	u64 sum[BLKG_RWSTAT_NR];
 	int i;
 
 	for (i = 0; i < BLKG_RWSTAT_NR; i++)
-		atomic64_add(atomic64_read(&v.aux_cnt[i]) +
-			     atomic64_read(&from->aux_cnt[i]),
+		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
 			     &to->aux_cnt[i]);
 }
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95c9a5c862e2..8efcf49796a3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,6 +51,7 @@ struct blk_mq_hw_ctx {
 	unsigned int		queue_num;
 
 	atomic_t		nr_active;
+	unsigned int		nr_expired;
 
 	struct hlist_node	cpuhp_dead;
 	struct kobject		kobj;
@@ -65,7 +66,7 @@ struct blk_mq_hw_ctx {
 #endif
 
 	/* Must be the last member - see also blk_mq_hw_ctx_size(). */
-	struct srcu_struct	queue_rq_srcu[0];
+	struct srcu_struct	srcu[0];
 };
 
 struct blk_mq_tag_set {
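
The queue_rq_srcu -> srcu rename reflects that the SRCU domain now protects more than ->queue_rq(): with BLK_MQ_F_BLOCKING, dispatch runs inside an SRCU read-side section so quiescing can wait out sleeping drivers. A hedged sketch of the locking shape (the real logic lives in block/blk-mq.c; this function is illustrative):

static void my_dispatch_protected(struct blk_mq_hw_ctx *hctx)
{
	int srcu_idx;

	/* BLK_MQ_F_BLOCKING queues use SRCU instead of plain RCU. */
	srcu_idx = srcu_read_lock(hctx->srcu);
	/* ... invoke ->queue_rq(), which is allowed to sleep ... */
	srcu_read_unlock(hctx->srcu, srcu_idx);
}
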
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 9e7d8bd776d2..c5d3db0d83f8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,6 +39,34 @@ typedef u8 __bitwise blk_status_t;
 
 #define BLK_STS_AGAIN		((__force blk_status_t)12)
 
+/**
+ * blk_path_error - returns true if error may be path related
+ * @error: status the request was completed with
+ *
+ * Description:
+ *     This classifies block error status into non-retryable errors and ones
+ *     that may be successful if retried on a failover path.
+ *
+ * Return:
+ *     %false - retrying failover path will not help
+ *     %true  - may succeed if retried
+ */
+static inline bool blk_path_error(blk_status_t error)
+{
+	switch (error) {
+	case BLK_STS_NOTSUPP:
+	case BLK_STS_NOSPC:
+	case BLK_STS_TARGET:
+	case BLK_STS_NEXUS:
+	case BLK_STS_MEDIUM:
+	case BLK_STS_PROTECTION:
+		return false;
+	}
+
+	/* Anything else could be a path failure, so should be retried */
+	return true;
+}
+
 struct blk_issue_stat {
 	u64 stat;
 };
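
A completion path that supports failover would use blk_path_error() roughly as follows. This is a hedged sketch; my_failover_req() is a hypothetical per-driver requeue helper, not an existing API:

static void my_complete_rq(struct request *req, blk_status_t error)
{
	if (error && blk_path_error(error)) {
		/* May be path related: requeue on another path. */
		my_failover_req(req);
		return;
	}

	/* Non-retryable status: surface the error. */
	blk_mq_end_request(req, error);
}
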
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0ce8a372d506..4f3df807cf8f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,8 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
+#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -121,6 +123,12 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
+/* timeout is expired */
+#define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
+/* already slept for hybrid poll */
+#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -133,12 +141,6 @@ typedef __u32 __bitwise req_flags_t;
  * especially blk_mq_rq_ctx_init() to take care of the added fields.
  */
 struct request {
-	struct list_head queuelist;
-	union {
-		struct __call_single_data csd;
-		u64 fifo_time;
-	};
-
 	struct request_queue *q;
 	struct blk_mq_ctx *mq_ctx;
 
@@ -148,8 +150,6 @@ struct request {
 
 	int internal_tag;
 
-	unsigned long atomic_flags;
-
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
 	int tag;
@@ -158,6 +158,8 @@ struct request {
 	struct bio *bio;
 	struct bio *biotail;
 
+	struct list_head queuelist;
+
 	/*
 	 * The hash is used inside the scheduler, and killed once the
 	 * request reaches the dispatch list. The ipi_list is only used
@@ -205,19 +207,16 @@ struct request {
 	struct hd_struct *part;
 	unsigned long start_time;
 	struct blk_issue_stat issue_stat;
-#ifdef CONFIG_BLK_CGROUP
-	struct request_list *rl;		/* rl this rq is alloced from */
-	unsigned long long start_time_ns;
-	unsigned long long io_start_time_ns;	/* when passed to hardware */
-#endif
 	/* Number of scatter-gather DMA addr+len pairs after
 	 * physical address coalescing is performed.
 	 */
 	unsigned short nr_phys_segments;
+
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	unsigned short nr_integrity_segments;
 #endif
 
+	unsigned short write_hint;
 	unsigned short ioprio;
 
 	unsigned int timeout;
@@ -226,11 +225,37 @@ struct request {
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
-	unsigned short write_hint;
+	/*
+	 * On blk-mq, the lower bits of ->gstate (generation number and
+	 * state) carry the MQ_RQ_* state value and the upper bits the
+	 * generation number which is monotonically incremented and used to
+	 * distinguish the reuse instances.
+	 *
+	 * ->gstate_seq allows updates to ->gstate and other fields
+	 * (currently ->deadline) during request start to be read
+	 * atomically from the timeout path, so that it can operate on a
+	 * coherent set of information.
+	 */
+	seqcount_t gstate_seq;
+	u64 gstate;
+
+	/*
+	 * ->aborted_gstate is used by the timeout to claim a specific
+	 * recycle instance of this request. See blk_mq_timeout_work().
+	 */
+	struct u64_stats_sync aborted_gstate_sync;
+	u64 aborted_gstate;
+
+	/* access through blk_rq_set_deadline, blk_rq_deadline */
+	unsigned long __deadline;
 
-	unsigned long deadline;
 	struct list_head timeout_list;
 
+	union {
+		struct __call_single_data csd;
+		u64 fifo_time;
+	};
+
 	/*
 	 * completion callback.
 	 */
@@ -239,6 +264,12 @@ struct request {
 
 	/* for bidi */
 	struct request *next_rq;
+
+#ifdef CONFIG_BLK_CGROUP
+	struct request_list *rl;		/* rl this rq is alloced from */
+	unsigned long long start_time_ns;
+	unsigned long long io_start_time_ns;	/* when passed to hardware */
+#endif
 };
 
 static inline bool blk_op_is_scsi(unsigned int op)
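
The packing described in the ->gstate comment amounts to a small state field in the low bits with a generation counter above it. A hedged sketch of the encoding (the MY_RQ_* constants mirror private definitions in block/blk-mq.h and are reproduced here only for illustration):

#define MY_RQ_STATE_BITS	2
#define MY_RQ_STATE_MASK	((1U << MY_RQ_STATE_BITS) - 1)
#define MY_RQ_GEN_INC		(1U << MY_RQ_STATE_BITS)

static inline unsigned int my_rq_state(u64 gstate)
{
	return gstate & MY_RQ_STATE_MASK;	/* idle/in-flight/complete */
}

static inline u64 my_rq_bump_gen(u64 gstate)
{
	/* Incrementing above the state bits marks a new reuse instance. */
	return gstate + MY_RQ_GEN_INC;
}

The timeout path snapshots ->gstate under gstate_seq; if the generation has moved on by the time it acts, the request was recycled and the stale timeout is ignored.
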
@@ -564,6 +595,22 @@ struct request_queue {
 	struct queue_limits	limits;
 
 	/*
+	 * Zoned block device information for request dispatch control.
+	 * nr_zones is the total number of zones of the device. This is always
+	 * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
+	 * bits which indicates if a zone is conventional (bit clear) or
+	 * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
+	 * bits which indicates if a zone is write locked, that is, if a write
+	 * request targeting the zone was dispatched. All three fields are
+	 * initialized by the low level device driver (e.g. scsi/sd.c).
+	 * Stacking drivers (device mappers) may or may not initialize
+	 * these fields.
+	 */
+	unsigned int		nr_zones;
+	unsigned long		*seq_zones_bitmap;
+	unsigned long		*seq_zones_wlock;
+
+	/*
 	 * sg stuff
 	 */
 	unsigned int		sg_timeout;
@@ -807,6 +854,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+	return q->nr_zones;
+}
+
+static inline unsigned int blk_queue_zone_no(struct request_queue *q,
+					     sector_t sector)
+{
+	if (!blk_queue_is_zoned(q))
+		return 0;
+	return sector >> ilog2(q->limits.chunk_sectors);
+}
+
+static inline bool blk_queue_zone_is_seq(struct request_queue *q,
+					 sector_t sector)
+{
+	if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
+		return false;
+	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
+}
+
 static inline bool rq_is_sync(struct request *rq)
 {
 	return op_is_sync(rq->cmd_flags);
@@ -1046,6 +1114,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> 9;
 }
 
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+}
+
 /*
  * Some commands like WRITE SAME have a payload or data transfer size which
  * is different from the size of the request. Any driver that supports such
@@ -1595,7 +1673,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 
 	if (q)
 		return blk_queue_zone_sectors(q);
+	return 0;
+}
+
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
 
+	if (q)
+		return blk_queue_nr_zones(q);
 	return 0;
 }
 
@@ -1731,8 +1817,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
@@ -1971,6 +2055,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+bool blk_req_needs_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+	if (blk_req_needs_zone_write_lock(rq))
+		__blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+		__blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return rq->q->seq_zones_wlock &&
+		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	if (!blk_req_needs_zone_write_lock(rq))
+		return true;
+	return !blk_req_zone_is_write_locked(rq);
+}
+#else
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+	return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
 #else /* CONFIG_BLOCK */
 
 struct block_device;
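
In this series the consumer of the zone write lock is the deadline schedulers' dispatch path. A hedged sketch of the pattern, modeled on (but not copied from) mq-deadline:

static struct request *my_try_dispatch_write(struct request *rq)
{
	/*
	 * A write targeting a sequential zone that is already write
	 * locked must wait, or it could be reordered against the
	 * write in flight in that zone.
	 */
	if (!blk_req_can_dispatch_to_zone(rq))
		return NULL;

	/* Lock the zone before handing the request to the driver. */
	blk_req_zone_write_lock(rq);
	return rq;
}

The completion path later calls blk_req_zone_write_unlock(), keyed off RQF_ZONE_WRITE_LOCKED.
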
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ec8a4d7af6bd..fe7a22dd133b 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -125,4 +125,13 @@ static inline bool bvec_iter_rewind(const struct bio_vec *bv,
 		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
 	     bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
 
+/* for iterating one bio from start to end */
+#define BVEC_ITER_ALL_INIT (struct bvec_iter)				\
+{									\
+	.bi_sector	= 0,						\
+	.bi_size	= UINT_MAX,					\
+	.bi_idx		= 0,						\
+	.bi_bvec_done	= 0,						\
+}
+
 #endif /* __LINUX_BVEC_ITER_H */
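
BVEC_ITER_ALL_INIT starts an iterator at the front of a bio's vector table with an effectively unbounded size, so one bio's bvecs can be walked in lockstep with another iteration even after the first bio's own ->bi_iter has been consumed; block/bounce.c uses it this way. A hedged sketch:

static void my_walk_in_lockstep(struct bio *to, struct bio *from)
{
	struct bvec_iter from_iter = BVEC_ITER_ALL_INIT;
	struct bvec_iter iter;
	struct bio_vec tovec;

	bio_for_each_segment(tovec, to, iter) {
		struct bio_vec fromvec =
			bvec_iter_bvec(from->bi_io_vec, from_iter);

		/* ... copy between fromvec and tovec here ... */

		bvec_iter_advance(from->bi_io_vec, &from_iter,
				  tovec.bv_len);
	}
}
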
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 3d794b3dc532..6d9e230dffd2 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -198,8 +198,6 @@ extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);
-extern int elv_register_queue(struct request_queue *q);
-extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, unsigned int);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *q, struct request *rq,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5144ebe046c9..5e3531027b51 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -395,6 +395,11 @@ static inline void add_disk(struct gendisk *disk)
 {
 	device_add_disk(NULL, disk);
 }
+extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk);
+static inline void add_disk_no_queue_reg(struct gendisk *disk)
+{
+	device_add_disk_no_queue_reg(NULL, disk);
+}
 
 extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
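
add_disk_no_queue_reg() makes the disk visible without registering its request_queue; the driver completes registration later with blk_register_queue(). This is the hook that lets dm order queue registration sensibly, per the pull message above. A hedged sketch of the two-step bring-up (the surrounding function is illustrative):

static void my_bring_up(struct gendisk *disk)
{
	/* Disk becomes visible; queue sysfs/elevator setup is deferred. */
	add_disk_no_queue_reg(disk);

	/* ... finish configuring the queue ... */

	blk_register_queue(disk);
}
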
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2d1d9de06728..7f4b60abdf27 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -50,10 +50,7 @@ struct nvm_id;
 struct nvm_dev;
 struct nvm_tgt_dev;
 
-typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
 typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
-				nvm_l2p_update_fn *, void *);
 typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
 typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
 typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
@@ -66,7 +63,6 @@ typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
 
 struct nvm_dev_ops {
 	nvm_id_fn		*identity;
-	nvm_get_l2p_tbl_fn	*get_l2p_tbl;
 	nvm_op_bb_tbl_fn	*get_bb_tbl;
 	nvm_op_set_bb_fn	*set_bb_tbl;
 
@@ -112,8 +108,6 @@ enum {
 	NVM_RSP_WARN_HIGHECC	= 0x4700,
 
 	/* Device opcodes */
-	NVM_OP_HBREAD		= 0x02,
-	NVM_OP_HBWRITE		= 0x81,
 	NVM_OP_PWRITE		= 0x91,
 	NVM_OP_PREAD		= 0x92,
 	NVM_OP_ERASE		= 0x90,
@@ -165,12 +159,16 @@ struct nvm_id_group {
 	u8	fmtype;
 	u8	num_ch;
 	u8	num_lun;
-	u8	num_pln;
-	u16	num_blk;
-	u16	num_pg;
-	u16	fpg_sz;
+	u16	num_chk;
+	u16	clba;
 	u16	csecs;
 	u16	sos;
+
+	u16	ws_min;
+	u16	ws_opt;
+	u16	ws_seq;
+	u16	ws_per_chk;
+
 	u32	trdt;
 	u32	trdm;
 	u32	tprt;
@@ -181,7 +179,10 @@ struct nvm_id_group {
 	u32	mccap;
 	u16	cpar;
 
-	struct nvm_id_lp_tbl lptbl;
+	/* 1.2 compatibility */
+	u8	num_pln;
+	u16	num_pg;
+	u16	fpg_sz;
 };
 
 struct nvm_addr_format {
@@ -217,6 +218,10 @@ struct nvm_target {
 
 #define ADDR_EMPTY (~0ULL)
 
+#define NVM_TARGET_DEFAULT_OP (11)
+#define NVM_TARGET_MIN_OP (3)
+#define NVM_TARGET_MAX_OP (80)
+
 #define NVM_VERSION_MAJOR 1
 #define NVM_VERSION_MINOR 0
 #define NVM_VERSION_PATCH 0
@@ -239,7 +244,6 @@ struct nvm_rq {
 	void *meta_list;
 	dma_addr_t dma_meta_list;
 
-	struct completion *wait;
 	nvm_end_io_fn *end_io;
 
 	uint8_t opcode;
@@ -268,31 +272,38 @@ enum {
 	NVM_BLK_ST_BAD =	0x8,	/* Bad block */
 };
 
+
 /* Device generic information */
 struct nvm_geo {
+	/* generic geometry */
 	int nr_chnls;
-	int nr_luns;
-	int luns_per_chnl; /* -1 if channels are not symmetric */
-	int nr_planes;
-	int sec_per_pg; /* only sectors for a single page */
-	int pgs_per_blk;
-	int blks_per_lun;
-	int fpg_size;
-	int pfpg_size; /* size of buffer if all pages are to be read */
+	int all_luns; /* across channels */
+	int nr_luns; /* per channel */
+	int nr_chks; /* per lun */
+
 	int sec_size;
 	int oob_size;
 	int mccap;
-	struct nvm_addr_format ppaf;
 
-	/* Calculated/Cached values. These do not reflect the actual usable
-	 * blocks at run-time.
-	 */
+	int sec_per_chk;
+	int sec_per_lun;
+
+	int ws_min;
+	int ws_opt;
+	int ws_seq;
+	int ws_per_chk;
+
 	int max_rq_size;
-	int plane_mode; /* drive device in single, double or quad mode */
 
+	int op;
+
+	struct nvm_addr_format ppaf;
+
+	/* Legacy 1.2 specific geometry */
+	int plane_mode; /* drive device in single, double or quad mode */
+	int nr_planes;
+	int sec_per_pg; /* only sectors for a single page */
 	int sec_per_pl; /* all sectors across planes */
-	int sec_per_blk;
-	int sec_per_lun;
 };
 
 /* sub-device structure */
@@ -320,10 +331,6 @@ struct nvm_dev {
 	/* Device information */
 	struct nvm_geo geo;
 
-	/* lower page table */
-	int lps_per_blk;
-	int *lptbl;
-
 	unsigned long total_secs;
 
 	unsigned long *lun_map;
@@ -346,36 +353,6 @@ struct nvm_dev {
 	struct list_head targets;
 };
 
-static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
-						     u64 pba)
-{
-	struct ppa_addr l;
-	int secs, pgs, blks, luns;
-	sector_t ppa = pba;
-
-	l.ppa = 0;
-
-	div_u64_rem(ppa, geo->sec_per_pg, &secs);
-	l.g.sec = secs;
-
-	sector_div(ppa, geo->sec_per_pg);
-	div_u64_rem(ppa, geo->pgs_per_blk, &pgs);
-	l.g.pg = pgs;
-
-	sector_div(ppa, geo->pgs_per_blk);
-	div_u64_rem(ppa, geo->blks_per_lun, &blks);
-	l.g.blk = blks;
-
-	sector_div(ppa, geo->blks_per_lun);
-	div_u64_rem(ppa, geo->luns_per_chnl, &luns);
-	l.g.lun = luns;
-
-	sector_div(ppa, geo->luns_per_chnl);
-	l.g.ch = ppa;
-
-	return l;
-}
-
 static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev,
 						  struct ppa_addr r)
 {
@@ -418,25 +395,6 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev,
 	return l;
 }
 
-static inline int ppa_empty(struct ppa_addr ppa_addr)
-{
-	return (ppa_addr.ppa == ADDR_EMPTY);
-}
-
-static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
-{
-	ppa_addr->ppa = ADDR_EMPTY;
-}
-
-static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
-{
-	if (ppa_empty(ppa1) || ppa_empty(ppa2))
-		return 0;
-
-	return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) &&
-					(ppa1.g.blk == ppa2.g.blk));
-}
-
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
@@ -481,17 +439,10 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
 extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
 extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
-extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int);
-extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
-			   void *);
-extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
-extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
 extern void nvm_end_io(struct nvm_rq *);
 extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
 extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
-extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
-
 #else /* CONFIG_NVM */
 struct nvm_dev_ops;
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index aea87f0d917b..4112e2bd747f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -124,14 +124,20 @@ enum {
 
 #define NVME_CMB_BIR(cmbloc)	((cmbloc) & 0x7)
 #define NVME_CMB_OFST(cmbloc)	(((cmbloc) >> 12) & 0xfffff)
-#define NVME_CMB_SZ(cmbsz)	(((cmbsz) >> 12) & 0xfffff)
-#define NVME_CMB_SZU(cmbsz)	(((cmbsz) >> 8) & 0xf)
-
-#define NVME_CMB_WDS(cmbsz)	((cmbsz) & 0x10)
-#define NVME_CMB_RDS(cmbsz)	((cmbsz) & 0x8)
-#define NVME_CMB_LISTS(cmbsz)	((cmbsz) & 0x4)
-#define NVME_CMB_CQS(cmbsz)	((cmbsz) & 0x2)
-#define NVME_CMB_SQS(cmbsz)	((cmbsz) & 0x1)
+
+enum {
+	NVME_CMBSZ_SQS		= 1 << 0,
+	NVME_CMBSZ_CQS		= 1 << 1,
+	NVME_CMBSZ_LISTS	= 1 << 2,
+	NVME_CMBSZ_RDS		= 1 << 3,
+	NVME_CMBSZ_WDS		= 1 << 4,
+
+	NVME_CMBSZ_SZ_SHIFT	= 12,
+	NVME_CMBSZ_SZ_MASK	= 0xfffff,
+
+	NVME_CMBSZ_SZU_SHIFT	= 8,
+	NVME_CMBSZ_SZU_MASK	= 0xf,
+};
 
 /*
  * Submission and Completion Queue Entry Sizes for the NVM command set.
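
With the shift/mask constants, the CMB size computation a driver performs becomes explicit: SZU selects a unit of 4 KiB << (4 * SZU) and SZ counts units. A hedged sketch mirroring what nvme-pci does with these values (the function name is illustrative; cmbsz is the raw register value):

static u64 my_cmb_size_bytes(u32 cmbsz)
{
	u8 szu = (cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
	u64 sz = (cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK;

	/* Unit is 4 KiB << (4 * SZU); the spec defines SZU 0..6 only. */
	return sz << (12 + 4 * szu);
}
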
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b7c83254c566..22b2131bcdcd 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -276,6 +276,17 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned int n_pages, unsigned int offset,
 			      unsigned long size, gfp_t gfp_mask);
 
+#ifdef CONFIG_SGL_ALLOC
+struct scatterlist *sgl_alloc_order(unsigned long long length,
+				    unsigned int order, bool chainable,
+				    gfp_t gfp, unsigned int *nent_p);
+struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
+			      unsigned int *nent_p);
+void sgl_free_n_order(struct scatterlist *sgl, int nents, int order);
+void sgl_free_order(struct scatterlist *sgl, int order);
+void sgl_free(struct scatterlist *sgl);
+#endif /* CONFIG_SGL_ALLOC */
+
 size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
 		      size_t buflen, off_t skip, bool to_buffer);
 
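
The sgl_alloc() helpers factor out an allocate-pages-and-build-scatterlist pattern that several drivers duplicated. A hedged usage sketch, assuming CONFIG_SGL_ALLOC is enabled (error handling trimmed, function name illustrative):

static int my_do_io(unsigned long long length)
{
	unsigned int nents;
	struct scatterlist *sgl;

	sgl = sgl_alloc(length, GFP_KERNEL, &nents);
	if (!sgl)
		return -ENOMEM;

	/* ... dma_map_sg(), perform the transfer, dma_unmap_sg() ... */

	sgl_free(sgl);	/* frees the pages and the scatterlist itself */
	return 0;
}
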