author    Linus Torvalds <torvalds@linux-foundation.org>    2018-08-14 13:23:25 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2018-08-14 13:23:25 -0400
commit    73ba2fb33c492916853dfe63e3b3163da0be661d (patch)
tree      c2fda8ca1273744d2e884d24189a15ac1a7d63c2 /include
parent    958f338e96f874a0d29442396d6adf9c1e17aa2d (diff)
parent    b86d865cb1cae1e61527ea0b8977078bbf694328 (diff)
Merge tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "First pull request for this merge window, there will also be a
  followup request with some stragglers. This pull request contains:

   - Fix for a thundering herd issue in the wbt block code (Anchal
     Agarwal)

   - A few NVMe pull requests:
       * Improved tracepoints (Keith)
       * Larger inline data support for RDMA (Steve Wise)
       * RDMA setup/teardown fixes (Sagi)
       * Effects log support for NVMe target (Chaitanya Kulkarni)
       * Buffered IO support for NVMe target (Chaitanya Kulkarni)
       * TP4004 (ANA) support (Christoph)
       * Various NVMe fixes

   - Block io-latency controller support. Much needed support for
     properly containing block devices. (Josef)

   - Series improving how we handle sense information on the stack
     (Kees)

   - Lightnvm fixes and updates/improvements (Mathias/Javier et al)

   - Zoned device support for null_blk (Matias)

   - AIX partition fixes (Mauricio Faria de Oliveira)

   - DIF checksum code made generic (Max Gurtovoy)

   - Add support for discard in iostats (Michael Callahan / Tejun)

   - Set of updates for BFQ (Paolo)

   - Removal of async write support for bsg (Christoph)

   - Bio page dirtying and clone fixups (Christoph)

   - Set of bcache fix/changes (via Coly)

   - Series improving blk-mq queue setup/teardown speed (Ming)

   - Series improving merging performance on blk-mq (Ming)

   - Lots of other fixes and cleanups from a slew of folks"

* tag 'for-4.19/block-20180812' of git://git.kernel.dk/linux-block: (190 commits)
  blkcg: Make blkg_root_lookup() work for queues in bypass mode
  bcache: fix error setting writeback_rate through sysfs interface
  null_blk: add lock drop/acquire annotation
  Blk-throttle: reduce tail io latency when iops limit is enforced
  block: paride: pd: mark expected switch fall-throughs
  block: Ensure that a request queue is dissociated from the cgroup controller
  block: Introduce blk_exit_queue()
  blkcg: Introduce blkg_root_lookup()
  block: Remove two superfluous #include directives
  blk-mq: count the hctx as active before allocating tag
  block: bvec_nr_vecs() returns value for wrong slab
  bcache: trivial - remove tailing backslash in macro BTREE_FLAG
  bcache: make the pr_err statement used for ENOENT only in sysfs_attatch section
  bcache: set max writeback rate when I/O request is idle
  bcache: add code comments for bset.c
  bcache: fix mistaken comments in request.c
  bcache: fix mistaken code comments in bcache.h
  bcache: add a comment in super.c
  bcache: avoid unncessary cache prefetch bch_btree_node_get()
  bcache: display rate debug parameters to 0 when writeback is not running
  ...
Diffstat (limited to 'include')
-rw-r--r--  include/linux/bio.h              19
-rw-r--r--  include/linux/blk-cgroup.h      146
-rw-r--r--  include/linux/blk-mq.h            4
-rw-r--r--  include/linux/blk_types.h        27
-rw-r--r--  include/linux/blkdev.h           66
-rw-r--r--  include/linux/cdrom.h             3
-rw-r--r--  include/linux/cgroup-defs.h       3
-rw-r--r--  include/linux/genhd.h            14
-rw-r--r--  include/linux/memcontrol.h       13
-rw-r--r--  include/linux/nvme.h             72
-rw-r--r--  include/linux/sched.h             8
-rw-r--r--  include/linux/swap.h             11
-rw-r--r--  include/linux/t10-pi.h           24
-rw-r--r--  include/linux/tracehook.h         2
-rw-r--r--  include/scsi/scsi_cmnd.h         13
-rw-r--r--  include/scsi/scsi_device.h       14
-rw-r--r--  include/uapi/linux/bcache.h       4
-rw-r--r--  include/uapi/linux/blkzoned.h     2
18 files changed, 380 insertions, 65 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f08f5fe7bd08..51371740d2a8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -429,7 +429,6 @@ extern void bio_put(struct bio *);
 
 extern void __bio_clone_fast(struct bio *, struct bio *);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
-extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
 extern struct bio_set fs_bio_set;
 
@@ -443,12 +442,6 @@ static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
         return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
 }
 
-static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
-{
-        return bio_clone_bioset(bio, gfp_mask, NULL);
-
-}
-
 extern blk_qc_t submit_bio(struct bio *);
 
 extern void bio_endio(struct bio *);
@@ -496,9 +489,9 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
-void generic_start_io_acct(struct request_queue *q, int rw,
+void generic_start_io_acct(struct request_queue *q, int op,
                 unsigned long sectors, struct hd_struct *part);
-void generic_end_io_acct(struct request_queue *q, int rw,
+void generic_end_io_acct(struct request_queue *q, int op,
                 struct hd_struct *part,
                 unsigned long start_time);
 
@@ -553,8 +546,16 @@ do { \
 #define bio_dev(bio) \
         disk_devt((bio)->bi_disk)
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+#else
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+                struct page *page) { return 0; }
+#endif
+
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
 void bio_disassociate_task(struct bio *bio);
 void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else /* CONFIG_BLK_CGROUP */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..34aec30e06c7 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -35,6 +35,7 @@ enum blkg_rwstat_type {
         BLKG_RWSTAT_WRITE,
         BLKG_RWSTAT_SYNC,
         BLKG_RWSTAT_ASYNC,
+        BLKG_RWSTAT_DISCARD,
 
         BLKG_RWSTAT_NR,
         BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
@@ -136,6 +137,12 @@ struct blkcg_gq {
         struct blkg_policy_data *pd[BLKCG_MAX_POLS];
 
         struct rcu_head rcu_head;
+
+        atomic_t use_delay;
+        atomic64_t delay_nsec;
+        atomic64_t delay_start;
+        u64 last_delay;
+        int last_use;
 };
 
 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -148,6 +155,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
+                size_t size);
 
 struct blkcg_policy {
         int plid;
@@ -167,6 +176,7 @@ struct blkcg_policy {
         blkcg_pol_offline_pd_fn *pd_offline_fn;
         blkcg_pol_free_pd_fn *pd_free_fn;
         blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
+        blkcg_pol_stat_pd_fn *pd_stat_fn;
 };
 
 extern struct blkcg blkcg_root;
@@ -238,6 +248,42 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
         return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
+static inline bool blk_cgroup_congested(void)
+{
+        struct cgroup_subsys_state *css;
+        bool ret = false;
+
+        rcu_read_lock();
+        css = kthread_blkcg();
+        if (!css)
+                css = task_css(current, io_cgrp_id);
+        while (css) {
+                if (atomic_read(&css->cgroup->congestion_count)) {
+                        ret = true;
+                        break;
+                }
+                css = css->parent;
+        }
+        rcu_read_unlock();
+        return ret;
+}
+
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg. The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio. Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+        return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
+}
+
 /**
  * blkcg_parent - get the parent of a blkcg
  * @blkcg: blkcg of interest
@@ -296,6 +342,17 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 }
 
 /**
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{
+        return q->root_blkg;
+}
+
+/**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
  * @pol: policy of interest
@@ -355,6 +412,21 @@ static inline void blkg_get(struct blkcg_gq *blkg)
         atomic_inc(&blkg->refcnt);
 }
 
+/**
+ * blkg_try_get - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg. We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+{
+        if (atomic_inc_not_zero(&blkg->refcnt))
+                return blkg;
+        return NULL;
+}
+
+
 void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
@@ -589,7 +661,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 {
         struct percpu_counter *cnt;
 
-        if (op_is_write(op))
+        if (op_is_discard(op))
+                cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+        else if (op_is_write(op))
                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
         else
                 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
@@ -706,8 +780,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 
         if (!throtl) {
                 blkg = blkg ?: q->root_blkg;
-                blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
-                                bio->bi_iter.bi_size);
+                /*
+                 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
+                 * is a split bio and we would have already accounted for the
+                 * size of the bio.
+                 */
+                if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
+                        blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
+                                        bio->bi_iter.bi_size);
                 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
         }
 
@@ -715,6 +795,59 @@
         return !throtl;
 }
 
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+        if (atomic_add_return(1, &blkg->use_delay) == 1)
+                atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+        int old = atomic_read(&blkg->use_delay);
+
+        if (old == 0)
+                return 0;
+
+        /*
+         * We do this song and dance because we can race with somebody else
+         * adding or removing delay. If we just did an atomic_dec we'd end up
+         * negative and we'd already be in trouble. We need to subtract 1 and
+         * then check to see if we were the last delay so we can drop the
+         * congestion count on the cgroup.
+         */
+        while (old) {
+                int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+                if (cur == old)
+                        break;
+                old = cur;
+        }
+
+        if (old == 0)
+                return 0;
+        if (old == 1)
+                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+        return 1;
+}
+
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+        int old = atomic_read(&blkg->use_delay);
+        if (!old)
+                return;
+        /* We only want 1 person clearing the congestion count for this blkg. */
+        while (old) {
+                int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
+                if (cur == old) {
+                        atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+                        break;
+                }
+                old = cur;
+        }
+}
+
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
 #else /* CONFIG_BLK_CGROUP */
 
 struct blkcg {
@@ -734,9 +867,16 @@ struct blkcg_policy {
 
 #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
+static inline void blkcg_maybe_throttle_current(void) { }
+static inline bool blk_cgroup_congested(void) { return false; }
+
 #ifdef CONFIG_BLOCK
 
+static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
+
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
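
The blkcg_unuse_delay() helper above illustrates a common lock-free pattern: decrement a counter only if it is still non-zero, so a racing decrementer can never drive it negative, and let exactly one caller (the one that moves 1 to 0) drop the congestion count. A standalone sketch of the same pattern using C11 atomics (illustrative userspace code, not the kernel's atomic_t API):

/* Sketch of the compare-and-swap loop in blkcg_unuse_delay() above,
 * written against <stdatomic.h> instead of the kernel's atomic_t. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int use_delay;
static atomic_int congestion_count;

static int unuse_delay(void)
{
        int old = atomic_load(&use_delay);

        /* Try to move old -> old - 1; on a race, retry with the fresh value
         * that compare_exchange wrote back into 'old'. */
        while (old > 0 &&
               !atomic_compare_exchange_weak(&use_delay, &old, old - 1))
                ;
        if (old == 0)
                return 0;       /* somebody else already drained it */
        if (old == 1)           /* we removed the last delayer */
                atomic_fetch_sub(&congestion_count, 1);
        return 1;
}

int main(void)
{
        atomic_store(&use_delay, 2);
        atomic_store(&congestion_count, 1);

        unuse_delay();  /* 2 -> 1, congestion count untouched */
        unuse_delay();  /* 1 -> 0, drops the congestion count */
        printf("use_delay=%d count=%d\n", atomic_load(&use_delay),
               atomic_load(&congestion_count));  /* use_delay=0 count=0 */
        return 0;
}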
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ca3f2c2edd85..1da59c16f637 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -35,10 +35,12 @@ struct blk_mq_hw_ctx {
         struct sbitmap ctx_map;
 
         struct blk_mq_ctx *dispatch_from;
+        unsigned int dispatch_busy;
 
-        struct blk_mq_ctx **ctxs;
         unsigned int nr_ctx;
+        struct blk_mq_ctx **ctxs;
 
+        spinlock_t dispatch_wait_lock;
         wait_queue_entry_t dispatch_wait;
         atomic_t wait_index;
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3c4f390aea4b..f6dfb30737d8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -179,11 +179,9 @@ struct bio {
          */
         struct io_context *bi_ioc;
         struct cgroup_subsys_state *bi_css;
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-        void *bi_cg_private;
+        struct blkcg_gq *bi_blkg;
         struct bio_issue bi_issue;
 #endif
-#endif
         union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
                 struct bio_integrity_payload *bi_integrity; /* data integrity */
@@ -329,7 +327,7 @@ enum req_flag_bits {
 
         /* for driver use */
         __REQ_DRV,
-
+        __REQ_SWAP,             /* swapping request. */
         __REQ_NR_BITS,          /* stops here */
 };
 
@@ -351,6 +349,7 @@ enum req_flag_bits {
 #define REQ_NOUNMAP             (1ULL << __REQ_NOUNMAP)
 
 #define REQ_DRV                 (1ULL << __REQ_DRV)
+#define REQ_SWAP                (1ULL << __REQ_SWAP)
 
 #define REQ_FAILFAST_MASK \
         (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -358,6 +357,14 @@ enum req_flag_bits {
 #define REQ_NOMERGE_FLAGS \
         (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
+enum stat_group {
+        STAT_READ,
+        STAT_WRITE,
+        STAT_DISCARD,
+
+        NR_STAT_GROUPS
+};
+
 #define bio_op(bio) \
         ((bio)->bi_opf & REQ_OP_MASK)
 #define req_op(req) \
@@ -395,6 +402,18 @@ static inline bool op_is_sync(unsigned int op)
                (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 
+static inline bool op_is_discard(unsigned int op)
+{
+        return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
+}
+
+static inline int op_stat_group(unsigned int op)
+{
+        if (op_is_discard(op))
+                return STAT_DISCARD;
+        return op_is_write(op);
+}
+
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE           -1U
 #define BLK_QC_T_SHIFT          16
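
op_stat_group() maps a request op onto the new stat_group buckets: discards to STAT_DISCARD, everything else to STAT_READ or STAT_WRITE by relying on op_is_write() returning 0 or 1. A minimal standalone sketch of that mapping; the REQ_OP_* values below match the kernel's encoding (READ = 0, WRITE = 1, DISCARD = 3, with the op in the low bits), but the program itself is illustrative userspace C:

#include <stdio.h>

enum { STAT_READ, STAT_WRITE, STAT_DISCARD, NR_STAT_GROUPS };

#define REQ_OP_MASK     0xff    /* low bits hold the op */
#define REQ_OP_READ     0
#define REQ_OP_WRITE    1
#define REQ_OP_DISCARD  3

static int op_is_write(unsigned int op)
{
        return (op & 1) != 0;   /* write-style ops have the low bit set */
}

static int op_is_discard(unsigned int op)
{
        return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

static int op_stat_group(unsigned int op)
{
        if (op_is_discard(op))
                return STAT_DISCARD;
        return op_is_write(op); /* 0 -> STAT_READ, 1 -> STAT_WRITE */
}

int main(void)
{
        printf("%d %d %d\n", op_stat_group(REQ_OP_READ),
               op_stat_group(REQ_OP_WRITE), op_stat_group(REQ_OP_DISCARD));
        return 0;       /* prints: 0 1 2 */
}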
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79226ca8f80f..d6869e0e2b64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,8 +27,6 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/seqlock.h>
-#include <linux/u64_stats_sync.h>
 
 struct module;
 struct scsi_ioctl_command;
@@ -42,7 +40,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
-struct rq_wb;
+struct rq_qos;
 struct blk_queue_stats;
 struct blk_stat_callback;
 
@@ -442,10 +440,8 @@ struct request_queue {
         int nr_rqs[2];          /* # allocated [a]sync rqs */
         int nr_rqs_elvpriv;     /* # allocated rqs w/ elvpriv */
 
-        atomic_t shared_hctx_restart;
-
         struct blk_queue_stats *stats;
-        struct rq_wb *rq_wb;
+        struct rq_qos *rq_qos;
 
         /*
          * If blkcg is not used, @q->root_rl serves all requests. If blkcg
@@ -592,6 +588,7 @@ struct request_queue {
 
         struct queue_limits limits;
 
+#ifdef CONFIG_BLK_DEV_ZONED
         /*
          * Zoned block device information for request dispatch control.
          * nr_zones is the total number of zones of the device. This is always
@@ -612,6 +609,7 @@ struct request_queue {
         unsigned int nr_zones;
         unsigned long *seq_zones_bitmap;
         unsigned long *seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
 
         /*
          * sg stuff
@@ -800,11 +798,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
         return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
-{
-        return q->nr_zones;
-}
-
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
                                              sector_t sector)
 {
@@ -820,6 +814,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
                 return false;
         return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
 {
@@ -1070,6 +1065,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
         return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
         return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1079,6 +1075,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 {
         return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 /*
  * Some commands like WRITE SAME have a payload or data transfer size which
@@ -1437,8 +1434,6 @@ enum blk_default_limits {
         BLK_SEG_BOUNDARY_MASK   = 0xFFFFFFFFUL,
 };
 
-#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
         return q->limits.seg_boundary_mask;
@@ -1639,15 +1634,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
         return 0;
 }
 
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
-        struct request_queue *q = bdev_get_queue(bdev);
-
-        if (q)
-                return blk_queue_nr_zones(q);
-        return 0;
-}
-
 static inline int queue_dma_alignment(struct request_queue *q)
 {
         return q ? q->dma_alignment : 511;
@@ -1877,6 +1863,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
                                 bip_next->bip_vec[0].bv_offset);
 }
 
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi:         blk_integrity profile for device
+ * @sectors:    Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device. Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+                                                   unsigned int sectors)
+{
+        return sectors >> (bi->interval_exp - 9);
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+                                               unsigned int sectors)
+{
+        return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
+}
+
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct bio;
@@ -1950,12 +1958,24 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
         return false;
 }
 
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+                                                   unsigned int sectors)
+{
+        return 0;
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+                                               unsigned int sectors)
+{
+        return 0;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct block_device_operations {
         int (*open) (struct block_device *, fmode_t);
         void (*release) (struct gendisk *, fmode_t);
-        int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
+        int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
         int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
         int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
         unsigned int (*check_events) (struct gendisk *disk,
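
As the comment on bio_integrity_intervals() above says, the block layer counts in 512-byte sectors while protection information is tracked per device integrity interval; the shift by (interval_exp - 9) converts between the two, and bio_integrity_bytes() scales the result by the PI tuple size. A worked example in plain C, assuming a 4096-byte integrity interval (interval_exp = 12) and 8-byte T10 PI tuples:

#include <stdio.h>

int main(void)
{
        unsigned int interval_exp = 12; /* 4096-byte integrity interval */
        unsigned int tuple_size = 8;    /* one T10 PI tuple per interval */
        unsigned int sectors = 32;      /* a 16 KiB bio in 512-byte sectors */

        /* 512-byte sectors -> integrity intervals: 32 >> (12 - 9) = 4 */
        unsigned int intervals = sectors >> (interval_exp - 9);
        /* intervals -> bytes of integrity metadata: 4 * 8 = 32 */
        unsigned int bytes = intervals * tuple_size;

        printf("%u intervals, %u bytes of PI\n", intervals, bytes);
        return 0;
}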
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index e75dfd1f1dec..528271c60018 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -13,6 +13,7 @@
 
 #include <linux/fs.h>           /* not really needed, later.. */
 #include <linux/list.h>
+#include <scsi/scsi_common.h>
 #include <uapi/linux/cdrom.h>
 
 struct packet_command
@@ -21,7 +22,7 @@ struct packet_command
         unsigned char *buffer;
         unsigned int buflen;
         int stat;
-        struct request_sense *sense;
+        struct scsi_sense_hdr *sshdr;
         unsigned char data_direction;
         int quiet;
         int timeout;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index c0e68f903011..ff20b677fb9f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -438,6 +438,9 @@ struct cgroup {
         /* used to store eBPF programs */
         struct cgroup_bpf bpf;
 
+        /* If there is block congestion on this cgroup. */
+        atomic_t congestion_count;
+
         /* ids of the ancestors at each level including self */
         int ancestor_ids[];
 };
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6cb8a5789668..57864422a2c8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/percpu-refcount.h>
 #include <linux/uuid.h>
+#include <linux/blk_types.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -82,10 +83,10 @@ struct partition {
 } __attribute__((packed));
 
 struct disk_stats {
-        unsigned long sectors[2];       /* READs and WRITEs */
-        unsigned long ios[2];
-        unsigned long merges[2];
-        unsigned long ticks[2];
+        unsigned long sectors[NR_STAT_GROUPS];
+        unsigned long ios[NR_STAT_GROUPS];
+        unsigned long merges[NR_STAT_GROUPS];
+        unsigned long ticks[NR_STAT_GROUPS];
         unsigned long io_ticks;
         unsigned long time_in_queue;
 };
@@ -353,6 +354,11 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #endif /* CONFIG_SMP */
 
+#define part_stat_read_accum(part, field)                               \
+        (part_stat_read(part, field[STAT_READ]) +                       \
+         part_stat_read(part, field[STAT_WRITE]) +                      \
+         part_stat_read(part, field[STAT_DISCARD]))
+
 #define part_stat_add(cpu, part, field, addnd) do {                     \
         __part_stat_add((cpu), (part), field, addnd);                   \
         if ((part)->partno)                                             \
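
With the disk_stats arrays now sized by NR_STAT_GROUPS, a combined figure has to sum three buckets instead of two, which is exactly what part_stat_read_accum() does. A plain-C sketch of the same pattern; the hypothetical stat_read_accum() below stands in for the kernel macro, which additionally handles per-CPU and per-partition details:

#include <stdio.h>

enum { STAT_READ, STAT_WRITE, STAT_DISCARD, NR_STAT_GROUPS };

struct disk_stats {
        unsigned long sectors[NR_STAT_GROUPS]; /* indexed by op_stat_group() */
        unsigned long ios[NR_STAT_GROUPS];
};

/* Sum one field across all stat groups. */
#define stat_read_accum(stats, field)           \
        ((stats)->field[STAT_READ] +            \
         (stats)->field[STAT_WRITE] +           \
         (stats)->field[STAT_DISCARD])

int main(void)
{
        struct disk_stats s = {
                .sectors = { 1024, 2048, 512 },
                .ios     = { 8, 16, 2 },
        };

        printf("%lu sectors, %lu ios\n",
               stat_read_accum(&s, sectors), stat_read_accum(&s, ios));
        return 0;       /* prints: 3584 sectors, 26 ios */
}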
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c6fb116e925..680d3395fc83 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                           gfp_t gfp_mask, struct mem_cgroup **memcgp,
                           bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+                          gfp_t gfp_mask, struct mem_cgroup **memcgp,
+                          bool compound);
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                               bool lrucare, bool compound);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
         return 0;
 }
 
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+                                              struct mm_struct *mm,
+                                              gfp_t gfp_mask,
+                                              struct mem_cgroup **memcgp,
+                                              bool compound)
+{
+        *memcgp = NULL;
+        return 0;
+}
+
 static inline void mem_cgroup_commit_charge(struct page *page,
                                             struct mem_cgroup *memcg,
                                             bool lrucare, bool compound)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2950ce957656..68e91ef5494c 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
         __le32 sanicap;
         __le32 hmminds;
         __le16 hmmaxd;
-        __u8 rsvd338[174];
+        __u8 rsvd338[4];
+        __u8 anatt;
+        __u8 anacap;
+        __le32 anagrpmax;
+        __le32 nanagrpid;
+        __u8 rsvd352[160];
         __u8 sqes;
         __u8 cqes;
         __le16 maxcmd;
@@ -254,11 +259,12 @@ struct nvme_id_ctrl {
         __le16 awun;
         __le16 awupf;
         __u8 nvscc;
-        __u8 rsvd531;
+        __u8 nwpc;
         __le16 acwu;
         __u8 rsvd534[2];
         __le32 sgls;
-        __u8 rsvd540[228];
+        __le32 mnan;
+        __u8 rsvd544[224];
         char subnqn[256];
         __u8 rsvd1024[768];
         __le32 ioccsz;
@@ -312,7 +318,11 @@ struct nvme_id_ns {
         __le16 nabspf;
         __le16 noiob;
         __u8 nvmcap[16];
-        __u8 rsvd64[40];
+        __u8 rsvd64[28];
+        __le32 anagrpid;
+        __u8 rsvd96[3];
+        __u8 nsattr;
+        __u8 rsvd100[4];
         __u8 nguid[16];
         __u8 eui64[8];
         struct nvme_lbaf lbaf[16];
@@ -425,6 +435,32 @@ struct nvme_effects_log {
         __u8 resv[2048];
 };
 
+enum nvme_ana_state {
+        NVME_ANA_OPTIMIZED              = 0x01,
+        NVME_ANA_NONOPTIMIZED           = 0x02,
+        NVME_ANA_INACCESSIBLE           = 0x03,
+        NVME_ANA_PERSISTENT_LOSS        = 0x04,
+        NVME_ANA_CHANGE                 = 0x0f,
+};
+
+struct nvme_ana_group_desc {
+        __le32 grpid;
+        __le32 nnsids;
+        __le64 chgcnt;
+        __u8 state;
+        __u8 rsvd17[15];
+        __le32 nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO        (1 << 0)
+
+struct nvme_ana_rsp_hdr {
+        __le64 chgcnt;
+        __le16 ngrps;
+        __le16 rsvd10[3];
+};
+
 enum {
         NVME_SMART_CRIT_SPARE           = 1 << 0,
         NVME_SMART_CRIT_TEMPERATURE     = 1 << 1,
@@ -444,11 +480,13 @@ enum {
 enum {
         NVME_AER_NOTICE_NS_CHANGED      = 0x00,
         NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+        NVME_AER_NOTICE_ANA             = 0x03,
 };
 
 enum {
         NVME_AEN_CFG_NS_ATTR            = 1 << 8,
         NVME_AEN_CFG_FW_ACT             = 1 << 9,
+        NVME_AEN_CFG_ANA_CHANGE         = 1 << 11,
 };
 
 struct nvme_lba_range_type {
@@ -749,15 +787,22 @@ enum {
         NVME_FEAT_HOST_MEM_BUF  = 0x0d,
         NVME_FEAT_TIMESTAMP     = 0x0e,
         NVME_FEAT_KATO          = 0x0f,
+        NVME_FEAT_HCTM          = 0x10,
+        NVME_FEAT_NOPSC         = 0x11,
+        NVME_FEAT_RRL           = 0x12,
+        NVME_FEAT_PLM_CONFIG    = 0x13,
+        NVME_FEAT_PLM_WINDOW    = 0x14,
         NVME_FEAT_SW_PROGRESS   = 0x80,
         NVME_FEAT_HOST_ID       = 0x81,
         NVME_FEAT_RESV_MASK     = 0x82,
         NVME_FEAT_RESV_PERSIST  = 0x83,
+        NVME_FEAT_WRITE_PROTECT = 0x84,
         NVME_LOG_ERROR          = 0x01,
         NVME_LOG_SMART          = 0x02,
         NVME_LOG_FW_SLOT        = 0x03,
         NVME_LOG_CHANGED_NS     = 0x04,
         NVME_LOG_CMD_EFFECTS    = 0x05,
+        NVME_LOG_ANA            = 0x0c,
         NVME_LOG_DISC           = 0x70,
         NVME_LOG_RESERVATION    = 0x80,
         NVME_FWACT_REPL         = (0 << 3),
@@ -765,6 +810,14 @@ enum {
         NVME_FWACT_ACTV         = (2 << 3),
 };
 
+/* NVMe Namespace Write Protect State */
+enum {
+        NVME_NS_NO_WRITE_PROTECT = 0,
+        NVME_NS_WRITE_PROTECT,
+        NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+        NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
 #define NVME_MAX_CHANGED_NAMESPACES     1024
 
 struct nvme_identify {
@@ -880,7 +933,7 @@ struct nvme_get_log_page_command {
         __u64 rsvd2[2];
         union nvme_data_ptr dptr;
         __u8 lid;
-        __u8 rsvd10;
+        __u8 lsp; /* upper 4 bits reserved */
         __le16 numdl;
         __le16 numdu;
         __u16 rsvd11;
@@ -1111,6 +1164,8 @@ enum {
         NVME_SC_SGL_INVALID_OFFSET      = 0x16,
         NVME_SC_SGL_INVALID_SUBTYPE     = 0x17,
 
+        NVME_SC_NS_WRITE_PROTECTED      = 0x20,
+
         NVME_SC_LBA_RANGE               = 0x80,
         NVME_SC_CAP_EXCEEDED            = 0x81,
         NVME_SC_NS_NOT_READY            = 0x82,
@@ -1180,6 +1235,13 @@ enum {
         NVME_SC_ACCESS_DENIED           = 0x286,
         NVME_SC_UNWRITTEN_BLOCK         = 0x287,
 
+        /*
+         * Path-related Errors:
+         */
+        NVME_SC_ANA_PERSISTENT_LOSS     = 0x301,
+        NVME_SC_ANA_INACCESSIBLE        = 0x302,
+        NVME_SC_ANA_TRANSITION          = 0x303,
+
         NVME_SC_DNR                     = 0x4000,
 };
 
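
Per the structures added above, an ANA log page is an nvme_ana_rsp_hdr followed by ngrps variable-size group descriptors, each trailed by nnsids little-endian NSIDs, so a reader has to advance by sizeof(desc) plus the NSID list each iteration. A hedged userspace sketch of walking such a buffer (glibc <endian.h> assumed; the struct names here are local stand-ins for the kernel definitions above):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct ana_rsp_hdr {            /* 16 bytes, mirrors nvme_ana_rsp_hdr */
        uint64_t chgcnt;
        uint16_t ngrps;
        uint16_t rsvd10[3];
} __attribute__((packed));

struct ana_group_desc {         /* 32 bytes + NSID list, mirrors nvme_ana_group_desc */
        uint32_t grpid;
        uint32_t nnsids;
        uint64_t chgcnt;
        uint8_t  state;
        uint8_t  rsvd17[15];
        uint32_t nsids[];
} __attribute__((packed));

void walk_ana_log(const uint8_t *buf)
{
        const struct ana_rsp_hdr *hdr = (const void *)buf;
        const uint8_t *p = buf + sizeof(*hdr);
        uint16_t i;

        for (i = 0; i < le16toh(hdr->ngrps); i++) {
                const struct ana_group_desc *desc = (const void *)p;
                uint32_t nnsids = le32toh(desc->nnsids);

                printf("group %u: state 0x%02x, %u namespaces\n",
                       (unsigned)le32toh(desc->grpid), desc->state,
                       (unsigned)nnsids);
                /* each descriptor's size varies with its NSID list */
                p += sizeof(*desc) + nnsids * sizeof(uint32_t);
        }
}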
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dac5086e3815..95a5018c338e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,10 @@ struct task_struct {
         /* disallow userland-initiated cgroup migration */
         unsigned no_cgroup_migration:1;
 #endif
+#ifdef CONFIG_BLK_CGROUP
+        /* to be used once the psi infrastructure lands upstream. */
+        unsigned use_memdelay:1;
+#endif
 
         unsigned long atomic_flags; /* Flags requiring atomic access. */
 
@@ -1150,6 +1154,10 @@ struct task_struct {
         unsigned int memcg_nr_pages_over_high;
 #endif
 
+#ifdef CONFIG_BLK_CGROUP
+        struct request_queue *throttle_queue;
+#endif
+
 #ifdef CONFIG_UPROBES
         struct uprobe_task *utask;
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c063443d8638..1a8bd05a335e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
         return memcg->swappiness;
 }
-
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
@@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+                                         gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+                                                int node, gfp_t gfp_mask)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c6aa8a3c42ed..b9626aa7e90c 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -37,9 +37,33 @@ struct t10_pi_tuple {
 #define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
 #define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
+static inline u32 t10_pi_ref_tag(struct request *rq)
+{
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+        return blk_rq_pos(rq) >>
+                (rq->q->integrity.interval_exp - 9) & 0xffffffff;
+#else
+        return -1U;
+#endif
+}
+
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+                            unsigned int intervals);
+#else
+static inline void t10_pi_complete(struct request *rq, u8 protection_type,
+                                   unsigned int intervals)
+{
+}
+static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+}
+#endif
+
 #endif
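
t10_pi_ref_tag() above yields the request's start position in integrity-interval units, truncated to 32 bits; with 512-byte intervals (interval_exp = 9) it is just the low 32 bits of the sector number. A worked example of that arithmetic in plain C:

#include <stdint.h>
#include <stdio.h>

static uint32_t ref_tag(uint64_t start_sector, unsigned int interval_exp)
{
        /* 512-byte sector position -> interval number, low 32 bits */
        return (start_sector >> (interval_exp - 9)) & 0xffffffff;
}

int main(void)
{
        /* 4096-byte intervals: sector 8192 is interval 8192 >> 3 = 1024 */
        printf("0x%x\n", ref_tag(8192, 12));    /* prints 0x400 */
        /* 512-byte intervals: the tag equals the sector number itself */
        printf("0x%x\n", ref_tag(8192, 9));     /* prints 0x2000 */
        return 0;
}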
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 4a8841963c2e..05589a3e37f4 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -51,6 +51,7 @@
 #include <linux/security.h>
 #include <linux/task_work.h>
 #include <linux/memcontrol.h>
+#include <linux/blk-cgroup.h>
 struct linux_binprm;
 
 /*
@@ -192,6 +193,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
                 task_work_run();
 
         mem_cgroup_handle_over_high();
+        blkcg_maybe_throttle_current();
 }
 
 #endif  /* <linux/tracehook.h> */
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index aaf1e971c6a3..c891ada3c5c2 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -4,6 +4,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
+#include <linux/t10-pi.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/timer.h>
@@ -14,8 +15,6 @@
 struct Scsi_Host;
 struct scsi_driver;
 
-#include <scsi/scsi_device.h>
-
 /*
  * MAX_COMMAND_SIZE is:
  * The longest fixed-length SCSI CDB as per the SCSI standard.
@@ -120,11 +119,11 @@ struct scsi_cmnd {
         struct request *request;        /* The command we are
                                            working on */
 
-#define SCSI_SENSE_BUFFERSIZE   96
         unsigned char *sense_buffer;
                                 /* obtained by REQUEST SENSE when
                                  * CHECK CONDITION is received on original
-                                 * command (auto-sense) */
+                                 * command (auto-sense). Length must be
+                                 * SCSI_SENSE_BUFFERSIZE bytes. */
 
         /* Low-level done function - can be used by low-level driver to point
          * to completion function. Not used by mid/upper level code. */
@@ -313,12 +312,6 @@ static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
         return scmd->device->sector_size;
 }
 
-static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
-{
-        return blk_rq_pos(scmd->request) >>
-                (ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
-}
-
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
 {
         return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 4c36af6edd79..202f4d6a4342 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -17,6 +17,8 @@ struct scsi_sense_hdr;
 
 typedef __u64 __bitwise blist_flags_t;
 
+#define SCSI_SENSE_BUFFERSIZE   96
+
 struct scsi_mode_data {
         __u32 length;
         __u16 block_descriptor_length;
@@ -426,11 +428,21 @@ extern const char *scsi_device_state_name(enum scsi_device_state);
 extern int scsi_is_sdev_device(const struct device *);
 extern int scsi_is_target_device(const struct device *);
 extern void scsi_sanitize_inquiry_string(unsigned char *s, int len);
-extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
                         int data_direction, void *buffer, unsigned bufflen,
                         unsigned char *sense, struct scsi_sense_hdr *sshdr,
                         int timeout, int retries, u64 flags,
                         req_flags_t rq_flags, int *resid);
+/* Make sure any sense buffer is the correct size. */
+#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \
+                     sshdr, timeout, retries, flags, rq_flags, resid)   \
+({                                                                      \
+        BUILD_BUG_ON((sense) != NULL &&                                 \
+                     sizeof(sense) != SCSI_SENSE_BUFFERSIZE);           \
+        __scsi_execute(sdev, cmd, data_direction, buffer, bufflen,      \
+                       sense, sshdr, timeout, retries, flags, rq_flags, \
+                       resid);                                          \
+})
 static inline int scsi_execute_req(struct scsi_device *sdev,
                 const unsigned char *cmd, int data_direction, void *buffer,
                 unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
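
The scsi_execute() wrapper above works because sizeof() in a macro sees the caller's actual argument: an on-stack array of SCSI_SENSE_BUFFERSIZE bytes passes, while an array of any other size trips BUILD_BUG_ON at compile time. A userspace sketch of the mechanism; it drops the kernel's (sense) != NULL escape hatch so the condition stays a strict integer constant expression, and it relies on GCC/clang statement expressions:

#include <stdio.h>

#define SENSE_BUFFERSIZE 96

/* Classic BUILD_BUG_ON: a negative array size aborts compilation. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

static int do_execute(unsigned char *sense)
{
        return sense ? 0 : -1;  /* stand-in for __scsi_execute() */
}

#define checked_execute(sense)                                  \
({                                                              \
        BUILD_BUG_ON(sizeof(sense) != SENSE_BUFFERSIZE);        \
        do_execute(sense);                                      \
})

int main(void)
{
        unsigned char sense[SENSE_BUFFERSIZE];

        printf("%d\n", checked_execute(sense)); /* compiles: sizeof == 96 */
        /* unsigned char small[64]; checked_execute(small);  <- build error */
        /* unsigned char *p = sense; checked_execute(p);     <- build error:
         *   sizeof(p) is the pointer size, not the buffer size */
        return 0;
}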
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 821f71a2e48f..8d19e02d752a 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -195,7 +195,7 @@ struct cache_sb {
         };
         };
 
-        __u32 last_mount;       /* time_t */
+        __u32 last_mount;       /* time overflow in y2106 */
 
         __u16 first_bucket;
         union {
@@ -318,7 +318,7 @@ struct uuid_entry {
         struct {
                 __u8 uuid[16];
                 __u8 label[32];
-                __u32 first_reg;
+                __u32 first_reg;        /* time overflow in y2106 */
                 __u32 last_reg;
                 __u32 invalidated;
 
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index e3c70fe6bf0f..ff5a5db8906a 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -117,7 +117,7 @@ struct blk_zone_report {
         __u32 nr_zones;
         __u8 reserved[4];
         struct blk_zone zones[0];
-} __packed;
+};
 
 /**
  * struct blk_zone_range - BLKRESETZONE ioctl request