author		Linus Torvalds <torvalds@linux-foundation.org>	2013-11-13 22:08:14 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-13 22:08:14 -0500
commit		0910c0bdf7c291a41bc21e40a97389c9d4c1960d
tree		177c4cb22ece78b18f64f548ae82b9a15edbb99c /include/linux
parent		2821fe6b00a1e902fd399bb4b7e40bc3041f4d44
parent		e37459b8e2c7db6735e39e019e448b76e5e77647
Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Pull block IO core updates from Jens Axboe:
 "This is the pull request for the core changes in the block layer for
  3.13. It contains:

   - The new blk-mq request interface. This is a new and more scalable
     queueing model that marries the best parts of the request-based
     interface we currently have (which is fully featured, but scales
     poorly) and the bio-based "interface" that the new drivers for
     high-IOPS devices end up using because it's much faster than the
     request-based one.

     The bio interface has no block layer support, since it taps into
     the stack much earlier. This means that drivers end up having to
     implement a lot of functionality on their own, like tagging,
     timeout handling, requeueing, etc. The blk-mq interface provides
     all of these. Some drivers even provide a switch to select bio or
     rq and have code to handle both, since things like merging only
     work in the rq model and hence are faster for some workloads. This
     is a huge mess. Conversion of these drivers nets us a substantial
     code reduction.

     Initial results on converting SCSI to this model even show an 8x
     improvement on single-queue devices. So while the model was
     intended to work on the newer multiqueue devices, it brings
     substantial improvements for "classic" hardware as well.

     This code has gone through extensive testing and development, and
     it's now ready to go. A pull request to convert virtio-blk to this
     model will be coming as well, with more drivers scheduled for
     conversion in 3.14.

   - Two blktrace fixes from Jan and Chen Gang.

   - A plug merge fix from Alireza Haghdoost.

   - Conversion of __get_cpu_var() uses from Christoph Lameter.

   - A fix for sector_div() with a 64-bit divisor from Geert
     Uytterhoeven.

   - A fix for a race between request completion and the timeout
     handling from Jeff Moyer. This is what caused the merge conflict
     with blk-mq/core, in case you are looking at that.

   - A dm stacking fix from Mike Snitzer.

   - A code consolidation fix and duplicated-code removal from Kent
     Overstreet.

   - A handful of block bug fixes from Mikulas Patocka, fixing a loop
     crash and memory corruption on blk cg.

   - An elevator switch bug fix from Tomoki Sekiyama.

  A heads-up that I had to rebase this branch. Initially the immutable
  bio_vecs had been queued up for inclusion, but a week later it became
  clear that they weren't fully cooked yet. So the decision was made to
  pull them out and postpone them until 3.14. It was a straightforward
  rebase, just pruning out the immutable series and the later fixes for
  problems with it.
  The rest of the patches applied directly and no further changes were
  made"

* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: Do not call sector_div() with a 64-bit divisor
  kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
  block: Consolidate duplicated bio_trim() implementations
  block: Use rw_copy_check_uvector()
  block: Enable sysfs nomerge control for I/O requests in the plug list
  block: properly stack underlying max_segment_size to DM device
  elevator: acquire q->sysfs_lock in elevator_change()
  elevator: Fix a race in elevator switching and md device initialization
  block: Replace __get_cpu_var uses
  bdi: test bdi_init failure
  block: fix a probe argument to blk_register_region
  loop: fix crash if blk_alloc_queue fails
  blk-core: Fix memory corruption if blkcg_init_queue fails
  block: fix race between request completion and timeout handling
  blktrace: Send BLK_TN_PROCESS events to all running traces
  blk-mq: don't disallow request merges for req->special being set
  blk-mq: mq plug list breakage
  blk-mq: fix for flush deadlock
  ...
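A minimal sketch (not part of the pull request) of how a driver hooks into the new blk-mq interface, using only the declarations from the include/linux/blk-mq.h added in this merge. The my_* names are hypothetical; blk_mq_map_queue is the stock single-queue mapping declared by the header.

/* Hypothetical driver-side sketch of the new blk-mq registration. */
#include <linux/blk-mq.h>

struct my_cmd {
	int status;		/* per-request driver data in the cmd_size area */
};

static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->status = 0;
	/* submit rq to hardware; complete later with blk_mq_end_io(rq, err) */
	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops my_mq_ops = {
	.queue_rq	= my_queue_rq,
	.map_queue	= blk_mq_map_queue,	/* default 1:1 mapping helper */
};

static struct blk_mq_reg my_mq_reg = {
	.ops		= &my_mq_ops,
	.nr_hw_queues	= 1,
	.queue_depth	= 64,
	.cmd_size	= sizeof(struct my_cmd),	/* pdu after each request */
	.numa_node	= NUMA_NO_NODE,
	.flags		= BLK_MQ_F_SHOULD_MERGE,
};

/* On probe: q = blk_mq_init_queue(&my_mq_reg, driver_private_data); */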
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/backing-dev.h	4
-rw-r--r--	include/linux/bio.h		3
-rw-r--r--	include/linux/blk-mq.h		183
-rw-r--r--	include/linux/blk_types.h	68
-rw-r--r--	include/linux/blkdev.h		60
-rw-r--r--	include/linux/blktrace_api.h	4
-rw-r--r--	include/linux/percpu_ida.h	23
7 files changed, 297 insertions(+), 48 deletions(-)
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 5f66d519a726..24819001f5c8 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -109,7 +109,7 @@ struct backing_dev_info {
 #endif
 };
 
-int bdi_init(struct backing_dev_info *bdi);
+int __must_check bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
 __printf(3, 4)
@@ -117,7 +117,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
 void bdi_unregister(struct backing_dev_info *bdi);
-int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
+int __must_check bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
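The two __must_check annotations pair with the "bdi: test bdi_init failure" commit in the shortlog: callers may no longer silently drop an init failure. A hypothetical caller sketch (setup_bdi is not part of the patch):

/*
 * Hypothetical caller: with __must_check, the compiler now warns if
 * bdi_init()'s return value is ignored.
 */
static int setup_bdi(struct backing_dev_info *bdi)
{
	int err;

	err = bdi_init(bdi);	/* can fail, e.g. on percpu allocation */
	if (err)
		return err;
	return 0;
}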
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ec48bac5b039..060ff695085c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -218,6 +218,7 @@ struct bio_pair {
 };
 extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
+extern void bio_trim(struct bio *bio, int offset, int size);
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
@@ -419,6 +420,8 @@ static inline void bio_list_init(struct bio_list *bl)
 	bl->head = bl->tail = NULL;
 }
 
+#define BIO_EMPTY_LIST	{ NULL, NULL }
+
 #define bio_list_for_each(bio, bl) \
 	for (bio = (bl)->head; bio; bio = bio->bi_next)
 
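bio_trim() is the helper consolidated by "block: Consolidate duplicated bio_trim() implementations", and BIO_EMPTY_LIST allows static initialization of a bio_list. A sketch, assuming offset and size are in sectors as in the duplicated implementations this replaces:

/* Sketch of the two additions; offset/size assumed to be in sectors. */
static struct bio_list deferred = BIO_EMPTY_LIST;	/* no bio_list_init() needed */

static void trim_and_defer(struct bio *bio)
{
	bio_trim(bio, 4, 8);		/* keep 8 sectors, starting 4 sectors in */
	bio_list_add(&deferred, bio);
}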
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
new file mode 100644
index 000000000000..ab0e9b2025b3
--- /dev/null
+++ b/include/linux/blk-mq.h
@@ -0,0 +1,183 @@
+#ifndef BLK_MQ_H
+#define BLK_MQ_H
+
+#include <linux/blkdev.h>
+
+struct blk_mq_tags;
+
+struct blk_mq_cpu_notifier {
+	struct list_head list;
+	void *data;
+	void (*notify)(void *data, unsigned long action, unsigned int cpu);
+};
+
+struct blk_mq_hw_ctx {
+	struct {
+		spinlock_t lock;
+		struct list_head dispatch;
+	} ____cacheline_aligned_in_smp;
+
+	unsigned long state;		/* BLK_MQ_S_* flags */
+	struct delayed_work delayed_work;
+
+	unsigned long flags;		/* BLK_MQ_F_* flags */
+
+	struct request_queue *queue;
+	unsigned int queue_num;
+
+	void *driver_data;
+
+	unsigned int nr_ctx;
+	struct blk_mq_ctx **ctxs;
+	unsigned int nr_ctx_map;
+	unsigned long *ctx_map;
+
+	struct request **rqs;
+	struct list_head page_list;
+	struct blk_mq_tags *tags;
+
+	unsigned long queued;
+	unsigned long run;
+#define BLK_MQ_MAX_DISPATCH_ORDER	10
+	unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
+
+	unsigned int queue_depth;
+	unsigned int numa_node;
+	unsigned int cmd_size;		/* per-request extra data */
+
+	struct blk_mq_cpu_notifier cpu_notifier;
+	struct kobject kobj;
+};
+
+struct blk_mq_reg {
+	struct blk_mq_ops *ops;
+	unsigned int nr_hw_queues;
+	unsigned int queue_depth;
+	unsigned int reserved_tags;
+	unsigned int cmd_size;		/* per-request extra data */
+	int numa_node;
+	unsigned int timeout;
+	unsigned int flags;		/* BLK_MQ_F_* */
+};
+
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
+typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
+typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int);
+typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
+typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+
+struct blk_mq_ops {
+	/*
+	 * Queue request
+	 */
+	queue_rq_fn *queue_rq;
+
+	/*
+	 * Map to specific hardware queue
+	 */
+	map_queue_fn *map_queue;
+
+	/*
+	 * Called on request timeout
+	 */
+	rq_timed_out_fn *timeout;
+
+	/*
+	 * Override for hctx allocations (should probably go)
+	 */
+	alloc_hctx_fn *alloc_hctx;
+	free_hctx_fn *free_hctx;
+
+	/*
+	 * Called when the block layer side of a hardware queue has been
+	 * set up, allowing the driver to allocate/init matching structures.
+	 * Ditto for exit/teardown.
+	 */
+	init_hctx_fn *init_hctx;
+	exit_hctx_fn *exit_hctx;
+};
+
+enum {
+	BLK_MQ_RQ_QUEUE_OK	= 0,	/* queued fine */
+	BLK_MQ_RQ_QUEUE_BUSY	= 1,	/* requeue IO for later */
+	BLK_MQ_RQ_QUEUE_ERROR	= 2,	/* end IO with error */
+
+	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
+	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
+	BLK_MQ_F_SHOULD_IPI	= 1 << 2,
+
+	BLK_MQ_S_STOPPED	= 1 << 0,
+
+	BLK_MQ_MAX_DEPTH	= 2048,
+};
+
+struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
+void blk_mq_free_queue(struct request_queue *);
+int blk_mq_register_disk(struct gendisk *);
+void blk_mq_unregister_disk(struct gendisk *);
+void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
+
+void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
+
+void blk_mq_insert_request(struct request_queue *, struct request *, bool);
+void blk_mq_run_queues(struct request_queue *q, bool async);
+void blk_mq_free_request(struct request *rq);
+bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved);
+struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
+struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
+
+struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int);
+void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
+
+void blk_mq_end_io(struct request *rq, int error);
+
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_stop_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queues(struct request_queue *q);
+
+/*
+ * Driver command data is immediately after the request. So subtract request
+ * size to get back to the original request.
+ */
+static inline struct request *blk_mq_rq_from_pdu(void *pdu)
+{
+	return pdu - sizeof(struct request);
+}
+static inline void *blk_mq_rq_to_pdu(struct request *rq)
+{
+	return (void *) rq + sizeof(*rq);
+}
+
+static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx,
+		unsigned int tag)
+{
+	return hctx->rqs[tag];
+}
+
+#define queue_for_each_hw_ctx(q, hctx, i)				\
+	for ((i) = 0, hctx = (q)->queue_hw_ctx[0];			\
+	     (i) < (q)->nr_hw_queues; (i)++, hctx = (q)->queue_hw_ctx[i])
+
+#define queue_for_each_ctx(q, ctx, i)					\
+	for ((i) = 0, ctx = per_cpu_ptr((q)->queue_ctx, 0);		\
+	     (i) < (q)->nr_queues; (i)++, ctx = per_cpu_ptr(q->queue_ctx, (i)))
+
+#define hctx_for_each_ctx(hctx, ctx, i)					\
+	for ((i) = 0, ctx = (hctx)->ctxs[0];				\
+	     (i) < (hctx)->nr_ctx; (i)++, ctx = (hctx)->ctxs[(i)])
+
+#define blk_ctx_sum(q, sum)						\
+({									\
+	struct blk_mq_ctx *__x;						\
+	unsigned int __ret = 0, __i;					\
+									\
+	queue_for_each_ctx((q), __x, __i)				\
+		__ret += sum;						\
+	__ret;								\
+})
+
+#endif
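The pdu helpers at the end of the new header encode the allocation layout: each request slot is a struct request immediately followed by cmd_size bytes of driver data, so conversion in both directions is plain pointer arithmetic. An illustrative round trip (not from the patch):

/*
 * Illustrative only: each slot is laid out as
 *   [ struct request ][ cmd_size bytes of driver pdu ]
 */
static void pdu_round_trip(struct request_queue *q)
{
	struct request *rq;
	void *pdu;

	rq = blk_mq_alloc_request(q, READ, GFP_KERNEL, false);
	if (!rq)
		return;

	pdu = blk_mq_rq_to_pdu(rq);		/* (void *)rq + sizeof(*rq) */
	WARN_ON(blk_mq_rq_from_pdu(pdu) != rq);	/* pdu - sizeof(struct request) */

	blk_mq_free_request(rq);
}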
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fa1abeb45b76..238ef0ed62f8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,19 +178,20 @@ enum rq_flag_bits {
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_KERNEL,		/* direct IO to kernel pages */
 	__REQ_PM,		/* runtime pm request */
+	__REQ_END,		/* last of chain of requests */
 	__REQ_NR_BITS,		/* stops here */
 };
 
-#define REQ_WRITE		(1 << __REQ_WRITE)
-#define REQ_FAILFAST_DEV	(1 << __REQ_FAILFAST_DEV)
-#define REQ_FAILFAST_TRANSPORT	(1 << __REQ_FAILFAST_TRANSPORT)
-#define REQ_FAILFAST_DRIVER	(1 << __REQ_FAILFAST_DRIVER)
-#define REQ_SYNC		(1 << __REQ_SYNC)
-#define REQ_META		(1 << __REQ_META)
-#define REQ_PRIO		(1 << __REQ_PRIO)
-#define REQ_DISCARD		(1 << __REQ_DISCARD)
-#define REQ_WRITE_SAME		(1 << __REQ_WRITE_SAME)
-#define REQ_NOIDLE		(1 << __REQ_NOIDLE)
+#define REQ_WRITE		(1ULL << __REQ_WRITE)
+#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
+#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
+#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
+#define REQ_SYNC		(1ULL << __REQ_SYNC)
+#define REQ_META		(1ULL << __REQ_META)
+#define REQ_PRIO		(1ULL << __REQ_PRIO)
+#define REQ_DISCARD		(1ULL << __REQ_DISCARD)
+#define REQ_WRITE_SAME		(1ULL << __REQ_WRITE_SAME)
+#define REQ_NOIDLE		(1ULL << __REQ_NOIDLE)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -206,28 +207,29 @@ enum rq_flag_bits {
 #define REQ_NOMERGE_FLAGS \
 	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
 
-#define REQ_RAHEAD		(1 << __REQ_RAHEAD)
-#define REQ_THROTTLED		(1 << __REQ_THROTTLED)
+#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
+#define REQ_THROTTLED		(1ULL << __REQ_THROTTLED)
 
-#define REQ_SORTED		(1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER		(1 << __REQ_SOFTBARRIER)
-#define REQ_FUA			(1 << __REQ_FUA)
-#define REQ_NOMERGE		(1 << __REQ_NOMERGE)
-#define REQ_STARTED		(1 << __REQ_STARTED)
-#define REQ_DONTPREP		(1 << __REQ_DONTPREP)
-#define REQ_QUEUED		(1 << __REQ_QUEUED)
-#define REQ_ELVPRIV		(1 << __REQ_ELVPRIV)
-#define REQ_FAILED		(1 << __REQ_FAILED)
-#define REQ_QUIET		(1 << __REQ_QUIET)
-#define REQ_PREEMPT		(1 << __REQ_PREEMPT)
-#define REQ_ALLOCED		(1 << __REQ_ALLOCED)
-#define REQ_COPY_USER		(1 << __REQ_COPY_USER)
-#define REQ_FLUSH		(1 << __REQ_FLUSH)
-#define REQ_FLUSH_SEQ		(1 << __REQ_FLUSH_SEQ)
-#define REQ_IO_STAT		(1 << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
-#define REQ_SECURE		(1 << __REQ_SECURE)
-#define REQ_KERNEL		(1 << __REQ_KERNEL)
-#define REQ_PM			(1 << __REQ_PM)
+#define REQ_SORTED		(1ULL << __REQ_SORTED)
+#define REQ_SOFTBARRIER		(1ULL << __REQ_SOFTBARRIER)
+#define REQ_FUA			(1ULL << __REQ_FUA)
+#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
+#define REQ_STARTED		(1ULL << __REQ_STARTED)
+#define REQ_DONTPREP		(1ULL << __REQ_DONTPREP)
+#define REQ_QUEUED		(1ULL << __REQ_QUEUED)
+#define REQ_ELVPRIV		(1ULL << __REQ_ELVPRIV)
+#define REQ_FAILED		(1ULL << __REQ_FAILED)
+#define REQ_QUIET		(1ULL << __REQ_QUIET)
+#define REQ_PREEMPT		(1ULL << __REQ_PREEMPT)
+#define REQ_ALLOCED		(1ULL << __REQ_ALLOCED)
+#define REQ_COPY_USER		(1ULL << __REQ_COPY_USER)
+#define REQ_FLUSH		(1ULL << __REQ_FLUSH)
+#define REQ_FLUSH_SEQ		(1ULL << __REQ_FLUSH_SEQ)
+#define REQ_IO_STAT		(1ULL << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE		(1ULL << __REQ_MIXED_MERGE)
+#define REQ_SECURE		(1ULL << __REQ_SECURE)
+#define REQ_KERNEL		(1ULL << __REQ_KERNEL)
+#define REQ_PM			(1ULL << __REQ_PM)
+#define REQ_END			(1ULL << __REQ_END)
 
 #endif /* __LINUX_BLK_TYPES_H */
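The blanket 1 to 1ULL change pairs with cmd_flags widening to u64 in the blkdev.h hunk below: with the flag space growing past 32 bits, an int-typed shift can no longer define the upper flags. A hypothetical illustration:

/*
 * Hypothetical illustration: an int-typed "1 << n" cannot define flag
 * bits at position 32 and above once cmd_flags is a u64.
 */
static void flag_width(void)
{
	u64 flags = 0;

	flags |= 1ULL << 32;	/* ok: 64-bit shift, sets bit 32 */
	/* flags |= 1 << 32; */	/* broken: shift exceeds the width of int */
	(void)flags;
}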
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e6f765aa1f5..f26ec20f6354 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -8,6 +8,7 @@
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/pagemap.h>
@@ -94,12 +95,19 @@ enum rq_cmd_type_bits {
  * as well!
  */
 struct request {
-	struct list_head queuelist;
-	struct call_single_data csd;
+	union {
+		struct list_head queuelist;
+		struct llist_node ll_list;
+	};
+	union {
+		struct call_single_data csd;
+		struct work_struct mq_flush_data;
+	};
 
 	struct request_queue *q;
+	struct blk_mq_ctx *mq_ctx;
 
-	unsigned int cmd_flags;
+	u64 cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
 	unsigned long atomic_flags;
 
@@ -160,8 +168,6 @@ struct request {
 
 	unsigned short ioprio;
 
-	int ref_count;
-
 	void *special;		/* opaque pointer available for LLD use */
 	char *buffer;		/* kaddr of the current segment if available */
 
@@ -215,6 +221,8 @@ struct request_pm_state
 
 #include <linux/elevator.h>
 
+struct blk_queue_ctx;
+
 typedef void (request_fn_proc) (struct request_queue *q);
 typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
@@ -313,6 +321,18 @@ struct request_queue {
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
 
+	struct blk_mq_ops	*mq_ops;
+
+	unsigned int		*mq_map;
+
+	/* sw queues */
+	struct blk_mq_ctx	*queue_ctx;
+	unsigned int		nr_queues;
+
+	/* hw dispatch queues */
+	struct blk_mq_hw_ctx	**queue_hw_ctx;
+	unsigned int		nr_hw_queues;
+
 	/*
 	 * Dispatch queue sorting
 	 */
@@ -361,6 +381,11 @@ struct request_queue {
 	 */
 	struct kobject kobj;
 
+	/*
+	 * mq queue kobject
+	 */
+	struct kobject mq_kobj;
+
 #ifdef CONFIG_PM_RUNTIME
 	struct device		*dev;
 	int			rpm_status;
@@ -425,7 +450,13 @@ struct request_queue {
 	unsigned long		flush_pending_since;
 	struct list_head	flush_queue[2];
 	struct list_head	flush_data_in_flight;
-	struct request		flush_rq;
+	union {
+		struct request	flush_rq;
+		struct {
+			spinlock_t mq_flush_lock;
+			struct work_struct mq_flush_work;
+		};
+	};
 
 	struct mutex		sysfs_lock;
 
@@ -437,14 +468,14 @@ struct request_queue {
 	struct bsg_class_device bsg_dev;
 #endif
 
-#ifdef CONFIG_BLK_CGROUP
-	struct list_head	all_q_node;
-#endif
 #ifdef CONFIG_BLK_DEV_THROTTLING
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
 	struct rcu_head		rcu_head;
+	wait_queue_head_t	mq_freeze_wq;
+	struct percpu_counter	mq_usage_counter;
+	struct list_head	all_q_node;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
@@ -467,6 +498,7 @@ struct request_queue {
 #define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
+#define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -539,6 +571,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
+#define blk_queue_init_done(q)	test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@ -570,7 +603,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		((rq)->cmd_flags & 1)
+#define rq_data_dir(rq)		(((rq)->cmd_flags & 1) != 0)
 
 static inline unsigned int blk_queue_cluster(struct request_queue *q)
 {
@@ -1013,6 +1046,7 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
 struct blk_plug {
 	unsigned long magic; /* detect uninitialized use-cases */
 	struct list_head list; /* requests */
+	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
 };
 #define BLK_MAX_REQUEST_COUNT 16
@@ -1050,7 +1084,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	struct blk_plug *plug = tsk->plug;
 
-	return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
+	return plug &&
+		(!list_empty(&plug->list) ||
+		 !list_empty(&plug->mq_list) ||
+		 !list_empty(&plug->cb_list));
 }
 
 /*
@@ -1325,6 +1362,7 @@ static inline void put_dev_sector(Sector p)
 
 struct work_struct;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
 /*
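blk_plug now carries a separate mq_list, and blk_needs_flush_plug() checks all three lists, so the caller-side plugging pattern is unchanged. An illustrative sketch (using the 3.13-era submit_bio(rw, bio) signature):

/*
 * Illustrative sketch: the plug now batches blk-mq requests on
 * plug->mq_list alongside the legacy plug->list.
 */
static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(READ, bios[i]);
	blk_finish_plug(&plug);	/* flushes list, mq_list and cb_list */
}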
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7c2e030e72f1..afc1343df3c7 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -5,6 +5,7 @@
 #include <linux/relay.h>
 #include <linux/compat.h>
 #include <uapi/linux/blktrace_api.h>
+#include <linux/list.h>
 
 #if defined(CONFIG_BLK_DEV_IO_TRACE)
 
@@ -23,6 +24,7 @@ struct blk_trace {
 	struct dentry *dir;
 	struct dentry *dropped_file;
 	struct dentry *msg_file;
+	struct list_head running_list;
 	atomic_t dropped;
 };
 
@@ -87,7 +89,7 @@ static inline int blk_trace_init_sysfs(struct device *dev)
 #ifdef CONFIG_COMPAT
 
 struct compat_blk_user_trace_setup {
-	char name[32];
+	char name[BLKTRACE_BDEV_SIZE];
 	u16 act_mask;
 	u32 buf_size;
 	u32 buf_nr;
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 0b23edbee309..1900bd0fa639 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -16,6 +16,8 @@ struct percpu_ida {
 	 * percpu_ida_init()
 	 */
 	unsigned nr_tags;
+	unsigned percpu_max_size;
+	unsigned percpu_batch_size;
 
 	struct percpu_ida_cpu __percpu *tag_cpu;
 
@@ -51,10 +53,29 @@ struct percpu_ida {
 	} ____cacheline_aligned_in_smp;
 };
 
+/*
+ * Number of tags we move between the percpu freelist and the global freelist at
+ * a time
+ */
+#define IDA_DEFAULT_PCPU_BATCH_MOVE	32U
+/* Max size of percpu freelist, */
+#define IDA_DEFAULT_PCPU_SIZE	((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
+
 int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
 void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
 
 void percpu_ida_destroy(struct percpu_ida *pool);
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+	unsigned long max_size, unsigned long batch_size);
+static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+{
+	return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
+		IDA_DEFAULT_PCPU_BATCH_MOVE);
+}
+
+typedef int (*percpu_ida_cb)(unsigned, void *);
+int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
+	void *data);
 
+unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
 #endif /* __PERCPU_IDA_H__ */
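The new __percpu_ida_init() exposes the percpu freelist sizing that used to be hard-coded, while the inline percpu_ida_init() preserves the old behavior (batch moves of 32 tags, at most 48 cached per CPU). A minimal usage sketch:

/*
 * Minimal sketch: percpu_ida_init() keeps the default sizing; callers
 * wanting different percpu freelist limits use __percpu_ida_init().
 */
static int tag_pool_demo(void)
{
	struct percpu_ida pool;
	int tag, err;

	err = percpu_ida_init(&pool, 256);	/* 256 tags, default sizing */
	if (err)
		return err;

	tag = percpu_ida_alloc(&pool, GFP_KERNEL);	/* >= 0 on success */
	if (tag >= 0)
		percpu_ida_free(&pool, tag);

	percpu_ida_destroy(&pool);
	return 0;
}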