authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-18 14:53:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-18 14:53:51 -0400
commitd3dc366bbaf07c125561e90d6da4bb147741101a (patch)
tree6eb7e79a8ec9df1fa705393c6d15ccea3d104661 /block
parent511c41d9e6665a07aca94eb00983cf6d77dd87ff (diff)
parente19a8a0ad2d255316830ead05b59c5a704434cbb (diff)
Merge branch 'for-3.18/core' of git://git.kernel.dk/linux-block
Pull core block layer changes from Jens Axboe:
 "This is the core block IO pull request for 3.18. Apart from the new
  and improved flush machinery for blk-mq, this is all mostly bug fixes
  and cleanups.

   - blk-mq timeout updates and fixes from Christoph.

   - Removal of REQ_END, also from Christoph. We pass it through the
     ->queue_rq() hook for blk-mq instead, freeing up one of the request
     bits. The space was overly tight on 32-bit, so Martin also killed
     REQ_KERNEL since it's no longer used.

   - blk integrity updates and fixes from Martin and Gu Zheng.

   - Update to the flush machinery for blk-mq from Ming Lei. Now we have
     a per hardware context flush request, which both cleans up the code
     and should scale better for flush intensive workloads on blk-mq.

   - Improve the error printing, from Rob Elliott.

   - Backing device improvements and cleanups from Tejun.

   - Fixup of a misplaced rq_complete() tracepoint from Hannes.

   - Make blk_get_request() return error pointers, fixing up issues
     where we NULL deref when a device goes bad or missing. From Joe
     Lawrence.

   - Prep work for drastically reducing the memory consumption of dm
     devices from Junichi Nomura. This allows creating clone bio sets
     without preallocating a lot of memory.

   - Fix a blk-mq hang on certain combinations of queue depths and
     hardware queues from me.

   - Limit memory consumption for blk-mq devices for crash dump
     scenarios and drivers that use crazy high depths (certain SCSI
     shared tag setups). We now just use a single queue and limited
     depth for that"

* 'for-3.18/core' of git://git.kernel.dk/linux-block: (58 commits)
  block: Remove REQ_KERNEL
  blk-mq: allocate cpumask on the home node
  bio-integrity: remove the needless fail handle of bip_slab creating
  block: include func name in __get_request prints
  block: make blk_update_request print prefix match ratelimited prefix
  blk-merge: don't compute bi_phys_segments from bi_vcnt for cloned bio
  block: fix alignment_offset math that assumes io_min is a power-of-2
  blk-mq: Make bt_clear_tag() easier to read
  blk-mq: fix potential hang if rolling wakeup depth is too high
  block: add bioset_create_nobvec()
  block: use bio_clone_fast() in blk_rq_prep_clone()
  block: misplaced rq_complete tracepoint
  sd: Honor block layer integrity handling flags
  block: Replace strnicmp with strncasecmp
  block: Add T10 Protection Information functions
  block: Don't merge requests if integrity flags differ
  block: Integrity checksum flag
  block: Relocate bio integrity flags
  block: Add a disk flag to block integrity profile
  block: Add prefix to block integrity profile flags
  ...
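The blk_get_request() item above changes the calling convention for every
user: failures now come back as ERR_PTR() values (for example -ENODEV when
the queue is dying, as the blk-core.c hunks below show) rather than NULL.
A minimal caller-side sketch of the new convention; the helper name and
surrounding logic are illustrative, not taken from any in-tree caller:

    #include <linux/blkdev.h>
    #include <linux/err.h>

    static struct request *example_get_pc_request(struct request_queue *q)
    {
            struct request *rq;

            rq = blk_get_request(q, WRITE, GFP_KERNEL);
            if (IS_ERR(rq))
                    return rq;      /* propagate -ENODEV/-ENOMEM, not NULL */

            blk_rq_set_block_pc(rq);        /* as blk_make_request() now does below */

            /* ... fill in the request and hand it to the driver ... */

            return rq;
    }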
Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig            |   1
-rw-r--r--  block/Makefile           |   4
-rw-r--r--  block/bio-integrity.c    | 271
-rw-r--r--  block/bio.c              |  61
-rw-r--r--  block/blk-cgroup.c       |   2
-rw-r--r--  block/blk-cgroup.h       |   3
-rw-r--r--  block/blk-core.c         |  67
-rw-r--r--  block/blk-flush.c        | 147
-rw-r--r--  block/blk-integrity.c    | 103
-rw-r--r--  block/blk-merge.c        |  14
-rw-r--r--  block/blk-mq-tag.c       |  55
-rw-r--r--  block/blk-mq.c           | 364
-rw-r--r--  block/blk-mq.h           |   3
-rw-r--r--  block/blk-settings.c     |   4
-rw-r--r--  block/blk-sysfs.c        |   4
-rw-r--r--  block/blk-timeout.c      |  15
-rw-r--r--  block/blk.h              |  37
-rw-r--r--  block/bsg.c              |   9
-rw-r--r--  block/cfq-iosched.c      |  10
-rw-r--r--  block/compat_ioctl.c     |   4
-rw-r--r--  block/ioctl.c            |   4
-rw-r--r--  block/partitions/mac.c   |   8
-rw-r--r--  block/scsi_ioctl.c       |  11
-rw-r--r--  block/t10-pi.c           | 197
24 files changed, 791 insertions, 607 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 2429515c05c2..161491d0a879 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -77,6 +77,7 @@ config BLK_DEV_BSGLIB
77 77
78config BLK_DEV_INTEGRITY 78config BLK_DEV_INTEGRITY
79 bool "Block layer data integrity support" 79 bool "Block layer data integrity support"
80 select CRC_T10DIF if BLK_DEV_INTEGRITY
80 ---help--- 81 ---help---
81 Some storage devices allow extra information to be 82 Some storage devices allow extra information to be
82 stored/retrieved to help protect the data. The block layer 83 stored/retrieved to help protect the data. The block layer
diff --git a/block/Makefile b/block/Makefile
index a2ce6ac935ec..00ecc97629db 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -20,6 +20,6 @@ obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
20obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o 20obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
21 21
22obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 22obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
23obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
24obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o 23obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
25obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o 24obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
25
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index f14b4abbebd8..0984232e429f 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -79,6 +79,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
79 bip->bip_slab = idx; 79 bip->bip_slab = idx;
80 bip->bip_bio = bio; 80 bip->bip_bio = bio;
81 bio->bi_integrity = bip; 81 bio->bi_integrity = bip;
82 bio->bi_rw |= REQ_INTEGRITY;
82 83
83 return bip; 84 return bip;
84err: 85err:
@@ -96,11 +97,12 @@ EXPORT_SYMBOL(bio_integrity_alloc);
96 */ 97 */
97void bio_integrity_free(struct bio *bio) 98void bio_integrity_free(struct bio *bio)
98{ 99{
99 struct bio_integrity_payload *bip = bio->bi_integrity; 100 struct bio_integrity_payload *bip = bio_integrity(bio);
100 struct bio_set *bs = bio->bi_pool; 101 struct bio_set *bs = bio->bi_pool;
101 102
102 if (bip->bip_owns_buf) 103 if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
103 kfree(bip->bip_buf); 104 kfree(page_address(bip->bip_vec->bv_page) +
105 bip->bip_vec->bv_offset);
104 106
105 if (bs) { 107 if (bs) {
106 if (bip->bip_slab != BIO_POOL_NONE) 108 if (bip->bip_slab != BIO_POOL_NONE)
@@ -128,7 +130,7 @@ EXPORT_SYMBOL(bio_integrity_free);
128int bio_integrity_add_page(struct bio *bio, struct page *page, 130int bio_integrity_add_page(struct bio *bio, struct page *page,
129 unsigned int len, unsigned int offset) 131 unsigned int len, unsigned int offset)
130{ 132{
131 struct bio_integrity_payload *bip = bio->bi_integrity; 133 struct bio_integrity_payload *bip = bio_integrity(bio);
132 struct bio_vec *iv; 134 struct bio_vec *iv;
133 135
134 if (bip->bip_vcnt >= bip->bip_max_vcnt) { 136 if (bip->bip_vcnt >= bip->bip_max_vcnt) {
@@ -147,24 +149,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
147} 149}
148EXPORT_SYMBOL(bio_integrity_add_page); 150EXPORT_SYMBOL(bio_integrity_add_page);
149 151
150static int bdev_integrity_enabled(struct block_device *bdev, int rw)
151{
152 struct blk_integrity *bi = bdev_get_integrity(bdev);
153
154 if (bi == NULL)
155 return 0;
156
157 if (rw == READ && bi->verify_fn != NULL &&
158 (bi->flags & INTEGRITY_FLAG_READ))
159 return 1;
160
161 if (rw == WRITE && bi->generate_fn != NULL &&
162 (bi->flags & INTEGRITY_FLAG_WRITE))
163 return 1;
164
165 return 0;
166}
167
168/** 152/**
169 * bio_integrity_enabled - Check whether integrity can be passed 153 * bio_integrity_enabled - Check whether integrity can be passed
170 * @bio: bio to check 154 * @bio: bio to check
@@ -174,171 +158,86 @@ static int bdev_integrity_enabled(struct block_device *bdev, int rw)
174 * set prior to calling. The functions honors the write_generate and 158 * set prior to calling. The functions honors the write_generate and
175 * read_verify flags in sysfs. 159 * read_verify flags in sysfs.
176 */ 160 */
177int bio_integrity_enabled(struct bio *bio) 161bool bio_integrity_enabled(struct bio *bio)
178{ 162{
163 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
164
179 if (!bio_is_rw(bio)) 165 if (!bio_is_rw(bio))
180 return 0; 166 return false;
181 167
182 /* Already protected? */ 168 /* Already protected? */
183 if (bio_integrity(bio)) 169 if (bio_integrity(bio))
184 return 0; 170 return false;
171
172 if (bi == NULL)
173 return false;
174
175 if (bio_data_dir(bio) == READ && bi->verify_fn != NULL &&
176 (bi->flags & BLK_INTEGRITY_VERIFY))
177 return true;
178
179 if (bio_data_dir(bio) == WRITE && bi->generate_fn != NULL &&
180 (bi->flags & BLK_INTEGRITY_GENERATE))
181 return true;
185 182
186 return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio)); 183 return false;
187} 184}
188EXPORT_SYMBOL(bio_integrity_enabled); 185EXPORT_SYMBOL(bio_integrity_enabled);
189 186
190/** 187/**
191 * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto 188 * bio_integrity_intervals - Return number of integrity intervals for a bio
192 * @bi: blk_integrity profile for device 189 * @bi: blk_integrity profile for device
193 * @sectors: Number of 512 sectors to convert 190 * @sectors: Size of the bio in 512-byte sectors
194 * 191 *
195 * Description: The block layer calculates everything in 512 byte 192 * Description: The block layer calculates everything in 512 byte
196 * sectors but integrity metadata is done in terms of the hardware 193 * sectors but integrity metadata is done in terms of the data integrity
197 * sector size of the storage device. Convert the block layer sectors 194 * interval size of the storage device. Convert the block layer sectors
198 * to physical sectors. 195 * to the appropriate number of integrity intervals.
199 */ 196 */
200static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi, 197static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
201 unsigned int sectors) 198 unsigned int sectors)
202{ 199{
203 /* At this point there are only 512b or 4096b DIF/EPP devices */ 200 return sectors >> (ilog2(bi->interval) - 9);
204 if (bi->sector_size == 4096)
205 return sectors >>= 3;
206
207 return sectors;
208} 201}
209 202
210static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, 203static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
211 unsigned int sectors) 204 unsigned int sectors)
212{ 205{
213 return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size; 206 return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
214} 207}
215 208
216/** 209/**
217 * bio_integrity_tag_size - Retrieve integrity tag space 210 * bio_integrity_process - Process integrity metadata for a bio
218 * @bio: bio to inspect
219 *
220 * Description: Returns the maximum number of tag bytes that can be
221 * attached to this bio. Filesystems can use this to determine how
222 * much metadata to attach to an I/O.
223 */
224unsigned int bio_integrity_tag_size(struct bio *bio)
225{
226 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
227
228 BUG_ON(bio->bi_iter.bi_size == 0);
229
230 return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
231}
232EXPORT_SYMBOL(bio_integrity_tag_size);
233
234static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
235 int set)
236{
237 struct bio_integrity_payload *bip = bio->bi_integrity;
238 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
239 unsigned int nr_sectors;
240
241 BUG_ON(bip->bip_buf == NULL);
242
243 if (bi->tag_size == 0)
244 return -1;
245
246 nr_sectors = bio_integrity_hw_sectors(bi,
247 DIV_ROUND_UP(len, bi->tag_size));
248
249 if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
250 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
251 nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
252 return -1;
253 }
254
255 if (set)
256 bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
257 else
258 bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
259
260 return 0;
261}
262
263/**
264 * bio_integrity_set_tag - Attach a tag buffer to a bio
265 * @bio: bio to attach buffer to
266 * @tag_buf: Pointer to a buffer containing tag data
267 * @len: Length of the included buffer
268 *
269 * Description: Use this function to tag a bio by leveraging the extra
270 * space provided by devices formatted with integrity protection. The
271 * size of the integrity buffer must be <= to the size reported by
272 * bio_integrity_tag_size().
273 */
274int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
275{
276 BUG_ON(bio_data_dir(bio) != WRITE);
277
278 return bio_integrity_tag(bio, tag_buf, len, 1);
279}
280EXPORT_SYMBOL(bio_integrity_set_tag);
281
282/**
283 * bio_integrity_get_tag - Retrieve a tag buffer from a bio
284 * @bio: bio to retrieve buffer from
285 * @tag_buf: Pointer to a buffer for the tag data
286 * @len: Length of the target buffer
287 *
288 * Description: Use this function to retrieve the tag buffer from a
289 * completed I/O. The size of the integrity buffer must be <= to the
290 * size reported by bio_integrity_tag_size().
291 */
292int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
293{
294 BUG_ON(bio_data_dir(bio) != READ);
295
296 return bio_integrity_tag(bio, tag_buf, len, 0);
297}
298EXPORT_SYMBOL(bio_integrity_get_tag);
299
300/**
301 * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio
302 * @bio: bio to generate/verify integrity metadata for 211 * @bio: bio to generate/verify integrity metadata for
303 * @operate: operate number, 1 for generate, 0 for verify 212 * @proc_fn: Pointer to the relevant processing function
304 */ 213 */
305static int bio_integrity_generate_verify(struct bio *bio, int operate) 214static int bio_integrity_process(struct bio *bio,
215 integrity_processing_fn *proc_fn)
306{ 216{
307 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 217 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
308 struct blk_integrity_exchg bix; 218 struct blk_integrity_iter iter;
309 struct bio_vec *bv; 219 struct bio_vec *bv;
310 sector_t sector; 220 struct bio_integrity_payload *bip = bio_integrity(bio);
311 unsigned int sectors, ret = 0, i; 221 unsigned int i, ret = 0;
312 void *prot_buf = bio->bi_integrity->bip_buf; 222 void *prot_buf = page_address(bip->bip_vec->bv_page) +
313 223 bip->bip_vec->bv_offset;
314 if (operate)
315 sector = bio->bi_iter.bi_sector;
316 else
317 sector = bio->bi_integrity->bip_iter.bi_sector;
318 224
319 bix.disk_name = bio->bi_bdev->bd_disk->disk_name; 225 iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
320 bix.sector_size = bi->sector_size; 226 iter.interval = bi->interval;
227 iter.seed = bip_get_seed(bip);
228 iter.prot_buf = prot_buf;
321 229
322 bio_for_each_segment_all(bv, bio, i) { 230 bio_for_each_segment_all(bv, bio, i) {
323 void *kaddr = kmap_atomic(bv->bv_page); 231 void *kaddr = kmap_atomic(bv->bv_page);
324 bix.data_buf = kaddr + bv->bv_offset;
325 bix.data_size = bv->bv_len;
326 bix.prot_buf = prot_buf;
327 bix.sector = sector;
328
329 if (operate)
330 bi->generate_fn(&bix);
331 else {
332 ret = bi->verify_fn(&bix);
333 if (ret) {
334 kunmap_atomic(kaddr);
335 return ret;
336 }
337 }
338 232
339 sectors = bv->bv_len / bi->sector_size; 233 iter.data_buf = kaddr + bv->bv_offset;
340 sector += sectors; 234 iter.data_size = bv->bv_len;
341 prot_buf += sectors * bi->tuple_size; 235
236 ret = proc_fn(&iter);
237 if (ret) {
238 kunmap_atomic(kaddr);
239 return ret;
240 }
342 241
343 kunmap_atomic(kaddr); 242 kunmap_atomic(kaddr);
344 } 243 }
@@ -346,28 +245,6 @@ static int bio_integrity_generate_verify(struct bio *bio, int operate)
346} 245}
347 246
348/** 247/**
349 * bio_integrity_generate - Generate integrity metadata for a bio
350 * @bio: bio to generate integrity metadata for
351 *
352 * Description: Generates integrity metadata for a bio by calling the
353 * block device's generation callback function. The bio must have a
354 * bip attached with enough room to accommodate the generated
355 * integrity metadata.
356 */
357static void bio_integrity_generate(struct bio *bio)
358{
359 bio_integrity_generate_verify(bio, 1);
360}
361
362static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
363{
364 if (bi)
365 return bi->tuple_size;
366
367 return 0;
368}
369
370/**
371 * bio_integrity_prep - Prepare bio for integrity I/O 248 * bio_integrity_prep - Prepare bio for integrity I/O
372 * @bio: bio to prepare 249 * @bio: bio to prepare
373 * 250 *
@@ -387,17 +264,17 @@ int bio_integrity_prep(struct bio *bio)
387 unsigned long start, end; 264 unsigned long start, end;
388 unsigned int len, nr_pages; 265 unsigned int len, nr_pages;
389 unsigned int bytes, offset, i; 266 unsigned int bytes, offset, i;
390 unsigned int sectors; 267 unsigned int intervals;
391 268
392 bi = bdev_get_integrity(bio->bi_bdev); 269 bi = bdev_get_integrity(bio->bi_bdev);
393 q = bdev_get_queue(bio->bi_bdev); 270 q = bdev_get_queue(bio->bi_bdev);
394 BUG_ON(bi == NULL); 271 BUG_ON(bi == NULL);
395 BUG_ON(bio_integrity(bio)); 272 BUG_ON(bio_integrity(bio));
396 273
397 sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio)); 274 intervals = bio_integrity_intervals(bi, bio_sectors(bio));
398 275
399 /* Allocate kernel buffer for protection data */ 276 /* Allocate kernel buffer for protection data */
400 len = sectors * blk_integrity_tuple_size(bi); 277 len = intervals * bi->tuple_size;
401 buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); 278 buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
402 if (unlikely(buf == NULL)) { 279 if (unlikely(buf == NULL)) {
403 printk(KERN_ERR "could not allocate integrity buffer\n"); 280 printk(KERN_ERR "could not allocate integrity buffer\n");
@@ -416,10 +293,12 @@ int bio_integrity_prep(struct bio *bio)
416 return -EIO; 293 return -EIO;
417 } 294 }
418 295
419 bip->bip_owns_buf = 1; 296 bip->bip_flags |= BIP_BLOCK_INTEGRITY;
420 bip->bip_buf = buf;
421 bip->bip_iter.bi_size = len; 297 bip->bip_iter.bi_size = len;
422 bip->bip_iter.bi_sector = bio->bi_iter.bi_sector; 298 bip_set_seed(bip, bio->bi_iter.bi_sector);
299
300 if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
301 bip->bip_flags |= BIP_IP_CHECKSUM;
423 302
424 /* Map it */ 303 /* Map it */
425 offset = offset_in_page(buf); 304 offset = offset_in_page(buf);
@@ -455,26 +334,13 @@ int bio_integrity_prep(struct bio *bio)
455 334
456 /* Auto-generate integrity metadata if this is a write */ 335 /* Auto-generate integrity metadata if this is a write */
457 if (bio_data_dir(bio) == WRITE) 336 if (bio_data_dir(bio) == WRITE)
458 bio_integrity_generate(bio); 337 bio_integrity_process(bio, bi->generate_fn);
459 338
460 return 0; 339 return 0;
461} 340}
462EXPORT_SYMBOL(bio_integrity_prep); 341EXPORT_SYMBOL(bio_integrity_prep);
463 342
464/** 343/**
465 * bio_integrity_verify - Verify integrity metadata for a bio
466 * @bio: bio to verify
467 *
468 * Description: This function is called to verify the integrity of a
469 * bio. The data in the bio io_vec is compared to the integrity
470 * metadata returned by the HBA.
471 */
472static int bio_integrity_verify(struct bio *bio)
473{
474 return bio_integrity_generate_verify(bio, 0);
475}
476
477/**
478 * bio_integrity_verify_fn - Integrity I/O completion worker 344 * bio_integrity_verify_fn - Integrity I/O completion worker
479 * @work: Work struct stored in bio to be verified 345 * @work: Work struct stored in bio to be verified
480 * 346 *
@@ -487,9 +353,10 @@ static void bio_integrity_verify_fn(struct work_struct *work)
487 struct bio_integrity_payload *bip = 353 struct bio_integrity_payload *bip =
488 container_of(work, struct bio_integrity_payload, bip_work); 354 container_of(work, struct bio_integrity_payload, bip_work);
489 struct bio *bio = bip->bip_bio; 355 struct bio *bio = bip->bip_bio;
356 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
490 int error; 357 int error;
491 358
492 error = bio_integrity_verify(bio); 359 error = bio_integrity_process(bio, bi->verify_fn);
493 360
494 /* Restore original bio completion handler */ 361 /* Restore original bio completion handler */
495 bio->bi_end_io = bip->bip_end_io; 362 bio->bi_end_io = bip->bip_end_io;
@@ -510,7 +377,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
510 */ 377 */
511void bio_integrity_endio(struct bio *bio, int error) 378void bio_integrity_endio(struct bio *bio, int error)
512{ 379{
513 struct bio_integrity_payload *bip = bio->bi_integrity; 380 struct bio_integrity_payload *bip = bio_integrity(bio);
514 381
515 BUG_ON(bip->bip_bio != bio); 382 BUG_ON(bip->bip_bio != bio);
516 383
@@ -541,7 +408,7 @@ EXPORT_SYMBOL(bio_integrity_endio);
541 */ 408 */
542void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) 409void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
543{ 410{
544 struct bio_integrity_payload *bip = bio->bi_integrity; 411 struct bio_integrity_payload *bip = bio_integrity(bio);
545 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 412 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
546 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); 413 unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
547 414
@@ -563,7 +430,7 @@ EXPORT_SYMBOL(bio_integrity_advance);
563void bio_integrity_trim(struct bio *bio, unsigned int offset, 430void bio_integrity_trim(struct bio *bio, unsigned int offset,
564 unsigned int sectors) 431 unsigned int sectors)
565{ 432{
566 struct bio_integrity_payload *bip = bio->bi_integrity; 433 struct bio_integrity_payload *bip = bio_integrity(bio);
567 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 434 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
568 435
569 bio_integrity_advance(bio, offset << 9); 436 bio_integrity_advance(bio, offset << 9);
@@ -582,7 +449,7 @@ EXPORT_SYMBOL(bio_integrity_trim);
582int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 449int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
583 gfp_t gfp_mask) 450 gfp_t gfp_mask)
584{ 451{
585 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 452 struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
586 struct bio_integrity_payload *bip; 453 struct bio_integrity_payload *bip;
587 454
588 BUG_ON(bip_src == NULL); 455 BUG_ON(bip_src == NULL);
@@ -646,6 +513,4 @@ void __init bio_integrity_init(void)
646 sizeof(struct bio_integrity_payload) + 513 sizeof(struct bio_integrity_payload) +
647 sizeof(struct bio_vec) * BIP_INLINE_VECS, 514 sizeof(struct bio_vec) * BIP_INLINE_VECS,
648 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 515 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
649 if (!bip_slab)
650 panic("Failed to create slab\n");
651} 516}
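For reference, the reworked bio_integrity_process() above walks the data
pages and repeatedly invokes a single integrity_processing_fn with a
blk_integrity_iter describing one segment; the callback is expected to
advance prot_buf and seed itself as it consumes intervals. The sketch
below only illustrates that contract; it is not the code this series adds
in block/t10-pi.c. The tuple layout is hypothetical, and the checksum
simply reuses the CRC_T10DIF helper selected by the Kconfig hunk above:

    #include <linux/blkdev.h>
    #include <linux/crc-t10dif.h>

    /* hypothetical per-interval protection tuple */
    struct example_tuple {
            __be16 guard;
            __be32 ref;
    };

    static int example_generate_fn(struct blk_integrity_iter *iter)
    {
            unsigned int i;

            /* one tuple is produced for every data integrity interval */
            for (i = 0; i < iter->data_size; i += iter->interval) {
                    struct example_tuple *tuple = iter->prot_buf;

                    tuple->guard = cpu_to_be16(crc_t10dif(iter->data_buf,
                                                          iter->interval));
                    tuple->ref = cpu_to_be32(lower_32_bits(iter->seed));

                    iter->data_buf += iter->interval;
                    iter->prot_buf += sizeof(*tuple);
                    iter->seed++;
            }

            return 0;
    }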
diff --git a/block/bio.c b/block/bio.c
index 3e6331d25d90..3e6e1986a5b2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -428,6 +428,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
428 front_pad = 0; 428 front_pad = 0;
429 inline_vecs = nr_iovecs; 429 inline_vecs = nr_iovecs;
430 } else { 430 } else {
431 /* should not use nobvec bioset for nr_iovecs > 0 */
432 if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
433 return NULL;
431 /* 434 /*
432 * generic_make_request() converts recursion to iteration; this 435 * generic_make_request() converts recursion to iteration; this
433 * means if we're running beneath it, any bios we allocate and 436 * means if we're running beneath it, any bios we allocate and
@@ -1900,20 +1903,9 @@ void bioset_free(struct bio_set *bs)
1900} 1903}
1901EXPORT_SYMBOL(bioset_free); 1904EXPORT_SYMBOL(bioset_free);
1902 1905
1903/** 1906static struct bio_set *__bioset_create(unsigned int pool_size,
1904 * bioset_create - Create a bio_set 1907 unsigned int front_pad,
1905 * @pool_size: Number of bio and bio_vecs to cache in the mempool 1908 bool create_bvec_pool)
1906 * @front_pad: Number of bytes to allocate in front of the returned bio
1907 *
1908 * Description:
1909 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1910 * to ask for a number of bytes to be allocated in front of the bio.
1911 * Front pad allocation is useful for embedding the bio inside
1912 * another structure, to avoid allocating extra data to go with the bio.
1913 * Note that the bio must be embedded at the END of that structure always,
1914 * or things will break badly.
1915 */
1916struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1917{ 1909{
1918 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); 1910 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1919 struct bio_set *bs; 1911 struct bio_set *bs;
@@ -1938,9 +1930,11 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1938 if (!bs->bio_pool) 1930 if (!bs->bio_pool)
1939 goto bad; 1931 goto bad;
1940 1932
1941 bs->bvec_pool = biovec_create_pool(pool_size); 1933 if (create_bvec_pool) {
1942 if (!bs->bvec_pool) 1934 bs->bvec_pool = biovec_create_pool(pool_size);
1943 goto bad; 1935 if (!bs->bvec_pool)
1936 goto bad;
1937 }
1944 1938
1945 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); 1939 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1946 if (!bs->rescue_workqueue) 1940 if (!bs->rescue_workqueue)
@@ -1951,8 +1945,41 @@ bad:
1951 bioset_free(bs); 1945 bioset_free(bs);
1952 return NULL; 1946 return NULL;
1953} 1947}
1948
1949/**
1950 * bioset_create - Create a bio_set
1951 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1952 * @front_pad: Number of bytes to allocate in front of the returned bio
1953 *
1954 * Description:
1955 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1956 * to ask for a number of bytes to be allocated in front of the bio.
1957 * Front pad allocation is useful for embedding the bio inside
1958 * another structure, to avoid allocating extra data to go with the bio.
1959 * Note that the bio must be embedded at the END of that structure always,
1960 * or things will break badly.
1961 */
1962struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1963{
1964 return __bioset_create(pool_size, front_pad, true);
1965}
1954EXPORT_SYMBOL(bioset_create); 1966EXPORT_SYMBOL(bioset_create);
1955 1967
1968/**
1969 * bioset_create_nobvec - Create a bio_set without bio_vec mempool
1970 * @pool_size: Number of bio to cache in the mempool
1971 * @front_pad: Number of bytes to allocate in front of the returned bio
1972 *
1973 * Description:
1974 * Same functionality as bioset_create() except that mempool is not
1975 * created for bio_vecs. Saving some memory for bio_clone_fast() users.
1976 */
1977struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
1978{
1979 return __bioset_create(pool_size, front_pad, false);
1980}
1981EXPORT_SYMBOL(bioset_create_nobvec);
1982
1956#ifdef CONFIG_BLK_CGROUP 1983#ifdef CONFIG_BLK_CGROUP
1957/** 1984/**
1958 * bio_associate_current - associate a bio with %current 1985 * bio_associate_current - associate a bio with %current
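The bioset_create_nobvec() variant added above is aimed at users that only
ever clone bios with bio_clone_fast(), where the clone shares the source
bio's bvec table and therefore never needs a bvec mempool; this is the dm
memory-reduction prep mentioned in the changelog. A minimal usage sketch
under that assumption (the example_* names and pool size are illustrative,
not taken from any in-tree driver):

    #include <linux/bio.h>

    static struct bio_set *example_clone_bs;

    static int example_init(void)
    {
            /* no front_pad and no bvec mempool: bios here are only cloned */
            example_clone_bs = bioset_create_nobvec(BIO_POOL_SIZE, 0);
            return example_clone_bs ? 0 : -ENOMEM;
    }

    static struct bio *example_clone(struct bio *bio_src, gfp_t gfp_mask)
    {
            /* the clone reuses bio_src's bvec table, so nobvec is enough */
            return bio_clone_fast(bio_src, gfp_mask, example_clone_bs);
    }

    static void example_exit(void)
    {
            bioset_free(example_clone_bs);
    }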
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e17da947f6bd..0ac817b750db 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -822,7 +822,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
822static struct cgroup_subsys_state * 822static struct cgroup_subsys_state *
823blkcg_css_alloc(struct cgroup_subsys_state *parent_css) 823blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
824{ 824{
825 static atomic64_t id_seq = ATOMIC64_INIT(0);
826 struct blkcg *blkcg; 825 struct blkcg *blkcg;
827 826
828 if (!parent_css) { 827 if (!parent_css) {
@@ -836,7 +835,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
836 835
837 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; 836 blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
838 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; 837 blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
839 blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
840done: 838done:
841 spin_lock_init(&blkcg->lock); 839 spin_lock_init(&blkcg->lock);
842 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); 840 INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index d3fd7aa3d2a3..c567865b5f1d 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -50,9 +50,6 @@ struct blkcg {
50 struct blkcg_gq *blkg_hint; 50 struct blkcg_gq *blkg_hint;
51 struct hlist_head blkg_list; 51 struct hlist_head blkg_list;
52 52
53 /* for policies to test whether associated blkcg has changed */
54 uint64_t id;
55
56 /* TODO: per-policy storage in blkcg */ 53 /* TODO: per-policy storage in blkcg */
57 unsigned int cfq_weight; /* belongs to cfq */ 54 unsigned int cfq_weight; /* belongs to cfq */
58 unsigned int cfq_leaf_weight; 55 unsigned int cfq_leaf_weight;
diff --git a/block/blk-core.c b/block/blk-core.c
index 9c888bd22b00..0421b53e6431 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q)
83 * @bdev: device 83 * @bdev: device
84 * 84 *
85 * Locates the passed device's request queue and returns the address of its 85 * Locates the passed device's request queue and returns the address of its
86 * backing_dev_info 86 * backing_dev_info. This function can only be called if @bdev is opened
87 * 87 * and the return value is never NULL.
88 * Will return NULL if the request queue cannot be located.
89 */ 88 */
90struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) 89struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
91{ 90{
92 struct backing_dev_info *ret = NULL;
93 struct request_queue *q = bdev_get_queue(bdev); 91 struct request_queue *q = bdev_get_queue(bdev);
94 92
95 if (q) 93 return &q->backing_dev_info;
96 ret = &q->backing_dev_info;
97 return ret;
98} 94}
99EXPORT_SYMBOL(blk_get_backing_dev_info); 95EXPORT_SYMBOL(blk_get_backing_dev_info);
100 96
@@ -394,11 +390,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
394 * be drained. Check all the queues and counters. 390 * be drained. Check all the queues and counters.
395 */ 391 */
396 if (drain_all) { 392 if (drain_all) {
393 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
397 drain |= !list_empty(&q->queue_head); 394 drain |= !list_empty(&q->queue_head);
398 for (i = 0; i < 2; i++) { 395 for (i = 0; i < 2; i++) {
399 drain |= q->nr_rqs[i]; 396 drain |= q->nr_rqs[i];
400 drain |= q->in_flight[i]; 397 drain |= q->in_flight[i];
401 drain |= !list_empty(&q->flush_queue[i]); 398 if (fq)
399 drain |= !list_empty(&fq->flush_queue[i]);
402 } 400 }
403 } 401 }
404 402
@@ -604,9 +602,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
604#ifdef CONFIG_BLK_CGROUP 602#ifdef CONFIG_BLK_CGROUP
605 INIT_LIST_HEAD(&q->blkg_list); 603 INIT_LIST_HEAD(&q->blkg_list);
606#endif 604#endif
607 INIT_LIST_HEAD(&q->flush_queue[0]);
608 INIT_LIST_HEAD(&q->flush_queue[1]);
609 INIT_LIST_HEAD(&q->flush_data_in_flight);
610 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); 605 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
611 606
612 kobject_init(&q->kobj, &blk_queue_ktype); 607 kobject_init(&q->kobj, &blk_queue_ktype);
@@ -709,8 +704,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
709 if (!q) 704 if (!q)
710 return NULL; 705 return NULL;
711 706
712 q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); 707 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
713 if (!q->flush_rq) 708 if (!q->fq)
714 return NULL; 709 return NULL;
715 710
716 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) 711 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
@@ -746,7 +741,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
746 return q; 741 return q;
747 742
748fail: 743fail:
749 kfree(q->flush_rq); 744 blk_free_flush_queue(q->fq);
750 return NULL; 745 return NULL;
751} 746}
752EXPORT_SYMBOL(blk_init_allocated_queue); 747EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -934,8 +929,8 @@ static struct io_context *rq_ioc(struct bio *bio)
934 * pressure or if @q is dead. 929 * pressure or if @q is dead.
935 * 930 *
936 * Must be called with @q->queue_lock held and, 931 * Must be called with @q->queue_lock held and,
937 * Returns %NULL on failure, with @q->queue_lock held. 932 * Returns ERR_PTR on failure, with @q->queue_lock held.
938 * Returns !%NULL on success, with @q->queue_lock *not held*. 933 * Returns request pointer on success, with @q->queue_lock *not held*.
939 */ 934 */
940static struct request *__get_request(struct request_list *rl, int rw_flags, 935static struct request *__get_request(struct request_list *rl, int rw_flags,
941 struct bio *bio, gfp_t gfp_mask) 936 struct bio *bio, gfp_t gfp_mask)
@@ -949,7 +944,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
949 int may_queue; 944 int may_queue;
950 945
951 if (unlikely(blk_queue_dying(q))) 946 if (unlikely(blk_queue_dying(q)))
952 return NULL; 947 return ERR_PTR(-ENODEV);
953 948
954 may_queue = elv_may_queue(q, rw_flags); 949 may_queue = elv_may_queue(q, rw_flags);
955 if (may_queue == ELV_MQUEUE_NO) 950 if (may_queue == ELV_MQUEUE_NO)
@@ -974,7 +969,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
974 * process is not a "batcher", and not 969 * process is not a "batcher", and not
975 * exempted by the IO scheduler 970 * exempted by the IO scheduler
976 */ 971 */
977 return NULL; 972 return ERR_PTR(-ENOMEM);
978 } 973 }
979 } 974 }
980 } 975 }
@@ -992,7 +987,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
992 * allocated with any setting of ->nr_requests 987 * allocated with any setting of ->nr_requests
993 */ 988 */
994 if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) 989 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
995 return NULL; 990 return ERR_PTR(-ENOMEM);
996 991
997 q->nr_rqs[is_sync]++; 992 q->nr_rqs[is_sync]++;
998 rl->count[is_sync]++; 993 rl->count[is_sync]++;
@@ -1065,8 +1060,8 @@ fail_elvpriv:
1065 * shouldn't stall IO. Treat this request as !elvpriv. This will 1060 * shouldn't stall IO. Treat this request as !elvpriv. This will
1066 * disturb iosched and blkcg but weird is bettern than dead. 1061 * disturb iosched and blkcg but weird is bettern than dead.
1067 */ 1062 */
1068 printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n", 1063 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
1069 dev_name(q->backing_dev_info.dev)); 1064 __func__, dev_name(q->backing_dev_info.dev));
1070 1065
1071 rq->cmd_flags &= ~REQ_ELVPRIV; 1066 rq->cmd_flags &= ~REQ_ELVPRIV;
1072 rq->elv.icq = NULL; 1067 rq->elv.icq = NULL;
@@ -1097,7 +1092,7 @@ fail_alloc:
1097rq_starved: 1092rq_starved:
1098 if (unlikely(rl->count[is_sync] == 0)) 1093 if (unlikely(rl->count[is_sync] == 0))
1099 rl->starved[is_sync] = 1; 1094 rl->starved[is_sync] = 1;
1100 return NULL; 1095 return ERR_PTR(-ENOMEM);
1101} 1096}
1102 1097
1103/** 1098/**
@@ -1111,8 +1106,8 @@ rq_starved:
1111 * function keeps retrying under memory pressure and fails iff @q is dead. 1106 * function keeps retrying under memory pressure and fails iff @q is dead.
1112 * 1107 *
1113 * Must be called with @q->queue_lock held and, 1108 * Must be called with @q->queue_lock held and,
1114 * Returns %NULL on failure, with @q->queue_lock held. 1109 * Returns ERR_PTR on failure, with @q->queue_lock held.
1115 * Returns !%NULL on success, with @q->queue_lock *not held*. 1110 * Returns request pointer on success, with @q->queue_lock *not held*.
1116 */ 1111 */
1117static struct request *get_request(struct request_queue *q, int rw_flags, 1112static struct request *get_request(struct request_queue *q, int rw_flags,
1118 struct bio *bio, gfp_t gfp_mask) 1113 struct bio *bio, gfp_t gfp_mask)
@@ -1125,12 +1120,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
1125 rl = blk_get_rl(q, bio); /* transferred to @rq on success */ 1120 rl = blk_get_rl(q, bio); /* transferred to @rq on success */
1126retry: 1121retry:
1127 rq = __get_request(rl, rw_flags, bio, gfp_mask); 1122 rq = __get_request(rl, rw_flags, bio, gfp_mask);
1128 if (rq) 1123 if (!IS_ERR(rq))
1129 return rq; 1124 return rq;
1130 1125
1131 if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { 1126 if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
1132 blk_put_rl(rl); 1127 blk_put_rl(rl);
1133 return NULL; 1128 return rq;
1134 } 1129 }
1135 1130
1136 /* wait on @rl and retry */ 1131 /* wait on @rl and retry */
@@ -1167,7 +1162,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1167 1162
1168 spin_lock_irq(q->queue_lock); 1163 spin_lock_irq(q->queue_lock);
1169 rq = get_request(q, rw, NULL, gfp_mask); 1164 rq = get_request(q, rw, NULL, gfp_mask);
1170 if (!rq) 1165 if (IS_ERR(rq))
1171 spin_unlock_irq(q->queue_lock); 1166 spin_unlock_irq(q->queue_lock);
1172 /* q->queue_lock is unlocked at this point */ 1167 /* q->queue_lock is unlocked at this point */
1173 1168
@@ -1219,8 +1214,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
1219{ 1214{
1220 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); 1215 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
1221 1216
1222 if (unlikely(!rq)) 1217 if (IS_ERR(rq))
1223 return ERR_PTR(-ENOMEM); 1218 return rq;
1224 1219
1225 blk_rq_set_block_pc(rq); 1220 blk_rq_set_block_pc(rq);
1226 1221
@@ -1614,8 +1609,8 @@ get_rq:
1614 * Returns with the queue unlocked. 1609 * Returns with the queue unlocked.
1615 */ 1610 */
1616 req = get_request(q, rw_flags, bio, GFP_NOIO); 1611 req = get_request(q, rw_flags, bio, GFP_NOIO);
1617 if (unlikely(!req)) { 1612 if (IS_ERR(req)) {
1618 bio_endio(bio, -ENODEV); /* @q is dead */ 1613 bio_endio(bio, PTR_ERR(req)); /* @q is dead */
1619 goto out_unlock; 1614 goto out_unlock;
1620 } 1615 }
1621 1616
@@ -2405,11 +2400,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2405{ 2400{
2406 int total_bytes; 2401 int total_bytes;
2407 2402
2403 trace_block_rq_complete(req->q, req, nr_bytes);
2404
2408 if (!req->bio) 2405 if (!req->bio)
2409 return false; 2406 return false;
2410 2407
2411 trace_block_rq_complete(req->q, req, nr_bytes);
2412
2413 /* 2408 /*
2414 * For fs requests, rq is just carrier of independent bio's 2409 * For fs requests, rq is just carrier of independent bio's
2415 * and each partial completion should be handled separately. 2410 * and each partial completion should be handled separately.
@@ -2449,8 +2444,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2449 error_type = "I/O"; 2444 error_type = "I/O";
2450 break; 2445 break;
2451 } 2446 }
2452 printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, sector %llu\n", 2447 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
2453 error_type, req->rq_disk ? 2448 __func__, error_type, req->rq_disk ?
2454 req->rq_disk->disk_name : "?", 2449 req->rq_disk->disk_name : "?",
2455 (unsigned long long)blk_rq_pos(req)); 2450 (unsigned long long)blk_rq_pos(req));
2456 2451
@@ -2931,7 +2926,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2931 blk_rq_init(NULL, rq); 2926 blk_rq_init(NULL, rq);
2932 2927
2933 __rq_for_each_bio(bio_src, rq_src) { 2928 __rq_for_each_bio(bio_src, rq_src) {
2934 bio = bio_clone_bioset(bio_src, gfp_mask, bs); 2929 bio = bio_clone_fast(bio_src, gfp_mask, bs);
2935 if (!bio) 2930 if (!bio)
2936 goto free_and_out; 2931 goto free_and_out;
2937 2932
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3cb5e9e7108a..20badd7b9d1b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -28,7 +28,7 @@
28 * 28 *
29 * The actual execution of flush is double buffered. Whenever a request 29 * The actual execution of flush is double buffered. Whenever a request
30 * needs to execute PRE or POSTFLUSH, it queues at 30 * needs to execute PRE or POSTFLUSH, it queues at
31 * q->flush_queue[q->flush_pending_idx]. Once certain criteria are met, a 31 * fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
32 * flush is issued and the pending_idx is toggled. When the flush 32 * flush is issued and the pending_idx is toggled. When the flush
33 * completes, all the requests which were pending are proceeded to the next 33 * completes, all the requests which were pending are proceeded to the next
34 * step. This allows arbitrary merging of different types of FLUSH/FUA 34 * step. This allows arbitrary merging of different types of FLUSH/FUA
@@ -91,7 +91,8 @@ enum {
91 FLUSH_PENDING_TIMEOUT = 5 * HZ, 91 FLUSH_PENDING_TIMEOUT = 5 * HZ,
92}; 92};
93 93
94static bool blk_kick_flush(struct request_queue *q); 94static bool blk_kick_flush(struct request_queue *q,
95 struct blk_flush_queue *fq);
95 96
96static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) 97static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
97{ 98{
@@ -126,8 +127,6 @@ static void blk_flush_restore_request(struct request *rq)
126 /* make @rq a normal request */ 127 /* make @rq a normal request */
127 rq->cmd_flags &= ~REQ_FLUSH_SEQ; 128 rq->cmd_flags &= ~REQ_FLUSH_SEQ;
128 rq->end_io = rq->flush.saved_end_io; 129 rq->end_io = rq->flush.saved_end_io;
129
130 blk_clear_rq_complete(rq);
131} 130}
132 131
133static bool blk_flush_queue_rq(struct request *rq, bool add_front) 132static bool blk_flush_queue_rq(struct request *rq, bool add_front)
@@ -150,6 +149,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
150/** 149/**
151 * blk_flush_complete_seq - complete flush sequence 150 * blk_flush_complete_seq - complete flush sequence
152 * @rq: FLUSH/FUA request being sequenced 151 * @rq: FLUSH/FUA request being sequenced
152 * @fq: flush queue
153 * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero) 153 * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
154 * @error: whether an error occurred 154 * @error: whether an error occurred
155 * 155 *
@@ -157,16 +157,17 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
157 * completion and trigger the next step. 157 * completion and trigger the next step.
158 * 158 *
159 * CONTEXT: 159 * CONTEXT:
160 * spin_lock_irq(q->queue_lock or q->mq_flush_lock) 160 * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
161 * 161 *
162 * RETURNS: 162 * RETURNS:
163 * %true if requests were added to the dispatch queue, %false otherwise. 163 * %true if requests were added to the dispatch queue, %false otherwise.
164 */ 164 */
165static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, 165static bool blk_flush_complete_seq(struct request *rq,
166 int error) 166 struct blk_flush_queue *fq,
167 unsigned int seq, int error)
167{ 168{
168 struct request_queue *q = rq->q; 169 struct request_queue *q = rq->q;
169 struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; 170 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
170 bool queued = false, kicked; 171 bool queued = false, kicked;
171 172
172 BUG_ON(rq->flush.seq & seq); 173 BUG_ON(rq->flush.seq & seq);
@@ -182,12 +183,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
182 case REQ_FSEQ_POSTFLUSH: 183 case REQ_FSEQ_POSTFLUSH:
183 /* queue for flush */ 184 /* queue for flush */
184 if (list_empty(pending)) 185 if (list_empty(pending))
185 q->flush_pending_since = jiffies; 186 fq->flush_pending_since = jiffies;
186 list_move_tail(&rq->flush.list, pending); 187 list_move_tail(&rq->flush.list, pending);
187 break; 188 break;
188 189
189 case REQ_FSEQ_DATA: 190 case REQ_FSEQ_DATA:
190 list_move_tail(&rq->flush.list, &q->flush_data_in_flight); 191 list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
191 queued = blk_flush_queue_rq(rq, true); 192 queued = blk_flush_queue_rq(rq, true);
192 break; 193 break;
193 194
@@ -202,7 +203,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
202 list_del_init(&rq->flush.list); 203 list_del_init(&rq->flush.list);
203 blk_flush_restore_request(rq); 204 blk_flush_restore_request(rq);
204 if (q->mq_ops) 205 if (q->mq_ops)
205 blk_mq_end_io(rq, error); 206 blk_mq_end_request(rq, error);
206 else 207 else
207 __blk_end_request_all(rq, error); 208 __blk_end_request_all(rq, error);
208 break; 209 break;
@@ -211,7 +212,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
211 BUG(); 212 BUG();
212 } 213 }
213 214
214 kicked = blk_kick_flush(q); 215 kicked = blk_kick_flush(q, fq);
215 return kicked | queued; 216 return kicked | queued;
216} 217}
217 218
@@ -222,17 +223,18 @@ static void flush_end_io(struct request *flush_rq, int error)
222 bool queued = false; 223 bool queued = false;
223 struct request *rq, *n; 224 struct request *rq, *n;
224 unsigned long flags = 0; 225 unsigned long flags = 0;
226 struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
225 227
226 if (q->mq_ops) { 228 if (q->mq_ops) {
227 spin_lock_irqsave(&q->mq_flush_lock, flags); 229 spin_lock_irqsave(&fq->mq_flush_lock, flags);
228 q->flush_rq->tag = -1; 230 flush_rq->tag = -1;
229 } 231 }
230 232
231 running = &q->flush_queue[q->flush_running_idx]; 233 running = &fq->flush_queue[fq->flush_running_idx];
232 BUG_ON(q->flush_pending_idx == q->flush_running_idx); 234 BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
233 235
234 /* account completion of the flush request */ 236 /* account completion of the flush request */
235 q->flush_running_idx ^= 1; 237 fq->flush_running_idx ^= 1;
236 238
237 if (!q->mq_ops) 239 if (!q->mq_ops)
238 elv_completed_request(q, flush_rq); 240 elv_completed_request(q, flush_rq);
@@ -242,7 +244,7 @@ static void flush_end_io(struct request *flush_rq, int error)
242 unsigned int seq = blk_flush_cur_seq(rq); 244 unsigned int seq = blk_flush_cur_seq(rq);
243 245
244 BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); 246 BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
245 queued |= blk_flush_complete_seq(rq, seq, error); 247 queued |= blk_flush_complete_seq(rq, fq, seq, error);
246 } 248 }
247 249
248 /* 250 /*
@@ -256,71 +258,81 @@ static void flush_end_io(struct request *flush_rq, int error)
256 * directly into request_fn may confuse the driver. Always use 258 * directly into request_fn may confuse the driver. Always use
257 * kblockd. 259 * kblockd.
258 */ 260 */
259 if (queued || q->flush_queue_delayed) { 261 if (queued || fq->flush_queue_delayed) {
260 WARN_ON(q->mq_ops); 262 WARN_ON(q->mq_ops);
261 blk_run_queue_async(q); 263 blk_run_queue_async(q);
262 } 264 }
263 q->flush_queue_delayed = 0; 265 fq->flush_queue_delayed = 0;
264 if (q->mq_ops) 266 if (q->mq_ops)
265 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 267 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
266} 268}
267 269
268/** 270/**
269 * blk_kick_flush - consider issuing flush request 271 * blk_kick_flush - consider issuing flush request
270 * @q: request_queue being kicked 272 * @q: request_queue being kicked
273 * @fq: flush queue
271 * 274 *
272 * Flush related states of @q have changed, consider issuing flush request. 275 * Flush related states of @q have changed, consider issuing flush request.
273 * Please read the comment at the top of this file for more info. 276 * Please read the comment at the top of this file for more info.
274 * 277 *
275 * CONTEXT: 278 * CONTEXT:
276 * spin_lock_irq(q->queue_lock or q->mq_flush_lock) 279 * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
277 * 280 *
278 * RETURNS: 281 * RETURNS:
279 * %true if flush was issued, %false otherwise. 282 * %true if flush was issued, %false otherwise.
280 */ 283 */
281static bool blk_kick_flush(struct request_queue *q) 284static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
282{ 285{
283 struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; 286 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
284 struct request *first_rq = 287 struct request *first_rq =
285 list_first_entry(pending, struct request, flush.list); 288 list_first_entry(pending, struct request, flush.list);
289 struct request *flush_rq = fq->flush_rq;
286 290
287 /* C1 described at the top of this file */ 291 /* C1 described at the top of this file */
288 if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending)) 292 if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
289 return false; 293 return false;
290 294
291 /* C2 and C3 */ 295 /* C2 and C3 */
292 if (!list_empty(&q->flush_data_in_flight) && 296 if (!list_empty(&fq->flush_data_in_flight) &&
293 time_before(jiffies, 297 time_before(jiffies,
294 q->flush_pending_since + FLUSH_PENDING_TIMEOUT)) 298 fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
295 return false; 299 return false;
296 300
297 /* 301 /*
298 * Issue flush and toggle pending_idx. This makes pending_idx 302 * Issue flush and toggle pending_idx. This makes pending_idx
299 * different from running_idx, which means flush is in flight. 303 * different from running_idx, which means flush is in flight.
300 */ 304 */
301 q->flush_pending_idx ^= 1; 305 fq->flush_pending_idx ^= 1;
302 306
303 blk_rq_init(q, q->flush_rq); 307 blk_rq_init(q, flush_rq);
304 if (q->mq_ops) 308
305 blk_mq_clone_flush_request(q->flush_rq, first_rq); 309 /*
310 * Borrow tag from the first request since they can't
311 * be in flight at the same time.
312 */
313 if (q->mq_ops) {
314 flush_rq->mq_ctx = first_rq->mq_ctx;
315 flush_rq->tag = first_rq->tag;
316 }
306 317
307 q->flush_rq->cmd_type = REQ_TYPE_FS; 318 flush_rq->cmd_type = REQ_TYPE_FS;
308 q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 319 flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
309 q->flush_rq->rq_disk = first_rq->rq_disk; 320 flush_rq->rq_disk = first_rq->rq_disk;
310 q->flush_rq->end_io = flush_end_io; 321 flush_rq->end_io = flush_end_io;
311 322
312 return blk_flush_queue_rq(q->flush_rq, false); 323 return blk_flush_queue_rq(flush_rq, false);
313} 324}
314 325
315static void flush_data_end_io(struct request *rq, int error) 326static void flush_data_end_io(struct request *rq, int error)
316{ 327{
317 struct request_queue *q = rq->q; 328 struct request_queue *q = rq->q;
329 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
318 330
319 /* 331 /*
320 * After populating an empty queue, kick it to avoid stall. Read 332 * After populating an empty queue, kick it to avoid stall. Read
321 * the comment in flush_end_io(). 333 * the comment in flush_end_io().
322 */ 334 */
323 if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) 335 if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
324 blk_run_queue_async(q); 336 blk_run_queue_async(q);
325} 337}
326 338
@@ -328,20 +340,20 @@ static void mq_flush_data_end_io(struct request *rq, int error)
328{ 340{
329 struct request_queue *q = rq->q; 341 struct request_queue *q = rq->q;
330 struct blk_mq_hw_ctx *hctx; 342 struct blk_mq_hw_ctx *hctx;
331 struct blk_mq_ctx *ctx; 343 struct blk_mq_ctx *ctx = rq->mq_ctx;
332 unsigned long flags; 344 unsigned long flags;
345 struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
333 346
334 ctx = rq->mq_ctx;
335 hctx = q->mq_ops->map_queue(q, ctx->cpu); 347 hctx = q->mq_ops->map_queue(q, ctx->cpu);
336 348
337 /* 349 /*
338 * After populating an empty queue, kick it to avoid stall. Read 350 * After populating an empty queue, kick it to avoid stall. Read
339 * the comment in flush_end_io(). 351 * the comment in flush_end_io().
340 */ 352 */
341 spin_lock_irqsave(&q->mq_flush_lock, flags); 353 spin_lock_irqsave(&fq->mq_flush_lock, flags);
342 if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) 354 if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
343 blk_mq_run_hw_queue(hctx, true); 355 blk_mq_run_hw_queue(hctx, true);
344 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 356 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
345} 357}
346 358
347/** 359/**
@@ -361,6 +373,7 @@ void blk_insert_flush(struct request *rq)
361 struct request_queue *q = rq->q; 373 struct request_queue *q = rq->q;
362 unsigned int fflags = q->flush_flags; /* may change, cache */ 374 unsigned int fflags = q->flush_flags; /* may change, cache */
363 unsigned int policy = blk_flush_policy(fflags, rq); 375 unsigned int policy = blk_flush_policy(fflags, rq);
376 struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
364 377
365 /* 378 /*
366 * @policy now records what operations need to be done. Adjust 379 * @policy now records what operations need to be done. Adjust
@@ -378,7 +391,7 @@ void blk_insert_flush(struct request *rq)
378 */ 391 */
379 if (!policy) { 392 if (!policy) {
380 if (q->mq_ops) 393 if (q->mq_ops)
381 blk_mq_end_io(rq, 0); 394 blk_mq_end_request(rq, 0);
382 else 395 else
383 __blk_end_bidi_request(rq, 0, 0, 0); 396 __blk_end_bidi_request(rq, 0, 0, 0);
384 return; 397 return;
@@ -411,14 +424,14 @@ void blk_insert_flush(struct request *rq)
411 if (q->mq_ops) { 424 if (q->mq_ops) {
412 rq->end_io = mq_flush_data_end_io; 425 rq->end_io = mq_flush_data_end_io;
413 426
414 spin_lock_irq(&q->mq_flush_lock); 427 spin_lock_irq(&fq->mq_flush_lock);
415 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 428 blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
416 spin_unlock_irq(&q->mq_flush_lock); 429 spin_unlock_irq(&fq->mq_flush_lock);
417 return; 430 return;
418 } 431 }
419 rq->end_io = flush_data_end_io; 432 rq->end_io = flush_data_end_io;
420 433
421 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 434 blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
422} 435}
423 436
424/** 437/**
@@ -474,7 +487,43 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
474} 487}
475EXPORT_SYMBOL(blkdev_issue_flush); 488EXPORT_SYMBOL(blkdev_issue_flush);
476 489
477void blk_mq_init_flush(struct request_queue *q) 490struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
491 int node, int cmd_size)
492{
493 struct blk_flush_queue *fq;
494 int rq_sz = sizeof(struct request);
495
496 fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
497 if (!fq)
498 goto fail;
499
500 if (q->mq_ops) {
501 spin_lock_init(&fq->mq_flush_lock);
502 rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
503 }
504
505 fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
506 if (!fq->flush_rq)
507 goto fail_rq;
508
509 INIT_LIST_HEAD(&fq->flush_queue[0]);
510 INIT_LIST_HEAD(&fq->flush_queue[1]);
511 INIT_LIST_HEAD(&fq->flush_data_in_flight);
512
513 return fq;
514
515 fail_rq:
516 kfree(fq);
517 fail:
518 return NULL;
519}
520
521void blk_free_flush_queue(struct blk_flush_queue *fq)
478{ 522{
479 spin_lock_init(&q->mq_flush_lock); 523 /* bio based request queue hasn't flush queue */
524 if (!fq)
525 return;
526
527 kfree(fq->flush_rq);
528 kfree(fq);
480} 529}
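blk_alloc_flush_queue()/blk_free_flush_queue() above replace the old
pattern of kzalloc-ing a single q->flush_rq at queue setup. A condensed
sketch of the pairing on the legacy request path, taken from the
blk_init_allocated_queue() hunk in blk-core.c earlier, with the
surrounding error handling elided:

    /* at queue initialization (legacy, non-mq path) */
    q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
    if (!q->fq)
            return NULL;

    /* ... */

    /* on the failure or release path */
    blk_free_flush_queue(q->fq);

Per the changelog, the blk-mq path (in blk-mq.c, not shown in this
section) now allocates one of these per hardware context, presumably
passing the context's NUMA node and the driver's per-request cmd_size
through the helper's extra arguments; that is why the code above rounds
the flush request size up to a cache line when q->mq_ops is set.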
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 7fbab84399e6..79ffb4855af0 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -154,10 +154,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
154 if (!b1 || !b2) 154 if (!b1 || !b2)
155 return -1; 155 return -1;
156 156
157 if (b1->sector_size != b2->sector_size) { 157 if (b1->interval != b2->interval) {
158 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, 158 pr_err("%s: %s/%s protection interval %u != %u\n",
159 gd1->disk_name, gd2->disk_name, 159 __func__, gd1->disk_name, gd2->disk_name,
160 b1->sector_size, b2->sector_size); 160 b1->interval, b2->interval);
161 return -1; 161 return -1;
162 } 162 }
163 163
@@ -186,37 +186,53 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
186} 186}
187EXPORT_SYMBOL(blk_integrity_compare); 187EXPORT_SYMBOL(blk_integrity_compare);
188 188
189int blk_integrity_merge_rq(struct request_queue *q, struct request *req, 189bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
190 struct request *next) 190 struct request *next)
191{ 191{
192 if (blk_integrity_rq(req) != blk_integrity_rq(next)) 192 if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0)
193 return -1; 193 return true;
194
195 if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0)
196 return false;
197
198 if (bio_integrity(req->bio)->bip_flags !=
199 bio_integrity(next->bio)->bip_flags)
200 return false;
194 201
195 if (req->nr_integrity_segments + next->nr_integrity_segments > 202 if (req->nr_integrity_segments + next->nr_integrity_segments >
196 q->limits.max_integrity_segments) 203 q->limits.max_integrity_segments)
197 return -1; 204 return false;
198 205
199 return 0; 206 return true;
200} 207}
201EXPORT_SYMBOL(blk_integrity_merge_rq); 208EXPORT_SYMBOL(blk_integrity_merge_rq);
202 209
203int blk_integrity_merge_bio(struct request_queue *q, struct request *req, 210bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
204 struct bio *bio) 211 struct bio *bio)
205{ 212{
206 int nr_integrity_segs; 213 int nr_integrity_segs;
207 struct bio *next = bio->bi_next; 214 struct bio *next = bio->bi_next;
208 215
216 if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
217 return true;
218
219 if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL)
220 return false;
221
222 if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
223 return false;
224
209 bio->bi_next = NULL; 225 bio->bi_next = NULL;
210 nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); 226 nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
211 bio->bi_next = next; 227 bio->bi_next = next;
212 228
213 if (req->nr_integrity_segments + nr_integrity_segs > 229 if (req->nr_integrity_segments + nr_integrity_segs >
214 q->limits.max_integrity_segments) 230 q->limits.max_integrity_segments)
215 return -1; 231 return false;
216 232
217 req->nr_integrity_segments += nr_integrity_segs; 233 req->nr_integrity_segments += nr_integrity_segs;
218 234
219 return 0; 235 return true;
220} 236}
221EXPORT_SYMBOL(blk_integrity_merge_bio); 237EXPORT_SYMBOL(blk_integrity_merge_bio);
222 238
@@ -269,42 +285,48 @@ static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
269 return sprintf(page, "0\n"); 285 return sprintf(page, "0\n");
270} 286}
271 287
272static ssize_t integrity_read_store(struct blk_integrity *bi, 288static ssize_t integrity_verify_store(struct blk_integrity *bi,
273 const char *page, size_t count) 289 const char *page, size_t count)
274{ 290{
275 char *p = (char *) page; 291 char *p = (char *) page;
276 unsigned long val = simple_strtoul(p, &p, 10); 292 unsigned long val = simple_strtoul(p, &p, 10);
277 293
278 if (val) 294 if (val)
279 bi->flags |= INTEGRITY_FLAG_READ; 295 bi->flags |= BLK_INTEGRITY_VERIFY;
280 else 296 else
281 bi->flags &= ~INTEGRITY_FLAG_READ; 297 bi->flags &= ~BLK_INTEGRITY_VERIFY;
282 298
283 return count; 299 return count;
284} 300}
285 301
286static ssize_t integrity_read_show(struct blk_integrity *bi, char *page) 302static ssize_t integrity_verify_show(struct blk_integrity *bi, char *page)
287{ 303{
288 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0); 304 return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_VERIFY) != 0);
289} 305}
290 306
291static ssize_t integrity_write_store(struct blk_integrity *bi, 307static ssize_t integrity_generate_store(struct blk_integrity *bi,
292 const char *page, size_t count) 308 const char *page, size_t count)
293{ 309{
294 char *p = (char *) page; 310 char *p = (char *) page;
295 unsigned long val = simple_strtoul(p, &p, 10); 311 unsigned long val = simple_strtoul(p, &p, 10);
296 312
297 if (val) 313 if (val)
298 bi->flags |= INTEGRITY_FLAG_WRITE; 314 bi->flags |= BLK_INTEGRITY_GENERATE;
299 else 315 else
300 bi->flags &= ~INTEGRITY_FLAG_WRITE; 316 bi->flags &= ~BLK_INTEGRITY_GENERATE;
301 317
302 return count; 318 return count;
303} 319}
304 320
305static ssize_t integrity_write_show(struct blk_integrity *bi, char *page) 321static ssize_t integrity_generate_show(struct blk_integrity *bi, char *page)
322{
323 return sprintf(page, "%d\n", (bi->flags & BLK_INTEGRITY_GENERATE) != 0);
324}
325
326static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
306{ 327{
307 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0); 328 return sprintf(page, "%u\n",
329 (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) != 0);
308} 330}
309 331
310static struct integrity_sysfs_entry integrity_format_entry = { 332static struct integrity_sysfs_entry integrity_format_entry = {
@@ -317,23 +339,29 @@ static struct integrity_sysfs_entry integrity_tag_size_entry = {
317 .show = integrity_tag_size_show, 339 .show = integrity_tag_size_show,
318}; 340};
319 341
320static struct integrity_sysfs_entry integrity_read_entry = { 342static struct integrity_sysfs_entry integrity_verify_entry = {
321 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR }, 343 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
322 .show = integrity_read_show, 344 .show = integrity_verify_show,
323 .store = integrity_read_store, 345 .store = integrity_verify_store,
324}; 346};
325 347
326static struct integrity_sysfs_entry integrity_write_entry = { 348static struct integrity_sysfs_entry integrity_generate_entry = {
327 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR }, 349 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
328 .show = integrity_write_show, 350 .show = integrity_generate_show,
329 .store = integrity_write_store, 351 .store = integrity_generate_store,
352};
353
354static struct integrity_sysfs_entry integrity_device_entry = {
355 .attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
356 .show = integrity_device_show,
330}; 357};
331 358
332static struct attribute *integrity_attrs[] = { 359static struct attribute *integrity_attrs[] = {
333 &integrity_format_entry.attr, 360 &integrity_format_entry.attr,
334 &integrity_tag_size_entry.attr, 361 &integrity_tag_size_entry.attr,
335 &integrity_read_entry.attr, 362 &integrity_verify_entry.attr,
336 &integrity_write_entry.attr, 363 &integrity_generate_entry.attr,
364 &integrity_device_entry.attr,
337 NULL, 365 NULL,
338}; 366};
339 367
@@ -406,8 +434,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
406 434
407 kobject_uevent(&bi->kobj, KOBJ_ADD); 435 kobject_uevent(&bi->kobj, KOBJ_ADD);
408 436
409 bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE; 437 bi->flags |= BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE;
410 bi->sector_size = queue_logical_block_size(disk->queue); 438 bi->interval = queue_logical_block_size(disk->queue);
411 disk->integrity = bi; 439 disk->integrity = bi;
412 } else 440 } else
413 bi = disk->integrity; 441 bi = disk->integrity;
@@ -418,9 +446,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
418 bi->generate_fn = template->generate_fn; 446 bi->generate_fn = template->generate_fn;
419 bi->verify_fn = template->verify_fn; 447 bi->verify_fn = template->verify_fn;
420 bi->tuple_size = template->tuple_size; 448 bi->tuple_size = template->tuple_size;
421 bi->set_tag_fn = template->set_tag_fn;
422 bi->get_tag_fn = template->get_tag_fn;
423 bi->tag_size = template->tag_size; 449 bi->tag_size = template->tag_size;
450 bi->flags |= template->flags;
424 } else 451 } else
425 bi->name = bi_unsupported_name; 452 bi->name = bi_unsupported_name;
426 453
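
With the switch from int to bool, the merge helpers now encode the full compatibility rule: merging is allowed when neither side carries integrity metadata, refused when only one side does or when the bip_flags differ, and otherwise gated on the integrity segment limit. A hedged sketch of how a caller reads the new contract; my_bio_mergeable() and the surrounding structure are illustrative and assume block-internal headers.

static bool my_bio_mergeable(struct request_queue *q, struct request *req,
                             struct bio *bio)
{
        /*
         * false means "do not merge": mixed protected/unprotected I/O,
         * mismatched BIP flags, or too many integrity segments.
         */
        if (!blk_integrity_merge_bio(q, req, bio))
                return false;

        /* physical segment and size checks would follow here */
        return true;
}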
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77881798f793..ba99351c0f58 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -97,14 +97,18 @@ void blk_recalc_rq_segments(struct request *rq)
97 97
98void blk_recount_segments(struct request_queue *q, struct bio *bio) 98void blk_recount_segments(struct request_queue *q, struct bio *bio)
99{ 99{
100 if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && 100 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
101 &q->queue_flags);
102
103 if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) &&
101 bio->bi_vcnt < queue_max_segments(q)) 104 bio->bi_vcnt < queue_max_segments(q))
102 bio->bi_phys_segments = bio->bi_vcnt; 105 bio->bi_phys_segments = bio->bi_vcnt;
103 else { 106 else {
104 struct bio *nxt = bio->bi_next; 107 struct bio *nxt = bio->bi_next;
105 108
106 bio->bi_next = NULL; 109 bio->bi_next = NULL;
107 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); 110 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio,
111 no_sg_merge);
108 bio->bi_next = nxt; 112 bio->bi_next = nxt;
109 } 113 }
110 114
@@ -313,7 +317,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
313 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) 317 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
314 goto no_merge; 318 goto no_merge;
315 319
316 if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) 320 if (blk_integrity_merge_bio(q, req, bio) == false)
317 goto no_merge; 321 goto no_merge;
318 322
319 /* 323 /*
@@ -410,7 +414,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
410 if (total_phys_segments > queue_max_segments(q)) 414 if (total_phys_segments > queue_max_segments(q))
411 return 0; 415 return 0;
412 416
413 if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) 417 if (blk_integrity_merge_rq(q, req, next) == false)
414 return 0; 418 return 0;
415 419
416 /* Merge is OK... */ 420 /* Merge is OK... */
@@ -590,7 +594,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
590 return false; 594 return false;
591 595
592 /* only merge integrity protected bio into ditto rq */ 596 /* only merge integrity protected bio into ditto rq */
593 if (bio_integrity(bio) != blk_integrity_rq(rq)) 597 if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
594 return false; 598 return false;
595 599
596 /* must be using the same buffer */ 600 /* must be using the same buffer */
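
The BIO_CLONED test above matters because a clone made with bio_clone_fast() borrows the parent's bvec table: bi_vcnt still describes the parent's full vector even if the clone was advanced or trimmed, so taking the segment count from bi_vcnt would overstate it. A hedged restatement of the fast-path rule, assuming blk-merge.c internals; my_segment_estimate() is illustrative.

static unsigned int my_segment_estimate(struct request_queue *q,
                                        struct bio *bio, bool no_sg_merge)
{
        /* cheap path: only for bios that own their whole bvec table */
        if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) &&
            bio->bi_vcnt < queue_max_segments(q))
                return bio->bi_vcnt;

        /* cloned (or oversized) bios get the full recount */
        return __blk_recalc_rq_segments(q, bio, no_sg_merge);
}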
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c1b92426c95e..8317175a3009 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -351,15 +351,12 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
351 return; 351 return;
352 352
353 wait_cnt = atomic_dec_return(&bs->wait_cnt); 353 wait_cnt = atomic_dec_return(&bs->wait_cnt);
354 if (unlikely(wait_cnt < 0))
355 wait_cnt = atomic_inc_return(&bs->wait_cnt);
354 if (wait_cnt == 0) { 356 if (wait_cnt == 0) {
355wake:
356 atomic_add(bt->wake_cnt, &bs->wait_cnt); 357 atomic_add(bt->wake_cnt, &bs->wait_cnt);
357 bt_index_atomic_inc(&bt->wake_index); 358 bt_index_atomic_inc(&bt->wake_index);
358 wake_up(&bs->wait); 359 wake_up(&bs->wait);
359 } else if (wait_cnt < 0) {
360 wait_cnt = atomic_inc_return(&bs->wait_cnt);
361 if (!wait_cnt)
362 goto wake;
363 } 360 }
364} 361}
365 362
@@ -392,45 +389,37 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
392 __blk_mq_put_reserved_tag(tags, tag); 389 __blk_mq_put_reserved_tag(tags, tag);
393} 390}
394 391
395static void bt_for_each_free(struct blk_mq_bitmap_tags *bt, 392static void bt_for_each(struct blk_mq_hw_ctx *hctx,
396 unsigned long *free_map, unsigned int off) 393 struct blk_mq_bitmap_tags *bt, unsigned int off,
394 busy_iter_fn *fn, void *data, bool reserved)
397{ 395{
398 int i; 396 struct request *rq;
397 int bit, i;
399 398
400 for (i = 0; i < bt->map_nr; i++) { 399 for (i = 0; i < bt->map_nr; i++) {
401 struct blk_align_bitmap *bm = &bt->map[i]; 400 struct blk_align_bitmap *bm = &bt->map[i];
402 int bit = 0;
403 401
404 do { 402 for (bit = find_first_bit(&bm->word, bm->depth);
405 bit = find_next_zero_bit(&bm->word, bm->depth, bit); 403 bit < bm->depth;
406 if (bit >= bm->depth) 404 bit = find_next_bit(&bm->word, bm->depth, bit + 1)) {
407 break; 405 rq = blk_mq_tag_to_rq(hctx->tags, off + bit);
408 406 if (rq->q == hctx->queue)
409 __set_bit(bit + off, free_map); 407 fn(hctx, rq, data, reserved);
410 bit++; 408 }
411 } while (1);
412 409
413 off += (1 << bt->bits_per_word); 410 off += (1 << bt->bits_per_word);
414 } 411 }
415} 412}
416 413
417void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, 414void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
418 void (*fn)(void *, unsigned long *), void *data) 415 void *priv)
419{ 416{
420 unsigned long *tag_map; 417 struct blk_mq_tags *tags = hctx->tags;
421 size_t map_size;
422
423 map_size = ALIGN(tags->nr_tags, BITS_PER_LONG) / BITS_PER_LONG;
424 tag_map = kzalloc(map_size * sizeof(unsigned long), GFP_ATOMIC);
425 if (!tag_map)
426 return;
427 418
428 bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags);
429 if (tags->nr_reserved_tags) 419 if (tags->nr_reserved_tags)
430 bt_for_each_free(&tags->breserved_tags, tag_map, 0); 420 bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true);
431 421 bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv,
432 fn(data, tag_map); 422 false);
433 kfree(tag_map);
434} 423}
435EXPORT_SYMBOL(blk_mq_tag_busy_iter); 424EXPORT_SYMBOL(blk_mq_tag_busy_iter);
436 425
@@ -463,8 +452,8 @@ static void bt_update_count(struct blk_mq_bitmap_tags *bt,
463 } 452 }
464 453
465 bt->wake_cnt = BT_WAIT_BATCH; 454 bt->wake_cnt = BT_WAIT_BATCH;
466 if (bt->wake_cnt > depth / 4) 455 if (bt->wake_cnt > depth / BT_WAIT_QUEUES)
467 bt->wake_cnt = max(1U, depth / 4); 456 bt->wake_cnt = max(1U, depth / BT_WAIT_QUEUES);
468 457
469 bt->depth = depth; 458 bt->depth = depth;
470} 459}
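
blk_mq_tag_busy_iter() now walks the set bits of the tag map directly and invokes a busy_iter_fn on every request belonging to the hardware context, instead of handing the caller an inverted "free tags" bitmap to allocate and scan. A hedged sketch of a callback under the new signature; counting in-flight requests is an invented use, and REQ_ATOM_STARTED is a block-internal flag, so this assumes block-internal headers.

static void my_count_busy(struct blk_mq_hw_ctx *hctx, struct request *rq,
                          void *priv, bool reserved)
{
        unsigned int *count = priv;

        /* only count requests the driver has actually started */
        if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
                (*count)++;
}

static unsigned int my_busy_count(struct blk_mq_hw_ctx *hctx)
{
        unsigned int count = 0;

        blk_mq_tag_busy_iter(hctx, my_count_busy, &count);
        return count;
}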
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 38f4a165640d..68929bad9a6a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -20,6 +20,7 @@
20#include <linux/cache.h> 20#include <linux/cache.h>
21#include <linux/sched/sysctl.h> 21#include <linux/sched/sysctl.h>
22#include <linux/delay.h> 22#include <linux/delay.h>
23#include <linux/crash_dump.h>
23 24
24#include <trace/events/block.h> 25#include <trace/events/block.h>
25 26
@@ -223,9 +224,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
223 struct blk_mq_hw_ctx *hctx; 224 struct blk_mq_hw_ctx *hctx;
224 struct request *rq; 225 struct request *rq;
225 struct blk_mq_alloc_data alloc_data; 226 struct blk_mq_alloc_data alloc_data;
227 int ret;
226 228
227 if (blk_mq_queue_enter(q)) 229 ret = blk_mq_queue_enter(q);
228 return NULL; 230 if (ret)
231 return ERR_PTR(ret);
229 232
230 ctx = blk_mq_get_ctx(q); 233 ctx = blk_mq_get_ctx(q);
231 hctx = q->mq_ops->map_queue(q, ctx->cpu); 234 hctx = q->mq_ops->map_queue(q, ctx->cpu);
@@ -245,6 +248,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
245 ctx = alloc_data.ctx; 248 ctx = alloc_data.ctx;
246 } 249 }
247 blk_mq_put_ctx(ctx); 250 blk_mq_put_ctx(ctx);
251 if (!rq)
252 return ERR_PTR(-EWOULDBLOCK);
248 return rq; 253 return rq;
249} 254}
250EXPORT_SYMBOL(blk_mq_alloc_request); 255EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -276,27 +281,7 @@ void blk_mq_free_request(struct request *rq)
276 __blk_mq_free_request(hctx, ctx, rq); 281 __blk_mq_free_request(hctx, ctx, rq);
277} 282}
278 283
279/* 284inline void __blk_mq_end_request(struct request *rq, int error)
280 * Clone all relevant state from a request that has been put on hold in
281 * the flush state machine into the preallocated flush request that hangs
282 * off the request queue.
283 *
284 * For a driver the flush request should be invisible, that's why we are
285 * impersonating the original request here.
286 */
287void blk_mq_clone_flush_request(struct request *flush_rq,
288 struct request *orig_rq)
289{
290 struct blk_mq_hw_ctx *hctx =
291 orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
292
293 flush_rq->mq_ctx = orig_rq->mq_ctx;
294 flush_rq->tag = orig_rq->tag;
295 memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
296 hctx->cmd_size);
297}
298
299inline void __blk_mq_end_io(struct request *rq, int error)
300{ 285{
301 blk_account_io_done(rq); 286 blk_account_io_done(rq);
302 287
@@ -308,15 +293,15 @@ inline void __blk_mq_end_io(struct request *rq, int error)
308 blk_mq_free_request(rq); 293 blk_mq_free_request(rq);
309 } 294 }
310} 295}
311EXPORT_SYMBOL(__blk_mq_end_io); 296EXPORT_SYMBOL(__blk_mq_end_request);
312 297
313void blk_mq_end_io(struct request *rq, int error) 298void blk_mq_end_request(struct request *rq, int error)
314{ 299{
315 if (blk_update_request(rq, error, blk_rq_bytes(rq))) 300 if (blk_update_request(rq, error, blk_rq_bytes(rq)))
316 BUG(); 301 BUG();
317 __blk_mq_end_io(rq, error); 302 __blk_mq_end_request(rq, error);
318} 303}
319EXPORT_SYMBOL(blk_mq_end_io); 304EXPORT_SYMBOL(blk_mq_end_request);
320 305
321static void __blk_mq_complete_request_remote(void *data) 306static void __blk_mq_complete_request_remote(void *data)
322{ 307{
@@ -356,7 +341,7 @@ void __blk_mq_complete_request(struct request *rq)
356 struct request_queue *q = rq->q; 341 struct request_queue *q = rq->q;
357 342
358 if (!q->softirq_done_fn) 343 if (!q->softirq_done_fn)
359 blk_mq_end_io(rq, rq->errors); 344 blk_mq_end_request(rq, rq->errors);
360 else 345 else
361 blk_mq_ipi_complete_request(rq); 346 blk_mq_ipi_complete_request(rq);
362} 347}
@@ -380,7 +365,7 @@ void blk_mq_complete_request(struct request *rq)
380} 365}
381EXPORT_SYMBOL(blk_mq_complete_request); 366EXPORT_SYMBOL(blk_mq_complete_request);
382 367
383static void blk_mq_start_request(struct request *rq, bool last) 368void blk_mq_start_request(struct request *rq)
384{ 369{
385 struct request_queue *q = rq->q; 370 struct request_queue *q = rq->q;
386 371
@@ -417,35 +402,24 @@ static void blk_mq_start_request(struct request *rq, bool last)
417 */ 402 */
418 rq->nr_phys_segments++; 403 rq->nr_phys_segments++;
419 } 404 }
420
421 /*
422 * Flag the last request in the series so that drivers know when IO
423 * should be kicked off, if they don't do it on a per-request basis.
424 *
 425 * Note: the flag isn't the only condition on which drivers should kick off IO.
 426 * If the drive is busy, the last request might not have the bit set.
427 */
428 if (last)
429 rq->cmd_flags |= REQ_END;
430} 405}
406EXPORT_SYMBOL(blk_mq_start_request);
431 407
432static void __blk_mq_requeue_request(struct request *rq) 408static void __blk_mq_requeue_request(struct request *rq)
433{ 409{
434 struct request_queue *q = rq->q; 410 struct request_queue *q = rq->q;
435 411
436 trace_block_rq_requeue(q, rq); 412 trace_block_rq_requeue(q, rq);
437 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
438
439 rq->cmd_flags &= ~REQ_END;
440 413
441 if (q->dma_drain_size && blk_rq_bytes(rq)) 414 if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
442 rq->nr_phys_segments--; 415 if (q->dma_drain_size && blk_rq_bytes(rq))
416 rq->nr_phys_segments--;
417 }
443} 418}
444 419
445void blk_mq_requeue_request(struct request *rq) 420void blk_mq_requeue_request(struct request *rq)
446{ 421{
447 __blk_mq_requeue_request(rq); 422 __blk_mq_requeue_request(rq);
448 blk_clear_rq_complete(rq);
449 423
450 BUG_ON(blk_queued_rq(rq)); 424 BUG_ON(blk_queued_rq(rq));
451 blk_mq_add_to_requeue_list(rq, true); 425 blk_mq_add_to_requeue_list(rq, true);
@@ -514,78 +488,35 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
514} 488}
515EXPORT_SYMBOL(blk_mq_kick_requeue_list); 489EXPORT_SYMBOL(blk_mq_kick_requeue_list);
516 490
517static inline bool is_flush_request(struct request *rq, unsigned int tag) 491static inline bool is_flush_request(struct request *rq,
492 struct blk_flush_queue *fq, unsigned int tag)
518{ 493{
519 return ((rq->cmd_flags & REQ_FLUSH_SEQ) && 494 return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
520 rq->q->flush_rq->tag == tag); 495 fq->flush_rq->tag == tag);
521} 496}
522 497
523struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) 498struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
524{ 499{
525 struct request *rq = tags->rqs[tag]; 500 struct request *rq = tags->rqs[tag];
501 /* mq_ctx of flush rq is always cloned from the corresponding req */
502 struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx);
526 503
527 if (!is_flush_request(rq, tag)) 504 if (!is_flush_request(rq, fq, tag))
528 return rq; 505 return rq;
529 506
530 return rq->q->flush_rq; 507 return fq->flush_rq;
531} 508}
532EXPORT_SYMBOL(blk_mq_tag_to_rq); 509EXPORT_SYMBOL(blk_mq_tag_to_rq);
533 510
534struct blk_mq_timeout_data { 511struct blk_mq_timeout_data {
535 struct blk_mq_hw_ctx *hctx; 512 unsigned long next;
536 unsigned long *next; 513 unsigned int next_set;
537 unsigned int *next_set;
538}; 514};
539 515
540static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) 516void blk_mq_rq_timed_out(struct request *req, bool reserved)
541{ 517{
542 struct blk_mq_timeout_data *data = __data; 518 struct blk_mq_ops *ops = req->q->mq_ops;
543 struct blk_mq_hw_ctx *hctx = data->hctx; 519 enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
544 unsigned int tag;
545
546 /* It may not be in flight yet (this is where
547 * the REQ_ATOMIC_STARTED flag comes in). The requests are
548 * statically allocated, so we know it's always safe to access the
549 * memory associated with a bit offset into ->rqs[].
550 */
551 tag = 0;
552 do {
553 struct request *rq;
554
555 tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag);
556 if (tag >= hctx->tags->nr_tags)
557 break;
558
559 rq = blk_mq_tag_to_rq(hctx->tags, tag++);
560 if (rq->q != hctx->queue)
561 continue;
562 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
563 continue;
564
565 blk_rq_check_expired(rq, data->next, data->next_set);
566 } while (1);
567}
568
569static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
570 unsigned long *next,
571 unsigned int *next_set)
572{
573 struct blk_mq_timeout_data data = {
574 .hctx = hctx,
575 .next = next,
576 .next_set = next_set,
577 };
578
579 /*
580 * Ask the tagging code to iterate busy requests, so we can
581 * check them for timeout.
582 */
583 blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
584}
585
586static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
587{
588 struct request_queue *q = rq->q;
589 520
590 /* 521 /*
591 * We know that complete is set at this point. If STARTED isn't set 522 * We know that complete is set at this point. If STARTED isn't set
@@ -596,21 +527,54 @@ static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
596 * both flags will get cleared. So check here again, and ignore 527 * both flags will get cleared. So check here again, and ignore
597 * a timeout event with a request that isn't active. 528 * a timeout event with a request that isn't active.
598 */ 529 */
599 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) 530 if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags))
600 return BLK_EH_NOT_HANDLED; 531 return;
532
533 if (ops->timeout)
534 ret = ops->timeout(req, reserved);
535
536 switch (ret) {
537 case BLK_EH_HANDLED:
538 __blk_mq_complete_request(req);
539 break;
540 case BLK_EH_RESET_TIMER:
541 blk_add_timer(req);
542 blk_clear_rq_complete(req);
543 break;
544 case BLK_EH_NOT_HANDLED:
545 break;
546 default:
547 printk(KERN_ERR "block: bad eh return: %d\n", ret);
548 break;
549 }
550}
551
552static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
553 struct request *rq, void *priv, bool reserved)
554{
555 struct blk_mq_timeout_data *data = priv;
601 556
602 if (!q->mq_ops->timeout) 557 if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
603 return BLK_EH_RESET_TIMER; 558 return;
604 559
605 return q->mq_ops->timeout(rq); 560 if (time_after_eq(jiffies, rq->deadline)) {
561 if (!blk_mark_rq_complete(rq))
562 blk_mq_rq_timed_out(rq, reserved);
563 } else if (!data->next_set || time_after(data->next, rq->deadline)) {
564 data->next = rq->deadline;
565 data->next_set = 1;
566 }
606} 567}
607 568
608static void blk_mq_rq_timer(unsigned long data) 569static void blk_mq_rq_timer(unsigned long priv)
609{ 570{
610 struct request_queue *q = (struct request_queue *) data; 571 struct request_queue *q = (struct request_queue *)priv;
572 struct blk_mq_timeout_data data = {
573 .next = 0,
574 .next_set = 0,
575 };
611 struct blk_mq_hw_ctx *hctx; 576 struct blk_mq_hw_ctx *hctx;
612 unsigned long next = 0; 577 int i;
613 int i, next_set = 0;
614 578
615 queue_for_each_hw_ctx(q, hctx, i) { 579 queue_for_each_hw_ctx(q, hctx, i) {
616 /* 580 /*
@@ -620,12 +584,12 @@ static void blk_mq_rq_timer(unsigned long data)
620 if (!hctx->nr_ctx || !hctx->tags) 584 if (!hctx->nr_ctx || !hctx->tags)
621 continue; 585 continue;
622 586
623 blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); 587 blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
624 } 588 }
625 589
626 if (next_set) { 590 if (data.next_set) {
627 next = blk_rq_timeout(round_jiffies_up(next)); 591 data.next = blk_rq_timeout(round_jiffies_up(data.next));
628 mod_timer(&q->timeout, next); 592 mod_timer(&q->timeout, data.next);
629 } else { 593 } else {
630 queue_for_each_hw_ctx(q, hctx, i) 594 queue_for_each_hw_ctx(q, hctx, i)
631 blk_mq_tag_idle(hctx); 595 blk_mq_tag_idle(hctx);
@@ -751,9 +715,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
751 rq = list_first_entry(&rq_list, struct request, queuelist); 715 rq = list_first_entry(&rq_list, struct request, queuelist);
752 list_del_init(&rq->queuelist); 716 list_del_init(&rq->queuelist);
753 717
754 blk_mq_start_request(rq, list_empty(&rq_list)); 718 ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
755
756 ret = q->mq_ops->queue_rq(hctx, rq);
757 switch (ret) { 719 switch (ret) {
758 case BLK_MQ_RQ_QUEUE_OK: 720 case BLK_MQ_RQ_QUEUE_OK:
759 queued++; 721 queued++;
@@ -766,7 +728,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
766 pr_err("blk-mq: bad return on queue: %d\n", ret); 728 pr_err("blk-mq: bad return on queue: %d\n", ret);
767 case BLK_MQ_RQ_QUEUE_ERROR: 729 case BLK_MQ_RQ_QUEUE_ERROR:
768 rq->errors = -EIO; 730 rq->errors = -EIO;
769 blk_mq_end_io(rq, rq->errors); 731 blk_mq_end_request(rq, rq->errors);
770 break; 732 break;
771 } 733 }
772 734
@@ -1194,14 +1156,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1194 int ret; 1156 int ret;
1195 1157
1196 blk_mq_bio_to_request(rq, bio); 1158 blk_mq_bio_to_request(rq, bio);
1197 blk_mq_start_request(rq, true);
1198 1159
1199 /* 1160 /*
1200 * For OK queue, we are done. For error, kill it. Any other 1161 * For OK queue, we are done. For error, kill it. Any other
1201 * error (busy), just add it to our list as we previously 1162 * error (busy), just add it to our list as we previously
1202 * would have done 1163 * would have done
1203 */ 1164 */
1204 ret = q->mq_ops->queue_rq(data.hctx, rq); 1165 ret = q->mq_ops->queue_rq(data.hctx, rq, true);
1205 if (ret == BLK_MQ_RQ_QUEUE_OK) 1166 if (ret == BLK_MQ_RQ_QUEUE_OK)
1206 goto done; 1167 goto done;
1207 else { 1168 else {
@@ -1209,7 +1170,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1209 1170
1210 if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1171 if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
1211 rq->errors = -EIO; 1172 rq->errors = -EIO;
1212 blk_mq_end_io(rq, rq->errors); 1173 blk_mq_end_request(rq, rq->errors);
1213 goto done; 1174 goto done;
1214 } 1175 }
1215 } 1176 }
@@ -1531,6 +1492,28 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
1531 return NOTIFY_OK; 1492 return NOTIFY_OK;
1532} 1493}
1533 1494
1495static void blk_mq_exit_hctx(struct request_queue *q,
1496 struct blk_mq_tag_set *set,
1497 struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
1498{
1499 unsigned flush_start_tag = set->queue_depth;
1500
1501 blk_mq_tag_idle(hctx);
1502
1503 if (set->ops->exit_request)
1504 set->ops->exit_request(set->driver_data,
1505 hctx->fq->flush_rq, hctx_idx,
1506 flush_start_tag + hctx_idx);
1507
1508 if (set->ops->exit_hctx)
1509 set->ops->exit_hctx(hctx, hctx_idx);
1510
1511 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1512 blk_free_flush_queue(hctx->fq);
1513 kfree(hctx->ctxs);
1514 blk_mq_free_bitmap(&hctx->ctx_map);
1515}
1516
1534static void blk_mq_exit_hw_queues(struct request_queue *q, 1517static void blk_mq_exit_hw_queues(struct request_queue *q,
1535 struct blk_mq_tag_set *set, int nr_queue) 1518 struct blk_mq_tag_set *set, int nr_queue)
1536{ 1519{
@@ -1540,17 +1523,8 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
1540 queue_for_each_hw_ctx(q, hctx, i) { 1523 queue_for_each_hw_ctx(q, hctx, i) {
1541 if (i == nr_queue) 1524 if (i == nr_queue)
1542 break; 1525 break;
1543 1526 blk_mq_exit_hctx(q, set, hctx, i);
1544 blk_mq_tag_idle(hctx);
1545
1546 if (set->ops->exit_hctx)
1547 set->ops->exit_hctx(hctx, i);
1548
1549 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1550 kfree(hctx->ctxs);
1551 blk_mq_free_bitmap(&hctx->ctx_map);
1552 } 1527 }
1553
1554} 1528}
1555 1529
1556static void blk_mq_free_hw_queues(struct request_queue *q, 1530static void blk_mq_free_hw_queues(struct request_queue *q,
@@ -1565,53 +1539,88 @@ static void blk_mq_free_hw_queues(struct request_queue *q,
1565 } 1539 }
1566} 1540}
1567 1541
1568static int blk_mq_init_hw_queues(struct request_queue *q, 1542static int blk_mq_init_hctx(struct request_queue *q,
1569 struct blk_mq_tag_set *set) 1543 struct blk_mq_tag_set *set,
1544 struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
1570{ 1545{
1571 struct blk_mq_hw_ctx *hctx; 1546 int node;
1572 unsigned int i; 1547 unsigned flush_start_tag = set->queue_depth;
1548
1549 node = hctx->numa_node;
1550 if (node == NUMA_NO_NODE)
1551 node = hctx->numa_node = set->numa_node;
1552
1553 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
1554 INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
1555 spin_lock_init(&hctx->lock);
1556 INIT_LIST_HEAD(&hctx->dispatch);
1557 hctx->queue = q;
1558 hctx->queue_num = hctx_idx;
1559 hctx->flags = set->flags;
1560 hctx->cmd_size = set->cmd_size;
1561
1562 blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
1563 blk_mq_hctx_notify, hctx);
1564 blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
1565
1566 hctx->tags = set->tags[hctx_idx];
1573 1567
1574 /* 1568 /*
1575 * Initialize hardware queues 1569 * Allocate space for all possible cpus to avoid allocation at
1570 * runtime
1576 */ 1571 */
1577 queue_for_each_hw_ctx(q, hctx, i) { 1572 hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
1578 int node; 1573 GFP_KERNEL, node);
1574 if (!hctx->ctxs)
1575 goto unregister_cpu_notifier;
1579 1576
1580 node = hctx->numa_node; 1577 if (blk_mq_alloc_bitmap(&hctx->ctx_map, node))
1581 if (node == NUMA_NO_NODE) 1578 goto free_ctxs;
1582 node = hctx->numa_node = set->numa_node;
1583 1579
1584 INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); 1580 hctx->nr_ctx = 0;
1585 INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
1586 spin_lock_init(&hctx->lock);
1587 INIT_LIST_HEAD(&hctx->dispatch);
1588 hctx->queue = q;
1589 hctx->queue_num = i;
1590 hctx->flags = set->flags;
1591 hctx->cmd_size = set->cmd_size;
1592 1581
1593 blk_mq_init_cpu_notifier(&hctx->cpu_notifier, 1582 if (set->ops->init_hctx &&
1594 blk_mq_hctx_notify, hctx); 1583 set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
1595 blk_mq_register_cpu_notifier(&hctx->cpu_notifier); 1584 goto free_bitmap;
1596 1585
1597 hctx->tags = set->tags[i]; 1586 hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
1587 if (!hctx->fq)
1588 goto exit_hctx;
1598 1589
1599 /* 1590 if (set->ops->init_request &&
1600 * Allocate space for all possible cpus to avoid allocation at 1591 set->ops->init_request(set->driver_data,
1601 * runtime 1592 hctx->fq->flush_rq, hctx_idx,
1602 */ 1593 flush_start_tag + hctx_idx, node))
1603 hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *), 1594 goto free_fq;
1604 GFP_KERNEL, node);
1605 if (!hctx->ctxs)
1606 break;
1607 1595
1608 if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) 1596 return 0;
1609 break;
1610 1597
1611 hctx->nr_ctx = 0; 1598 free_fq:
1599 kfree(hctx->fq);
1600 exit_hctx:
1601 if (set->ops->exit_hctx)
1602 set->ops->exit_hctx(hctx, hctx_idx);
1603 free_bitmap:
1604 blk_mq_free_bitmap(&hctx->ctx_map);
1605 free_ctxs:
1606 kfree(hctx->ctxs);
1607 unregister_cpu_notifier:
1608 blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
1609
1610 return -1;
1611}
1612
1613static int blk_mq_init_hw_queues(struct request_queue *q,
1614 struct blk_mq_tag_set *set)
1615{
1616 struct blk_mq_hw_ctx *hctx;
1617 unsigned int i;
1612 1618
1613 if (set->ops->init_hctx && 1619 /*
1614 set->ops->init_hctx(hctx, set->driver_data, i)) 1620 * Initialize hardware queues
1621 */
1622 queue_for_each_hw_ctx(q, hctx, i) {
1623 if (blk_mq_init_hctx(q, set, hctx, i))
1615 break; 1624 break;
1616 } 1625 }
1617 1626
@@ -1765,6 +1774,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1765 if (!ctx) 1774 if (!ctx)
1766 return ERR_PTR(-ENOMEM); 1775 return ERR_PTR(-ENOMEM);
1767 1776
1777 /*
1778 * If a crashdump is active, then we are potentially in a very
1779 * memory constrained environment. Limit us to 1 queue and
1780 * 64 tags to prevent using too much memory.
1781 */
1782 if (is_kdump_kernel()) {
1783 set->nr_hw_queues = 1;
1784 set->queue_depth = min(64U, set->queue_depth);
1785 }
1786
1768 hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, 1787 hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
1769 set->numa_node); 1788 set->numa_node);
1770 1789
@@ -1783,7 +1802,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1783 if (!hctxs[i]) 1802 if (!hctxs[i])
1784 goto err_hctxs; 1803 goto err_hctxs;
1785 1804
1786 if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL)) 1805 if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
1806 node))
1787 goto err_hctxs; 1807 goto err_hctxs;
1788 1808
1789 atomic_set(&hctxs[i]->nr_active, 0); 1809 atomic_set(&hctxs[i]->nr_active, 0);
@@ -1830,7 +1850,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1830 else 1850 else
1831 blk_queue_make_request(q, blk_sq_make_request); 1851 blk_queue_make_request(q, blk_sq_make_request);
1832 1852
1833 blk_queue_rq_timed_out(q, blk_mq_rq_timed_out);
1834 if (set->timeout) 1853 if (set->timeout)
1835 blk_queue_rq_timeout(q, set->timeout); 1854 blk_queue_rq_timeout(q, set->timeout);
1836 1855
@@ -1842,17 +1861,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1842 if (set->ops->complete) 1861 if (set->ops->complete)
1843 blk_queue_softirq_done(q, set->ops->complete); 1862 blk_queue_softirq_done(q, set->ops->complete);
1844 1863
1845 blk_mq_init_flush(q);
1846 blk_mq_init_cpu_queues(q, set->nr_hw_queues); 1864 blk_mq_init_cpu_queues(q, set->nr_hw_queues);
1847 1865
1848 q->flush_rq = kzalloc(round_up(sizeof(struct request) +
1849 set->cmd_size, cache_line_size()),
1850 GFP_KERNEL);
1851 if (!q->flush_rq)
1852 goto err_hw;
1853
1854 if (blk_mq_init_hw_queues(q, set)) 1866 if (blk_mq_init_hw_queues(q, set))
1855 goto err_flush_rq; 1867 goto err_hw;
1856 1868
1857 mutex_lock(&all_q_mutex); 1869 mutex_lock(&all_q_mutex);
1858 list_add_tail(&q->all_q_node, &all_q_list); 1870 list_add_tail(&q->all_q_node, &all_q_list);
@@ -1864,8 +1876,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1864 1876
1865 return q; 1877 return q;
1866 1878
1867err_flush_rq:
1868 kfree(q->flush_rq);
1869err_hw: 1879err_hw:
1870 blk_cleanup_queue(q); 1880 blk_cleanup_queue(q);
1871err_hctxs: 1881err_hctxs:
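
Two caller-visible changes in this file are easy to miss in the churn: blk_mq_alloc_request() now returns an ERR_PTR (for example -EWOULDBLOCK when no tag is free and the caller will not wait) instead of NULL, and drivers are expected to call blk_mq_start_request() themselves from ->queue_rq(). A hedged sketch of the allocation side only; my_alloc_and_drop() and the WRITE/GFP_KERNEL choices are illustrative.

#include <linux/blk-mq.h>
#include <linux/err.h>

static int my_alloc_and_drop(struct request_queue *q)
{
        struct request *rq;

        rq = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false);
        if (IS_ERR(rq))
                return PTR_ERR(rq);     /* no more NULL checks */

        /* a real caller would set the request up and issue it here */
        blk_mq_free_request(rq);
        return 0;
}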
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ca4964a6295d..d567d5283ffa 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,7 +27,6 @@ struct blk_mq_ctx {
27 27
28void __blk_mq_complete_request(struct request *rq); 28void __blk_mq_complete_request(struct request *rq);
29void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 29void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
30void blk_mq_init_flush(struct request_queue *q);
31void blk_mq_freeze_queue(struct request_queue *q); 30void blk_mq_freeze_queue(struct request_queue *q);
32void blk_mq_free_queue(struct request_queue *q); 31void blk_mq_free_queue(struct request_queue *q);
33void blk_mq_clone_flush_request(struct request *flush_rq, 32void blk_mq_clone_flush_request(struct request *flush_rq,
@@ -60,6 +59,8 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
60extern int blk_mq_sysfs_register(struct request_queue *q); 59extern int blk_mq_sysfs_register(struct request_queue *q);
61extern void blk_mq_sysfs_unregister(struct request_queue *q); 60extern void blk_mq_sysfs_unregister(struct request_queue *q);
62 61
62extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
63
63/* 64/*
64 * Basic implementation of sparser bitmap, allowing the user to spread 65 * Basic implementation of sparser bitmap, allowing the user to spread
65 * the bits over more cachelines. 66 * the bits over more cachelines.
diff --git a/block/blk-settings.c b/block/blk-settings.c
index f1a1795a5683..aa02247d227e 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -574,7 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
574 bottom = max(b->physical_block_size, b->io_min) + alignment; 574 bottom = max(b->physical_block_size, b->io_min) + alignment;
575 575
576 /* Verify that top and bottom intervals line up */ 576 /* Verify that top and bottom intervals line up */
577 if (max(top, bottom) & (min(top, bottom) - 1)) { 577 if (max(top, bottom) % min(top, bottom)) {
578 t->misaligned = 1; 578 t->misaligned = 1;
579 ret = -1; 579 ret = -1;
580 } 580 }
@@ -619,7 +619,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
619 619
620 /* Find lowest common alignment_offset */ 620 /* Find lowest common alignment_offset */
621 t->alignment_offset = lcm(t->alignment_offset, alignment) 621 t->alignment_offset = lcm(t->alignment_offset, alignment)
622 & (max(t->physical_block_size, t->io_min) - 1); 622 % max(t->physical_block_size, t->io_min);
623 623
624 /* Verify that new alignment_offset is on a logical block boundary */ 624 /* Verify that new alignment_offset is on a logical block boundary */
625 if (t->alignment_offset & (t->logical_block_size - 1)) { 625 if (t->alignment_offset & (t->logical_block_size - 1)) {
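
The old expressions used x & (y - 1), which only equals x % y when y is a power of two; with an io_min that is not a power of two (say 3072) the mask both hides real misalignment and invents bogus offsets. A small stand-alone illustration of the two failure directions (plain userspace C, not kernel code):

#include <stdio.h>

int main(void)
{
        unsigned int io_min = 3072;     /* 3 KB: not a power of two */

        /* 4096 is misaligned by 1024, but the mask reports 0 */
        printf("%u %u\n", 4096u & (io_min - 1), 4096u % io_min);  /* 0 1024 */

        /* 6144 is perfectly aligned, but the mask reports 2048 */
        printf("%u %u\n", 6144u & (io_min - 1), 6144u % io_min);  /* 2048 0 */

        return 0;
}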
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 521ae9089c50..1fac43408911 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -519,8 +519,8 @@ static void blk_release_queue(struct kobject *kobj)
519 519
520 if (q->mq_ops) 520 if (q->mq_ops)
521 blk_mq_free_queue(q); 521 blk_mq_free_queue(q);
522 522 else
523 kfree(q->flush_rq); 523 blk_free_flush_queue(q->fq);
524 524
525 blk_trace_shutdown(q); 525 blk_trace_shutdown(q);
526 526
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 95a09590ccfd..56c025894cdf 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -90,10 +90,7 @@ static void blk_rq_timed_out(struct request *req)
90 switch (ret) { 90 switch (ret) {
91 case BLK_EH_HANDLED: 91 case BLK_EH_HANDLED:
92 /* Can we use req->errors here? */ 92 /* Can we use req->errors here? */
93 if (q->mq_ops) 93 __blk_complete_request(req);
94 __blk_mq_complete_request(req);
95 else
96 __blk_complete_request(req);
97 break; 94 break;
98 case BLK_EH_RESET_TIMER: 95 case BLK_EH_RESET_TIMER:
99 blk_add_timer(req); 96 blk_add_timer(req);
@@ -113,7 +110,7 @@ static void blk_rq_timed_out(struct request *req)
113 } 110 }
114} 111}
115 112
116void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, 113static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
117 unsigned int *next_set) 114 unsigned int *next_set)
118{ 115{
119 if (time_after_eq(jiffies, rq->deadline)) { 116 if (time_after_eq(jiffies, rq->deadline)) {
@@ -162,7 +159,10 @@ void blk_abort_request(struct request *req)
162 if (blk_mark_rq_complete(req)) 159 if (blk_mark_rq_complete(req))
163 return; 160 return;
164 blk_delete_timer(req); 161 blk_delete_timer(req);
165 blk_rq_timed_out(req); 162 if (req->q->mq_ops)
163 blk_mq_rq_timed_out(req, false);
164 else
165 blk_rq_timed_out(req);
166} 166}
167EXPORT_SYMBOL_GPL(blk_abort_request); 167EXPORT_SYMBOL_GPL(blk_abort_request);
168 168
@@ -190,7 +190,8 @@ void blk_add_timer(struct request *req)
190 struct request_queue *q = req->q; 190 struct request_queue *q = req->q;
191 unsigned long expiry; 191 unsigned long expiry;
192 192
193 if (!q->rq_timed_out_fn) 193 /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
194 if (!q->mq_ops && !q->rq_timed_out_fn)
194 return; 195 return;
195 196
196 BUG_ON(!list_empty(&req->timeout_list)); 197 BUG_ON(!list_empty(&req->timeout_list));
diff --git a/block/blk.h b/block/blk.h
index 6748c4f8d7a1..43b036185712 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,6 +2,8 @@
2#define BLK_INTERNAL_H 2#define BLK_INTERNAL_H
3 3
4#include <linux/idr.h> 4#include <linux/idr.h>
5#include <linux/blk-mq.h>
6#include "blk-mq.h"
5 7
6/* Amount of time in which a process may batch requests */ 8/* Amount of time in which a process may batch requests */
7#define BLK_BATCH_TIME (HZ/50UL) 9#define BLK_BATCH_TIME (HZ/50UL)
@@ -12,16 +14,44 @@
12/* Max future timer expiry for timeouts */ 14/* Max future timer expiry for timeouts */
13#define BLK_MAX_TIMEOUT (5 * HZ) 15#define BLK_MAX_TIMEOUT (5 * HZ)
14 16
17struct blk_flush_queue {
18 unsigned int flush_queue_delayed:1;
19 unsigned int flush_pending_idx:1;
20 unsigned int flush_running_idx:1;
21 unsigned long flush_pending_since;
22 struct list_head flush_queue[2];
23 struct list_head flush_data_in_flight;
24 struct request *flush_rq;
25 spinlock_t mq_flush_lock;
26};
27
15extern struct kmem_cache *blk_requestq_cachep; 28extern struct kmem_cache *blk_requestq_cachep;
16extern struct kmem_cache *request_cachep; 29extern struct kmem_cache *request_cachep;
17extern struct kobj_type blk_queue_ktype; 30extern struct kobj_type blk_queue_ktype;
18extern struct ida blk_queue_ida; 31extern struct ida blk_queue_ida;
19 32
33static inline struct blk_flush_queue *blk_get_flush_queue(
34 struct request_queue *q, struct blk_mq_ctx *ctx)
35{
36 struct blk_mq_hw_ctx *hctx;
37
38 if (!q->mq_ops)
39 return q->fq;
40
41 hctx = q->mq_ops->map_queue(q, ctx->cpu);
42
43 return hctx->fq;
44}
45
20static inline void __blk_get_queue(struct request_queue *q) 46static inline void __blk_get_queue(struct request_queue *q)
21{ 47{
22 kobject_get(&q->kobj); 48 kobject_get(&q->kobj);
23} 49}
24 50
51struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
52 int node, int cmd_size);
53void blk_free_flush_queue(struct blk_flush_queue *q);
54
25int blk_init_rl(struct request_list *rl, struct request_queue *q, 55int blk_init_rl(struct request_list *rl, struct request_queue *q,
26 gfp_t gfp_mask); 56 gfp_t gfp_mask);
27void blk_exit_rl(struct request_list *rl); 57void blk_exit_rl(struct request_list *rl);
@@ -38,8 +68,6 @@ bool __blk_end_bidi_request(struct request *rq, int error,
38 unsigned int nr_bytes, unsigned int bidi_bytes); 68 unsigned int nr_bytes, unsigned int bidi_bytes);
39 69
40void blk_rq_timed_out_timer(unsigned long data); 70void blk_rq_timed_out_timer(unsigned long data);
41void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
42 unsigned int *next_set);
43unsigned long blk_rq_timeout(unsigned long timeout); 71unsigned long blk_rq_timeout(unsigned long timeout);
44void blk_add_timer(struct request *req); 72void blk_add_timer(struct request *req);
45void blk_delete_timer(struct request *); 73void blk_delete_timer(struct request *);
@@ -88,6 +116,7 @@ void blk_insert_flush(struct request *rq);
88static inline struct request *__elv_next_request(struct request_queue *q) 116static inline struct request *__elv_next_request(struct request_queue *q)
89{ 117{
90 struct request *rq; 118 struct request *rq;
119 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
91 120
92 while (1) { 121 while (1) {
93 if (!list_empty(&q->queue_head)) { 122 if (!list_empty(&q->queue_head)) {
@@ -110,9 +139,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
110 * should be restarted later. Please see flush_end_io() for 139 * should be restarted later. Please see flush_end_io() for
111 * details. 140 * details.
112 */ 141 */
113 if (q->flush_pending_idx != q->flush_running_idx && 142 if (fq->flush_pending_idx != fq->flush_running_idx &&
114 !queue_flush_queueable(q)) { 143 !queue_flush_queueable(q)) {
115 q->flush_queue_delayed = 1; 144 fq->flush_queue_delayed = 1;
116 return NULL; 145 return NULL;
117 } 146 }
118 if (unlikely(blk_queue_bypass(q)) || 147 if (unlikely(blk_queue_bypass(q)) ||
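
blk_get_flush_queue() hides where the flush state now lives: on q->fq for the legacy request path (which passes a NULL ctx) and on the hardware context that the submitting CPU maps to for blk-mq. A hedged sketch of a lookup in the style of the flush code; my_flush_pending() is illustrative and assumes block-internal headers.

static bool my_flush_pending(struct request *rq)
{
        /* ctx is NULL (and unused) on non-mq queues */
        struct blk_flush_queue *fq = blk_get_flush_queue(rq->q, rq->mq_ctx);

        return fq->flush_pending_idx != fq->flush_running_idx;
}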
diff --git a/block/bsg.c b/block/bsg.c
index ff46addde5d8..276e869e686c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -270,8 +270,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
270 * map scatter-gather elements separately and string them to request 270 * map scatter-gather elements separately and string them to request
271 */ 271 */
272 rq = blk_get_request(q, rw, GFP_KERNEL); 272 rq = blk_get_request(q, rw, GFP_KERNEL);
273 if (!rq) 273 if (IS_ERR(rq))
274 return ERR_PTR(-ENOMEM); 274 return rq;
275 blk_rq_set_block_pc(rq); 275 blk_rq_set_block_pc(rq);
276 276
277 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); 277 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
@@ -285,8 +285,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
285 } 285 }
286 286
287 next_rq = blk_get_request(q, READ, GFP_KERNEL); 287 next_rq = blk_get_request(q, READ, GFP_KERNEL);
288 if (!next_rq) { 288 if (IS_ERR(next_rq)) {
289 ret = -ENOMEM; 289 ret = PTR_ERR(next_rq);
290 next_rq = NULL;
290 goto out; 291 goto out;
291 } 292 }
292 rq->next_rq = next_rq; 293 rq->next_rq = next_rq;
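
blk_get_request() now tells callers why it failed instead of returning a bare NULL, so bsg and the SCSI ioctl paths below propagate PTR_ERR() rather than hard-coding -ENOMEM. A hedged sketch of the updated caller pattern; my_issue_pc_request() is illustrative and skips the payload setup.

#include <linux/blkdev.h>
#include <linux/err.h>

static int my_issue_pc_request(struct request_queue *q)
{
        struct request *rq;

        rq = blk_get_request(q, READ, GFP_KERNEL);
        if (IS_ERR(rq))
                return PTR_ERR(rq);     /* e.g. -ENOMEM, or the queue is gone */

        blk_rq_set_block_pc(rq);
        /* fill in rq->cmd, map data, issue, then release */
        blk_put_request(rq);
        return 0;
}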
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3f31cf9508e6..6f2751d305de 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -299,7 +299,7 @@ struct cfq_io_cq {
299 struct cfq_ttime ttime; 299 struct cfq_ttime ttime;
300 int ioprio; /* the current ioprio */ 300 int ioprio; /* the current ioprio */
301#ifdef CONFIG_CFQ_GROUP_IOSCHED 301#ifdef CONFIG_CFQ_GROUP_IOSCHED
302 uint64_t blkcg_id; /* the current blkcg ID */ 302 uint64_t blkcg_serial_nr; /* the current blkcg serial */
303#endif 303#endif
304}; 304};
305 305
@@ -3547,17 +3547,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3547{ 3547{
3548 struct cfq_data *cfqd = cic_to_cfqd(cic); 3548 struct cfq_data *cfqd = cic_to_cfqd(cic);
3549 struct cfq_queue *sync_cfqq; 3549 struct cfq_queue *sync_cfqq;
3550 uint64_t id; 3550 uint64_t serial_nr;
3551 3551
3552 rcu_read_lock(); 3552 rcu_read_lock();
3553 id = bio_blkcg(bio)->id; 3553 serial_nr = bio_blkcg(bio)->css.serial_nr;
3554 rcu_read_unlock(); 3554 rcu_read_unlock();
3555 3555
3556 /* 3556 /*
3557 * Check whether blkcg has changed. The condition may trigger 3557 * Check whether blkcg has changed. The condition may trigger
3558 * spuriously on a newly created cic but there's no harm. 3558 * spuriously on a newly created cic but there's no harm.
3559 */ 3559 */
3560 if (unlikely(!cfqd) || likely(cic->blkcg_id == id)) 3560 if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
3561 return; 3561 return;
3562 3562
3563 sync_cfqq = cic_to_cfqq(cic, 1); 3563 sync_cfqq = cic_to_cfqq(cic, 1);
@@ -3571,7 +3571,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3571 cfq_put_queue(sync_cfqq); 3571 cfq_put_queue(sync_cfqq);
3572 } 3572 }
3573 3573
3574 cic->blkcg_id = id; 3574 cic->blkcg_serial_nr = serial_nr;
3575} 3575}
3576#else 3576#else
3577static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { } 3577static inline void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) { }
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 18b282ce361e..f678c733df40 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
709 if (!arg) 709 if (!arg)
710 return -EINVAL; 710 return -EINVAL;
711 bdi = blk_get_backing_dev_info(bdev); 711 bdi = blk_get_backing_dev_info(bdev);
712 if (bdi == NULL)
713 return -ENOTTY;
714 return compat_put_long(arg, 712 return compat_put_long(arg,
715 (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); 713 (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
716 case BLKROGET: /* compatible */ 714 case BLKROGET: /* compatible */
@@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
731 if (!capable(CAP_SYS_ADMIN)) 729 if (!capable(CAP_SYS_ADMIN))
732 return -EACCES; 730 return -EACCES;
733 bdi = blk_get_backing_dev_info(bdev); 731 bdi = blk_get_backing_dev_info(bdev);
734 if (bdi == NULL)
735 return -ENOTTY;
736 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; 732 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
737 return 0; 733 return 0;
738 case BLKGETSIZE: 734 case BLKGETSIZE:
diff --git a/block/ioctl.c b/block/ioctl.c
index d6cda8147c91..6c7bf903742f 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
356 if (!arg) 356 if (!arg)
357 return -EINVAL; 357 return -EINVAL;
358 bdi = blk_get_backing_dev_info(bdev); 358 bdi = blk_get_backing_dev_info(bdev);
359 if (bdi == NULL)
360 return -ENOTTY;
361 return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); 359 return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
362 case BLKROGET: 360 case BLKROGET:
363 return put_int(arg, bdev_read_only(bdev) != 0); 361 return put_int(arg, bdev_read_only(bdev) != 0);
@@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
386 if(!capable(CAP_SYS_ADMIN)) 384 if(!capable(CAP_SYS_ADMIN))
387 return -EACCES; 385 return -EACCES;
388 bdi = blk_get_backing_dev_info(bdev); 386 bdi = blk_get_backing_dev_info(bdev);
389 if (bdi == NULL)
390 return -ENOTTY;
391 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; 387 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
392 return 0; 388 return 0;
393 case BLKBSZSET: 389 case BLKBSZSET:
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index 76d8ba6379a9..c2c48ec64b27 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -81,7 +81,7 @@ int mac_partition(struct parsed_partitions *state)
81 be32_to_cpu(part->start_block) * (secsize/512), 81 be32_to_cpu(part->start_block) * (secsize/512),
82 be32_to_cpu(part->block_count) * (secsize/512)); 82 be32_to_cpu(part->block_count) * (secsize/512));
83 83
84 if (!strnicmp(part->type, "Linux_RAID", 10)) 84 if (!strncasecmp(part->type, "Linux_RAID", 10))
85 state->parts[slot].flags = ADDPART_FLAG_RAID; 85 state->parts[slot].flags = ADDPART_FLAG_RAID;
86#ifdef CONFIG_PPC_PMAC 86#ifdef CONFIG_PPC_PMAC
87 /* 87 /*
@@ -100,7 +100,7 @@ int mac_partition(struct parsed_partitions *state)
100 goodness++; 100 goodness++;
101 101
102 if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0 102 if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0
103 || (strnicmp(part->type, "Linux", 5) == 0 103 || (strncasecmp(part->type, "Linux", 5) == 0
104 && strcasecmp(part->type, "Linux_swap") != 0)) { 104 && strcasecmp(part->type, "Linux_swap") != 0)) {
105 int i, l; 105 int i, l;
106 106
@@ -109,13 +109,13 @@ int mac_partition(struct parsed_partitions *state)
109 if (strcmp(part->name, "/") == 0) 109 if (strcmp(part->name, "/") == 0)
110 goodness++; 110 goodness++;
111 for (i = 0; i <= l - 4; ++i) { 111 for (i = 0; i <= l - 4; ++i) {
112 if (strnicmp(part->name + i, "root", 112 if (strncasecmp(part->name + i, "root",
113 4) == 0) { 113 4) == 0) {
114 goodness += 2; 114 goodness += 2;
115 break; 115 break;
116 } 116 }
117 } 117 }
118 if (strnicmp(part->name, "swap", 4) == 0) 118 if (strncasecmp(part->name, "swap", 4) == 0)
119 goodness--; 119 goodness--;
120 } 120 }
121 121
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9b8eaeca6a79..abb2e65b24cc 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -316,8 +316,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
316 316
317 ret = -ENOMEM; 317 ret = -ENOMEM;
318 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); 318 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
319 if (!rq) 319 if (IS_ERR(rq))
320 goto out; 320 return PTR_ERR(rq);
321 blk_rq_set_block_pc(rq); 321 blk_rq_set_block_pc(rq);
322 322
323 if (hdr->cmd_len > BLK_MAX_CDB) { 323 if (hdr->cmd_len > BLK_MAX_CDB) {
@@ -387,7 +387,6 @@ out_free_cdb:
387 kfree(rq->cmd); 387 kfree(rq->cmd);
388out_put_request: 388out_put_request:
389 blk_put_request(rq); 389 blk_put_request(rq);
390out:
391 return ret; 390 return ret;
392} 391}
393 392
@@ -457,8 +456,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
457 } 456 }
458 457
459 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); 458 rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
460 if (!rq) { 459 if (IS_ERR(rq)) {
461 err = -ENOMEM; 460 err = PTR_ERR(rq);
462 goto error; 461 goto error;
463 } 462 }
464 blk_rq_set_block_pc(rq); 463 blk_rq_set_block_pc(rq);
@@ -548,6 +547,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
548 int err; 547 int err;
549 548
550 rq = blk_get_request(q, WRITE, __GFP_WAIT); 549 rq = blk_get_request(q, WRITE, __GFP_WAIT);
550 if (IS_ERR(rq))
551 return PTR_ERR(rq);
551 blk_rq_set_block_pc(rq); 552 blk_rq_set_block_pc(rq);
552 rq->timeout = BLK_DEFAULT_SG_TIMEOUT; 553 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
553 rq->cmd[0] = cmd; 554 rq->cmd[0] = cmd;
diff --git a/block/t10-pi.c b/block/t10-pi.c
new file mode 100644
index 000000000000..24d6e9715318
--- /dev/null
+++ b/block/t10-pi.c
@@ -0,0 +1,197 @@
1/*
2 * t10_pi.c - Functions for generating and verifying T10 Protection
3 * Information.
4 *
5 * Copyright (C) 2007, 2008, 2014 Oracle Corporation
6 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; see the file COPYING. If not, write to
19 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
20 * USA.
21 *
22 */
23
24#include <linux/t10-pi.h>
25#include <linux/blkdev.h>
26#include <linux/crc-t10dif.h>
27#include <net/checksum.h>
28
29typedef __be16 (csum_fn) (void *, unsigned int);
30
31static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
32static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
33
34static __be16 t10_pi_crc_fn(void *data, unsigned int len)
35{
36 return cpu_to_be16(crc_t10dif(data, len));
37}
38
39static __be16 t10_pi_ip_fn(void *data, unsigned int len)
40{
41 return (__force __be16)ip_compute_csum(data, len);
42}
43
44/*
45 * Type 1 and Type 2 protection use the same format: 16 bit guard tag,
46 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
47 * tag.
48 */
49static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
50 unsigned int type)
51{
52 unsigned int i;
53
54 for (i = 0 ; i < iter->data_size ; i += iter->interval) {
55 struct t10_pi_tuple *pi = iter->prot_buf;
56
57 pi->guard_tag = fn(iter->data_buf, iter->interval);
58 pi->app_tag = 0;
59
60 if (type == 1)
61 pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
62 else
63 pi->ref_tag = 0;
64
65 iter->data_buf += iter->interval;
66 iter->prot_buf += sizeof(struct t10_pi_tuple);
67 iter->seed++;
68 }
69
70 return 0;
71}
72
73static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
74 unsigned int type)
75{
76 unsigned int i;
77
78 for (i = 0 ; i < iter->data_size ; i += iter->interval) {
79 struct t10_pi_tuple *pi = iter->prot_buf;
80 __be16 csum;
81
82 switch (type) {
83 case 1:
84 case 2:
85 if (pi->app_tag == APP_ESCAPE)
86 goto next;
87
88 if (be32_to_cpu(pi->ref_tag) !=
89 lower_32_bits(iter->seed)) {
90 pr_err("%s: ref tag error at location %llu " \
91 "(rcvd %u)\n", iter->disk_name,
92 (unsigned long long)
93 iter->seed, be32_to_cpu(pi->ref_tag));
94 return -EILSEQ;
95 }
96 break;
97 case 3:
98 if (pi->app_tag == APP_ESCAPE &&
99 pi->ref_tag == REF_ESCAPE)
100 goto next;
101 break;
102 }
103
104 csum = fn(iter->data_buf, iter->interval);
105
106 if (pi->guard_tag != csum) {
107 pr_err("%s: guard tag error at sector %llu " \
108 "(rcvd %04x, want %04x)\n", iter->disk_name,
109 (unsigned long long)iter->seed,
110 be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
111 return -EILSEQ;
112 }
113
114next:
115 iter->data_buf += iter->interval;
116 iter->prot_buf += sizeof(struct t10_pi_tuple);
117 iter->seed++;
118 }
119
120 return 0;
121}
122
123static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
124{
125 return t10_pi_generate(iter, t10_pi_crc_fn, 1);
126}
127
128static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
129{
130 return t10_pi_generate(iter, t10_pi_ip_fn, 1);
131}
132
133static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
134{
135 return t10_pi_verify(iter, t10_pi_crc_fn, 1);
136}
137
138static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
139{
140 return t10_pi_verify(iter, t10_pi_ip_fn, 1);
141}
142
143static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
144{
145 return t10_pi_generate(iter, t10_pi_crc_fn, 3);
146}
147
148static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
149{
150 return t10_pi_generate(iter, t10_pi_ip_fn, 3);
151}
152
153static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
154{
155 return t10_pi_verify(iter, t10_pi_crc_fn, 3);
156}
157
158static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
159{
160 return t10_pi_verify(iter, t10_pi_ip_fn, 3);
161}
162
163struct blk_integrity t10_pi_type1_crc = {
164 .name = "T10-DIF-TYPE1-CRC",
165 .generate_fn = t10_pi_type1_generate_crc,
166 .verify_fn = t10_pi_type1_verify_crc,
167 .tuple_size = sizeof(struct t10_pi_tuple),
168 .tag_size = 0,
169};
170EXPORT_SYMBOL(t10_pi_type1_crc);
171
172struct blk_integrity t10_pi_type1_ip = {
173 .name = "T10-DIF-TYPE1-IP",
174 .generate_fn = t10_pi_type1_generate_ip,
175 .verify_fn = t10_pi_type1_verify_ip,
176 .tuple_size = sizeof(struct t10_pi_tuple),
177 .tag_size = 0,
178};
179EXPORT_SYMBOL(t10_pi_type1_ip);
180
181struct blk_integrity t10_pi_type3_crc = {
182 .name = "T10-DIF-TYPE3-CRC",
183 .generate_fn = t10_pi_type3_generate_crc,
184 .verify_fn = t10_pi_type3_verify_crc,
185 .tuple_size = sizeof(struct t10_pi_tuple),
186 .tag_size = 0,
187};
188EXPORT_SYMBOL(t10_pi_type3_crc);
189
190struct blk_integrity t10_pi_type3_ip = {
191 .name = "T10-DIF-TYPE3-IP",
192 .generate_fn = t10_pi_type3_generate_ip,
193 .verify_fn = t10_pi_type3_verify_ip,
194 .tuple_size = sizeof(struct t10_pi_tuple),
195 .tag_size = 0,
196};
197EXPORT_SYMBOL(t10_pi_type3_ip);
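
The four exported profiles pair a tuple layout (Type 1 carries per-sector reference tags, Type 3 does not) with a guard checksum (T10 CRC or IP checksum). A low-level driver that has established its target formats Type 1/CRC protection information could hand the matching profile straight to blk_integrity_register(). This is a hedged sketch, not the sd code; discovering the format (READ CAPACITY(16), Identify data, and so on) is the driver's job, and my_attach_t10_pi() is illustrative.

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/t10-pi.h>

static int my_attach_t10_pi(struct gendisk *disk)
{
        /* registers generate_fn/verify_fn, tuple_size etc. for this disk */
        return blk_integrity_register(disk, &t10_pi_type1_crc);
}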