Diffstat (limited to 'drivers/block')
-rw-r--r--   drivers/block/brd.c        | 109
-rw-r--r--   drivers/block/loop.c       | 416
-rw-r--r--   drivers/block/loop.h       |  18
-rw-r--r--   drivers/block/null_blk.c   |   2
-rw-r--r--   drivers/block/nvme-core.c  |   9
5 files changed, 265 insertions(+), 289 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 89e90ec52f28..c01b921b1b4a 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -438,19 +438,18 @@ static const struct block_device_operations brd_fops = {
438/* 438/*
439 * And now the modules code and kernel interface. 439 * And now the modules code and kernel interface.
440 */ 440 */
441static int rd_nr; 441static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
442int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
443static int max_part;
444static int part_shift;
445static int part_show = 0;
446module_param(rd_nr, int, S_IRUGO); 442module_param(rd_nr, int, S_IRUGO);
447MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); 443MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
444
445int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
448module_param(rd_size, int, S_IRUGO); 446module_param(rd_size, int, S_IRUGO);
449MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); 447MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
448
449static int max_part = 1;
450module_param(max_part, int, S_IRUGO); 450module_param(max_part, int, S_IRUGO);
451MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk"); 451MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
452module_param(part_show, int, S_IRUGO); 452
453MODULE_PARM_DESC(part_show, "Control RAM disk visibility in /proc/partitions");
454MODULE_LICENSE("GPL"); 453MODULE_LICENSE("GPL");
455MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); 454MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
456MODULE_ALIAS("rd"); 455MODULE_ALIAS("rd");
@@ -487,25 +486,33 @@ static struct brd_device *brd_alloc(int i)
487 brd->brd_queue = blk_alloc_queue(GFP_KERNEL); 486 brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
488 if (!brd->brd_queue) 487 if (!brd->brd_queue)
489 goto out_free_dev; 488 goto out_free_dev;
489
490 blk_queue_make_request(brd->brd_queue, brd_make_request); 490 blk_queue_make_request(brd->brd_queue, brd_make_request);
491 blk_queue_max_hw_sectors(brd->brd_queue, 1024); 491 blk_queue_max_hw_sectors(brd->brd_queue, 1024);
492 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); 492 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
493 493
494 /* This is so fdisk will align partitions on 4k, because of
495 * direct_access API needing 4k alignment, returning a PFN
496 * (This is only a problem on very small devices <= 4M,
497 * otherwise fdisk will align on 1M. Regardless this call
498 * is harmless)
499 */
500 blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
501
494 brd->brd_queue->limits.discard_granularity = PAGE_SIZE; 502 brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
495 brd->brd_queue->limits.max_discard_sectors = UINT_MAX; 503 brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
496 brd->brd_queue->limits.discard_zeroes_data = 1; 504 brd->brd_queue->limits.discard_zeroes_data = 1;
497 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue); 505 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
498 506
499 disk = brd->brd_disk = alloc_disk(1 << part_shift); 507 disk = brd->brd_disk = alloc_disk(max_part);
500 if (!disk) 508 if (!disk)
501 goto out_free_queue; 509 goto out_free_queue;
502 disk->major = RAMDISK_MAJOR; 510 disk->major = RAMDISK_MAJOR;
503 disk->first_minor = i << part_shift; 511 disk->first_minor = i * max_part;
504 disk->fops = &brd_fops; 512 disk->fops = &brd_fops;
505 disk->private_data = brd; 513 disk->private_data = brd;
506 disk->queue = brd->brd_queue; 514 disk->queue = brd->brd_queue;
507 if (!part_show) 515 disk->flags = GENHD_FL_EXT_DEVT;
508 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
509 sprintf(disk->disk_name, "ram%d", i); 516 sprintf(disk->disk_name, "ram%d", i);
510 set_capacity(disk, rd_size * 2); 517 set_capacity(disk, rd_size * 2);
511 518
@@ -527,10 +534,11 @@ static void brd_free(struct brd_device *brd)
527 kfree(brd); 534 kfree(brd);
528} 535}
529 536
530static struct brd_device *brd_init_one(int i) 537static struct brd_device *brd_init_one(int i, bool *new)
531{ 538{
532 struct brd_device *brd; 539 struct brd_device *brd;
533 540
541 *new = false;
534 list_for_each_entry(brd, &brd_devices, brd_list) { 542 list_for_each_entry(brd, &brd_devices, brd_list) {
535 if (brd->brd_number == i) 543 if (brd->brd_number == i)
536 goto out; 544 goto out;
@@ -541,6 +549,7 @@ static struct brd_device *brd_init_one(int i)
541 add_disk(brd->brd_disk); 549 add_disk(brd->brd_disk);
542 list_add_tail(&brd->brd_list, &brd_devices); 550 list_add_tail(&brd->brd_list, &brd_devices);
543 } 551 }
552 *new = true;
544out: 553out:
545 return brd; 554 return brd;
546} 555}
@@ -556,70 +565,46 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
556{ 565{
557 struct brd_device *brd; 566 struct brd_device *brd;
558 struct kobject *kobj; 567 struct kobject *kobj;
568 bool new;
559 569
560 mutex_lock(&brd_devices_mutex); 570 mutex_lock(&brd_devices_mutex);
561 brd = brd_init_one(MINOR(dev) >> part_shift); 571 brd = brd_init_one(MINOR(dev) / max_part, &new);
562 kobj = brd ? get_disk(brd->brd_disk) : NULL; 572 kobj = brd ? get_disk(brd->brd_disk) : NULL;
563 mutex_unlock(&brd_devices_mutex); 573 mutex_unlock(&brd_devices_mutex);
564 574
565 *part = 0; 575 if (new)
576 *part = 0;
577
566 return kobj; 578 return kobj;
567} 579}
568 580
569static int __init brd_init(void) 581static int __init brd_init(void)
570{ 582{
571 int i, nr;
572 unsigned long range;
573 struct brd_device *brd, *next; 583 struct brd_device *brd, *next;
584 int i;
574 585
575 /* 586 /*
576 * brd module now has a feature to instantiate underlying device 587 * brd module now has a feature to instantiate underlying device
577 * structure on-demand, provided that there is an access dev node. 588 * structure on-demand, provided that there is an access dev node.
578 * However, this will not work well with user space tool that doesn't
579 * know about such "feature". In order to not break any existing
580 * tool, we do the following:
581 * 589 *
582 * (1) if rd_nr is specified, create that many upfront, and this 590 * (1) if rd_nr is specified, create that many upfront. else
583 * also becomes a hard limit. 591 * it defaults to CONFIG_BLK_DEV_RAM_COUNT
584 * (2) if rd_nr is not specified, create CONFIG_BLK_DEV_RAM_COUNT 592 * (2) User can further extend brd devices by create dev node themselves
585 * (default 16) rd device on module load, user can further 593 * and have kernel automatically instantiate actual device
586 * extend brd device by create dev node themselves and have 594 * on-demand. Example:
587 * kernel automatically instantiate actual device on-demand. 595 * mknod /path/devnod_name b 1 X # 1 is the rd major
596 * fdisk -l /path/devnod_name
597 * If (X / max_part) was not already created it will be created
598 * dynamically.
588 */ 599 */
589 600
590 part_shift = 0;
591 if (max_part > 0) {
592 part_shift = fls(max_part);
593
594 /*
595 * Adjust max_part according to part_shift as it is exported
596 * to user space so that user can decide correct minor number
597 * if [s]he want to create more devices.
598 *
599 * Note that -1 is required because partition 0 is reserved
600 * for the whole disk.
601 */
602 max_part = (1UL << part_shift) - 1;
603 }
604
605 if ((1UL << part_shift) > DISK_MAX_PARTS)
606 return -EINVAL;
607
608 if (rd_nr > 1UL << (MINORBITS - part_shift))
609 return -EINVAL;
610
611 if (rd_nr) {
612 nr = rd_nr;
613 range = rd_nr << part_shift;
614 } else {
615 nr = CONFIG_BLK_DEV_RAM_COUNT;
616 range = 1UL << MINORBITS;
617 }
618
619 if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) 601 if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
620 return -EIO; 602 return -EIO;
621 603
622 for (i = 0; i < nr; i++) { 604 if (unlikely(!max_part))
605 max_part = 1;
606
607 for (i = 0; i < rd_nr; i++) {
623 brd = brd_alloc(i); 608 brd = brd_alloc(i);
624 if (!brd) 609 if (!brd)
625 goto out_free; 610 goto out_free;
@@ -631,10 +616,10 @@ static int __init brd_init(void)
631 list_for_each_entry(brd, &brd_devices, brd_list) 616 list_for_each_entry(brd, &brd_devices, brd_list)
632 add_disk(brd->brd_disk); 617 add_disk(brd->brd_disk);
633 618
634 blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range, 619 blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
635 THIS_MODULE, brd_probe, NULL, NULL); 620 THIS_MODULE, brd_probe, NULL, NULL);
636 621
637 printk(KERN_INFO "brd: module loaded\n"); 622 pr_info("brd: module loaded\n");
638 return 0; 623 return 0;
639 624
640out_free: 625out_free:
@@ -644,21 +629,21 @@ out_free:
644 } 629 }
645 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 630 unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
646 631
632 pr_info("brd: module NOT loaded !!!\n");
647 return -ENOMEM; 633 return -ENOMEM;
648} 634}
649 635
650static void __exit brd_exit(void) 636static void __exit brd_exit(void)
651{ 637{
652 unsigned long range;
653 struct brd_device *brd, *next; 638 struct brd_device *brd, *next;
654 639
655 range = rd_nr ? rd_nr << part_shift : 1UL << MINORBITS;
656
657 list_for_each_entry_safe(brd, next, &brd_devices, brd_list) 640 list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
658 brd_del_one(brd); 641 brd_del_one(brd);
659 642
660 blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range); 643 blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS);
661 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 644 unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
645
646 pr_info("brd: module unloaded\n");
662} 647}
663 648
664module_init(brd_init); 649module_init(brd_init);
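
The comment rewritten in brd_init() above describes the new on-demand path: with GENHD_FL_EXT_DEVT and the MINOR(dev) / max_part mapping, opening a hand-made node is enough for brd_probe() to instantiate the disk. A minimal userspace sketch of that flow, assuming the default max_part of 1, a minor beyond the rd_nr disks created at load time (usually 16), and an arbitrary node path:

/*
 * Sketch only: exercises the on-demand path served by brd_probe().
 * The node path and minor number are arbitrary examples.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
        dev_t dev = makedev(1, 20);     /* 1 == RAMDISK_MAJOR */

        /* Create a node for a minor the driver has not seen yet. */
        if (mknod("/dev/ram20", S_IFBLK | 0600, dev) && errno != EEXIST) {
                perror("mknod");
                return 1;
        }

        /*
         * The first open goes through the region probe registered with
         * blk_register_region(), i.e. brd_probe(), which now calls
         * brd_init_one(MINOR(dev) / max_part, &new) and adds the disk.
         */
        int fd = open("/dev/ram20", O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        close(fd);
        return 0;
}

Running fdisk -l /dev/ram20 afterwards, as in the comment's own example, would then show the dynamically created disk.
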
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6cb1beb47c25..d1f168b73634 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -85,6 +85,8 @@ static DEFINE_MUTEX(loop_index_mutex);
85static int max_part; 85static int max_part;
86static int part_shift; 86static int part_shift;
87 87
88static struct workqueue_struct *loop_wq;
89
88/* 90/*
89 * Transfer functions 91 * Transfer functions
90 */ 92 */
@@ -284,12 +286,12 @@ static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
284 return ret; 286 return ret;
285} 287}
286 288
287static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) 289static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos)
288{ 290{
289 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, 291 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
290 struct page *page); 292 struct page *page);
291 struct bio_vec bvec; 293 struct bio_vec bvec;
292 struct bvec_iter iter; 294 struct req_iterator iter;
293 struct page *page = NULL; 295 struct page *page = NULL;
294 int ret = 0; 296 int ret = 0;
295 297
@@ -303,7 +305,7 @@ static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
303 do_lo_send = do_lo_send_direct_write; 305 do_lo_send = do_lo_send_direct_write;
304 } 306 }
305 307
306 bio_for_each_segment(bvec, bio, iter) { 308 rq_for_each_segment(bvec, rq, iter) {
307 ret = do_lo_send(lo, &bvec, pos, page); 309 ret = do_lo_send(lo, &bvec, pos, page);
308 if (ret < 0) 310 if (ret < 0)
309 break; 311 break;
@@ -391,19 +393,22 @@ do_lo_receive(struct loop_device *lo,
391} 393}
392 394
393static int 395static int
394lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) 396lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos)
395{ 397{
396 struct bio_vec bvec; 398 struct bio_vec bvec;
397 struct bvec_iter iter; 399 struct req_iterator iter;
398 ssize_t s; 400 ssize_t s;
399 401
400 bio_for_each_segment(bvec, bio, iter) { 402 rq_for_each_segment(bvec, rq, iter) {
401 s = do_lo_receive(lo, &bvec, bsize, pos); 403 s = do_lo_receive(lo, &bvec, bsize, pos);
402 if (s < 0) 404 if (s < 0)
403 return s; 405 return s;
404 406
405 if (s != bvec.bv_len) { 407 if (s != bvec.bv_len) {
406 zero_fill_bio(bio); 408 struct bio *bio;
409
410 __rq_for_each_bio(bio, rq)
411 zero_fill_bio(bio);
407 break; 412 break;
408 } 413 }
409 pos += bvec.bv_len; 414 pos += bvec.bv_len;
@@ -411,106 +416,58 @@ lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
411 return 0; 416 return 0;
412} 417}
413 418
414static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) 419static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
415{ 420{
416 loff_t pos; 421 /*
422 * We use punch hole to reclaim the free space used by the
423 * image a.k.a. discard. However we do not support discard if
424 * encryption is enabled, because it may give an attacker
425 * useful information.
426 */
427 struct file *file = lo->lo_backing_file;
428 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
417 int ret; 429 int ret;
418 430
419 pos = ((loff_t) bio->bi_iter.bi_sector << 9) + lo->lo_offset; 431 if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
420 432 ret = -EOPNOTSUPP;
421 if (bio_rw(bio) == WRITE) { 433 goto out;
422 struct file *file = lo->lo_backing_file; 434 }
423
424 if (bio->bi_rw & REQ_FLUSH) {
425 ret = vfs_fsync(file, 0);
426 if (unlikely(ret && ret != -EINVAL)) {
427 ret = -EIO;
428 goto out;
429 }
430 }
431
432 /*
433 * We use punch hole to reclaim the free space used by the
434 * image a.k.a. discard. However we do not support discard if
435 * encryption is enabled, because it may give an attacker
436 * useful information.
437 */
438 if (bio->bi_rw & REQ_DISCARD) {
439 struct file *file = lo->lo_backing_file;
440 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
441
442 if ((!file->f_op->fallocate) ||
443 lo->lo_encrypt_key_size) {
444 ret = -EOPNOTSUPP;
445 goto out;
446 }
447 ret = file->f_op->fallocate(file, mode, pos,
448 bio->bi_iter.bi_size);
449 if (unlikely(ret && ret != -EINVAL &&
450 ret != -EOPNOTSUPP))
451 ret = -EIO;
452 goto out;
453 }
454
455 ret = lo_send(lo, bio, pos);
456
457 if ((bio->bi_rw & REQ_FUA) && !ret) {
458 ret = vfs_fsync(file, 0);
459 if (unlikely(ret && ret != -EINVAL))
460 ret = -EIO;
461 }
462 } else
463 ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
464 435
465out: 436 ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq));
437 if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
438 ret = -EIO;
439 out:
466 return ret; 440 return ret;
467} 441}
468 442
469/* 443static int lo_req_flush(struct loop_device *lo, struct request *rq)
470 * Add bio to back of pending list
471 */
472static void loop_add_bio(struct loop_device *lo, struct bio *bio)
473{ 444{
474 lo->lo_bio_count++; 445 struct file *file = lo->lo_backing_file;
475 bio_list_add(&lo->lo_bio_list, bio); 446 int ret = vfs_fsync(file, 0);
476} 447 if (unlikely(ret && ret != -EINVAL))
448 ret = -EIO;
477 449
478/* 450 return ret;
479 * Grab first pending buffer
480 */
481static struct bio *loop_get_bio(struct loop_device *lo)
482{
483 lo->lo_bio_count--;
484 return bio_list_pop(&lo->lo_bio_list);
485} 451}
486 452
487static void loop_make_request(struct request_queue *q, struct bio *old_bio) 453static int do_req_filebacked(struct loop_device *lo, struct request *rq)
488{ 454{
489 struct loop_device *lo = q->queuedata; 455 loff_t pos;
490 int rw = bio_rw(old_bio); 456 int ret;
491
492 if (rw == READA)
493 rw = READ;
494 457
495 BUG_ON(!lo || (rw != READ && rw != WRITE)); 458 pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
496 459
497 spin_lock_irq(&lo->lo_lock); 460 if (rq->cmd_flags & REQ_WRITE) {
498 if (lo->lo_state != Lo_bound) 461 if (rq->cmd_flags & REQ_FLUSH)
499 goto out; 462 ret = lo_req_flush(lo, rq);
500 if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) 463 else if (rq->cmd_flags & REQ_DISCARD)
501 goto out; 464 ret = lo_discard(lo, rq, pos);
502 if (lo->lo_bio_count >= q->nr_congestion_on) 465 else
503 wait_event_lock_irq(lo->lo_req_wait, 466 ret = lo_send(lo, rq, pos);
504 lo->lo_bio_count < q->nr_congestion_off, 467 } else
505 lo->lo_lock); 468 ret = lo_receive(lo, rq, lo->lo_blocksize, pos);
506 loop_add_bio(lo, old_bio);
507 wake_up(&lo->lo_event);
508 spin_unlock_irq(&lo->lo_lock);
509 return;
510 469
511out: 470 return ret;
512 spin_unlock_irq(&lo->lo_lock);
513 bio_io_error(old_bio);
514} 471}
515 472
516struct switch_request { 473struct switch_request {
@@ -518,57 +475,26 @@ struct switch_request {
518 struct completion wait; 475 struct completion wait;
519}; 476};
520 477
521static void do_loop_switch(struct loop_device *, struct switch_request *);
522
523static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
524{
525 if (unlikely(!bio->bi_bdev)) {
526 do_loop_switch(lo, bio->bi_private);
527 bio_put(bio);
528 } else {
529 int ret = do_bio_filebacked(lo, bio);
530 bio_endio(bio, ret);
531 }
532}
533
534/* 478/*
535 * worker thread that handles reads/writes to file backed loop devices, 479 * Do the actual switch; called from the BIO completion routine
536 * to avoid blocking in our make_request_fn. it also does loop decrypting
537 * on reads for block backed loop, as that is too heavy to do from
538 * b_end_io context where irqs may be disabled.
539 *
540 * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before
541 * calling kthread_stop(). Therefore once kthread_should_stop() is
542 * true, make_request will not place any more requests. Therefore
543 * once kthread_should_stop() is true and lo_bio is NULL, we are
544 * done with the loop.
545 */ 480 */
546static int loop_thread(void *data) 481static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
547{ 482{
548 struct loop_device *lo = data; 483 struct file *file = p->file;
549 struct bio *bio; 484 struct file *old_file = lo->lo_backing_file;
550 485 struct address_space *mapping;
551 set_user_nice(current, MIN_NICE);
552
553 while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
554
555 wait_event_interruptible(lo->lo_event,
556 !bio_list_empty(&lo->lo_bio_list) ||
557 kthread_should_stop());
558
559 if (bio_list_empty(&lo->lo_bio_list))
560 continue;
561 spin_lock_irq(&lo->lo_lock);
562 bio = loop_get_bio(lo);
563 if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
564 wake_up(&lo->lo_req_wait);
565 spin_unlock_irq(&lo->lo_lock);
566 486
567 BUG_ON(!bio); 487 /* if no new file, only flush of queued bios requested */
568 loop_handle_bio(lo, bio); 488 if (!file)
569 } 489 return;
570 490
571 return 0; 491 mapping = file->f_mapping;
492 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
493 lo->lo_backing_file = file;
494 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
495 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
496 lo->old_gfp_mask = mapping_gfp_mask(mapping);
497 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
572} 498}
573 499
574/* 500/*
@@ -579,15 +505,18 @@ static int loop_thread(void *data)
579static int loop_switch(struct loop_device *lo, struct file *file) 505static int loop_switch(struct loop_device *lo, struct file *file)
580{ 506{
581 struct switch_request w; 507 struct switch_request w;
582 struct bio *bio = bio_alloc(GFP_KERNEL, 0); 508
583 if (!bio)
584 return -ENOMEM;
585 init_completion(&w.wait);
586 w.file = file; 509 w.file = file;
587 bio->bi_private = &w; 510
588 bio->bi_bdev = NULL; 511 /* freeze queue and wait for completion of scheduled requests */
589 loop_make_request(lo->lo_queue, bio); 512 blk_mq_freeze_queue(lo->lo_queue);
590 wait_for_completion(&w.wait); 513
514 /* do the switch action */
515 do_loop_switch(lo, &w);
516
517 /* unfreeze */
518 blk_mq_unfreeze_queue(lo->lo_queue);
519
591 return 0; 520 return 0;
592} 521}
593 522
@@ -596,39 +525,10 @@ static int loop_switch(struct loop_device *lo, struct file *file)
596 */ 525 */
597static int loop_flush(struct loop_device *lo) 526static int loop_flush(struct loop_device *lo)
598{ 527{
599 /* loop not yet configured, no running thread, nothing to flush */
600 if (!lo->lo_thread)
601 return 0;
602
603 return loop_switch(lo, NULL); 528 return loop_switch(lo, NULL);
604} 529}
605 530
606/* 531/*
607 * Do the actual switch; called from the BIO completion routine
608 */
609static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
610{
611 struct file *file = p->file;
612 struct file *old_file = lo->lo_backing_file;
613 struct address_space *mapping;
614
615 /* if no new file, only flush of queued bios requested */
616 if (!file)
617 goto out;
618
619 mapping = file->f_mapping;
620 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
621 lo->lo_backing_file = file;
622 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
623 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
624 lo->old_gfp_mask = mapping_gfp_mask(mapping);
625 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
626out:
627 complete(&p->wait);
628}
629
630
631/*
632 * loop_change_fd switched the backing store of a loopback device to 532 * loop_change_fd switched the backing store of a loopback device to
633 * a new file. This is useful for operating system installers to free up 533 * a new file. This is useful for operating system installers to free up
634 * the original file and in High Availability environments to switch to 534 * the original file and in High Availability environments to switch to
@@ -889,12 +789,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
889 lo->transfer = transfer_none; 789 lo->transfer = transfer_none;
890 lo->ioctl = NULL; 790 lo->ioctl = NULL;
891 lo->lo_sizelimit = 0; 791 lo->lo_sizelimit = 0;
892 lo->lo_bio_count = 0;
893 lo->old_gfp_mask = mapping_gfp_mask(mapping); 792 lo->old_gfp_mask = mapping_gfp_mask(mapping);
894 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 793 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
895 794
896 bio_list_init(&lo->lo_bio_list);
897
898 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 795 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
899 blk_queue_flush(lo->lo_queue, REQ_FLUSH); 796 blk_queue_flush(lo->lo_queue, REQ_FLUSH);
900 797
@@ -906,14 +803,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
906 803
907 set_blocksize(bdev, lo_blocksize); 804 set_blocksize(bdev, lo_blocksize);
908 805
909 lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
910 lo->lo_number);
911 if (IS_ERR(lo->lo_thread)) {
912 error = PTR_ERR(lo->lo_thread);
913 goto out_clr;
914 }
915 lo->lo_state = Lo_bound; 806 lo->lo_state = Lo_bound;
916 wake_up_process(lo->lo_thread);
917 if (part_shift) 807 if (part_shift)
918 lo->lo_flags |= LO_FLAGS_PARTSCAN; 808 lo->lo_flags |= LO_FLAGS_PARTSCAN;
919 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 809 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
@@ -925,18 +815,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
925 bdgrab(bdev); 815 bdgrab(bdev);
926 return 0; 816 return 0;
927 817
928out_clr:
929 loop_sysfs_exit(lo);
930 lo->lo_thread = NULL;
931 lo->lo_device = NULL;
932 lo->lo_backing_file = NULL;
933 lo->lo_flags = 0;
934 set_capacity(lo->lo_disk, 0);
935 invalidate_bdev(bdev);
936 bd_set_size(bdev, 0);
937 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
938 mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
939 lo->lo_state = Lo_unbound;
940 out_putf: 818 out_putf:
941 fput(file); 819 fput(file);
942 out: 820 out:
@@ -1012,11 +890,6 @@ static int loop_clr_fd(struct loop_device *lo)
1012 890
1013 spin_lock_irq(&lo->lo_lock); 891 spin_lock_irq(&lo->lo_lock);
1014 lo->lo_state = Lo_rundown; 892 lo->lo_state = Lo_rundown;
1015 spin_unlock_irq(&lo->lo_lock);
1016
1017 kthread_stop(lo->lo_thread);
1018
1019 spin_lock_irq(&lo->lo_lock);
1020 lo->lo_backing_file = NULL; 893 lo->lo_backing_file = NULL;
1021 spin_unlock_irq(&lo->lo_lock); 894 spin_unlock_irq(&lo->lo_lock);
1022 895
@@ -1028,7 +901,6 @@ static int loop_clr_fd(struct loop_device *lo)
1028 lo->lo_offset = 0; 901 lo->lo_offset = 0;
1029 lo->lo_sizelimit = 0; 902 lo->lo_sizelimit = 0;
1030 lo->lo_encrypt_key_size = 0; 903 lo->lo_encrypt_key_size = 0;
1031 lo->lo_thread = NULL;
1032 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 904 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1033 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 905 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1034 memset(lo->lo_file_name, 0, LO_NAME_SIZE); 906 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
@@ -1601,6 +1473,105 @@ int loop_unregister_transfer(int number)
1601EXPORT_SYMBOL(loop_register_transfer); 1473EXPORT_SYMBOL(loop_register_transfer);
1602EXPORT_SYMBOL(loop_unregister_transfer); 1474EXPORT_SYMBOL(loop_unregister_transfer);
1603 1475
1476static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1477 const struct blk_mq_queue_data *bd)
1478{
1479 struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
1480
1481 blk_mq_start_request(bd->rq);
1482
1483 if (cmd->rq->cmd_flags & REQ_WRITE) {
1484 struct loop_device *lo = cmd->rq->q->queuedata;
1485 bool need_sched = true;
1486
1487 spin_lock_irq(&lo->lo_lock);
1488 if (lo->write_started)
1489 need_sched = false;
1490 else
1491 lo->write_started = true;
1492 list_add_tail(&cmd->list, &lo->write_cmd_head);
1493 spin_unlock_irq(&lo->lo_lock);
1494
1495 if (need_sched)
1496 queue_work(loop_wq, &lo->write_work);
1497 } else {
1498 queue_work(loop_wq, &cmd->read_work);
1499 }
1500
1501 return BLK_MQ_RQ_QUEUE_OK;
1502}
1503
1504static void loop_handle_cmd(struct loop_cmd *cmd)
1505{
1506 const bool write = cmd->rq->cmd_flags & REQ_WRITE;
1507 struct loop_device *lo = cmd->rq->q->queuedata;
1508 int ret = -EIO;
1509
1510 if (lo->lo_state != Lo_bound)
1511 goto failed;
1512
1513 if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
1514 goto failed;
1515
1516 ret = do_req_filebacked(lo, cmd->rq);
1517
1518 failed:
1519 if (ret)
1520 cmd->rq->errors = -EIO;
1521 blk_mq_complete_request(cmd->rq);
1522}
1523
1524static void loop_queue_write_work(struct work_struct *work)
1525{
1526 struct loop_device *lo =
1527 container_of(work, struct loop_device, write_work);
1528 LIST_HEAD(cmd_list);
1529
1530 spin_lock_irq(&lo->lo_lock);
1531 repeat:
1532 list_splice_init(&lo->write_cmd_head, &cmd_list);
1533 spin_unlock_irq(&lo->lo_lock);
1534
1535 while (!list_empty(&cmd_list)) {
1536 struct loop_cmd *cmd = list_first_entry(&cmd_list,
1537 struct loop_cmd, list);
1538 list_del_init(&cmd->list);
1539 loop_handle_cmd(cmd);
1540 }
1541
1542 spin_lock_irq(&lo->lo_lock);
1543 if (!list_empty(&lo->write_cmd_head))
1544 goto repeat;
1545 lo->write_started = false;
1546 spin_unlock_irq(&lo->lo_lock);
1547}
1548
1549static void loop_queue_read_work(struct work_struct *work)
1550{
1551 struct loop_cmd *cmd =
1552 container_of(work, struct loop_cmd, read_work);
1553
1554 loop_handle_cmd(cmd);
1555}
1556
1557static int loop_init_request(void *data, struct request *rq,
1558 unsigned int hctx_idx, unsigned int request_idx,
1559 unsigned int numa_node)
1560{
1561 struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
1562
1563 cmd->rq = rq;
1564 INIT_WORK(&cmd->read_work, loop_queue_read_work);
1565
1566 return 0;
1567}
1568
1569static struct blk_mq_ops loop_mq_ops = {
1570 .queue_rq = loop_queue_rq,
1571 .map_queue = blk_mq_map_queue,
1572 .init_request = loop_init_request,
1573};
1574
1604static int loop_add(struct loop_device **l, int i) 1575static int loop_add(struct loop_device **l, int i)
1605{ 1576{
1606 struct loop_device *lo; 1577 struct loop_device *lo;
@@ -1627,16 +1598,28 @@ static int loop_add(struct loop_device **l, int i)
1627 i = err; 1598 i = err;
1628 1599
1629 err = -ENOMEM; 1600 err = -ENOMEM;
1630 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1601 lo->tag_set.ops = &loop_mq_ops;
1631 if (!lo->lo_queue) 1602 lo->tag_set.nr_hw_queues = 1;
1603 lo->tag_set.queue_depth = 128;
1604 lo->tag_set.numa_node = NUMA_NO_NODE;
1605 lo->tag_set.cmd_size = sizeof(struct loop_cmd);
1606 lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
1607 lo->tag_set.driver_data = lo;
1608
1609 err = blk_mq_alloc_tag_set(&lo->tag_set);
1610 if (err)
1632 goto out_free_idr; 1611 goto out_free_idr;
1633 1612
1634 /* 1613 lo->lo_queue = blk_mq_init_queue(&lo->tag_set);
1635 * set queue make_request_fn 1614 if (IS_ERR_OR_NULL(lo->lo_queue)) {
1636 */ 1615 err = PTR_ERR(lo->lo_queue);
1637 blk_queue_make_request(lo->lo_queue, loop_make_request); 1616 goto out_cleanup_tags;
1617 }
1638 lo->lo_queue->queuedata = lo; 1618 lo->lo_queue->queuedata = lo;
1639 1619
1620 INIT_LIST_HEAD(&lo->write_cmd_head);
1621 INIT_WORK(&lo->write_work, loop_queue_write_work);
1622
1640 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1623 disk = lo->lo_disk = alloc_disk(1 << part_shift);
1641 if (!disk) 1624 if (!disk)
1642 goto out_free_queue; 1625 goto out_free_queue;
@@ -1664,9 +1647,6 @@ static int loop_add(struct loop_device **l, int i)
1664 disk->flags |= GENHD_FL_EXT_DEVT; 1647 disk->flags |= GENHD_FL_EXT_DEVT;
1665 mutex_init(&lo->lo_ctl_mutex); 1648 mutex_init(&lo->lo_ctl_mutex);
1666 lo->lo_number = i; 1649 lo->lo_number = i;
1667 lo->lo_thread = NULL;
1668 init_waitqueue_head(&lo->lo_event);
1669 init_waitqueue_head(&lo->lo_req_wait);
1670 spin_lock_init(&lo->lo_lock); 1650 spin_lock_init(&lo->lo_lock);
1671 disk->major = LOOP_MAJOR; 1651 disk->major = LOOP_MAJOR;
1672 disk->first_minor = i << part_shift; 1652 disk->first_minor = i << part_shift;
@@ -1680,6 +1660,8 @@ static int loop_add(struct loop_device **l, int i)
1680 1660
1681out_free_queue: 1661out_free_queue:
1682 blk_cleanup_queue(lo->lo_queue); 1662 blk_cleanup_queue(lo->lo_queue);
1663out_cleanup_tags:
1664 blk_mq_free_tag_set(&lo->tag_set);
1683out_free_idr: 1665out_free_idr:
1684 idr_remove(&loop_index_idr, i); 1666 idr_remove(&loop_index_idr, i);
1685out_free_dev: 1667out_free_dev:
@@ -1692,6 +1674,7 @@ static void loop_remove(struct loop_device *lo)
1692{ 1674{
1693 del_gendisk(lo->lo_disk); 1675 del_gendisk(lo->lo_disk);
1694 blk_cleanup_queue(lo->lo_queue); 1676 blk_cleanup_queue(lo->lo_queue);
1677 blk_mq_free_tag_set(&lo->tag_set);
1695 put_disk(lo->lo_disk); 1678 put_disk(lo->lo_disk);
1696 kfree(lo); 1679 kfree(lo);
1697} 1680}
@@ -1875,6 +1858,13 @@ static int __init loop_init(void)
1875 goto misc_out; 1858 goto misc_out;
1876 } 1859 }
1877 1860
1861 loop_wq = alloc_workqueue("kloopd",
1862 WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
1863 if (!loop_wq) {
1864 err = -ENOMEM;
1865 goto misc_out;
1866 }
1867
1878 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1868 blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1879 THIS_MODULE, loop_probe, NULL, NULL); 1869 THIS_MODULE, loop_probe, NULL, NULL);
1880 1870
@@ -1912,6 +1902,8 @@ static void __exit loop_exit(void)
1912 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1902 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
1913 unregister_blkdev(LOOP_MAJOR, "loop"); 1903 unregister_blkdev(LOOP_MAJOR, "loop");
1914 1904
1905 destroy_workqueue(loop_wq);
1906
1915 misc_deregister(&loop_misc); 1907 misc_deregister(&loop_misc);
1916} 1908}
1917 1909
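
lo_discard() above handles REQ_DISCARD by punching a hole in the backing file with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, and refuses it when an encryption key is set. A rough userspace equivalent of that call, assuming a pre-existing backing.img and arbitrary offset/length:

/* Sketch of the punch-hole call lo_discard() issues on the backing file. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("backing.img", O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Same mode the driver passes: punch a hole, keep i_size. */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      1 << 20 /* offset */, 1 << 20 /* length */)) {
                perror("fallocate");
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}

du then reports fewer blocks for backing.img while stat still shows the original length, which is the FALLOC_FL_KEEP_SIZE behaviour the loop device relies on; a backing filesystem without ->fallocate makes lo_discard() return -EOPNOTSUPP, as the check above shows.
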
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 90df5d6485b6..301c27f8323f 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -11,8 +11,10 @@
11 11
12#include <linux/bio.h> 12#include <linux/bio.h>
13#include <linux/blkdev.h> 13#include <linux/blkdev.h>
14#include <linux/blk-mq.h>
14#include <linux/spinlock.h> 15#include <linux/spinlock.h>
15#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/workqueue.h>
16#include <uapi/linux/loop.h> 18#include <uapi/linux/loop.h>
17 19
18/* Possible states of device */ 20/* Possible states of device */
@@ -52,19 +54,23 @@ struct loop_device {
52 gfp_t old_gfp_mask; 54 gfp_t old_gfp_mask;
53 55
54 spinlock_t lo_lock; 56 spinlock_t lo_lock;
55 struct bio_list lo_bio_list; 57 struct list_head write_cmd_head;
56 unsigned int lo_bio_count; 58 struct work_struct write_work;
59 bool write_started;
57 int lo_state; 60 int lo_state;
58 struct mutex lo_ctl_mutex; 61 struct mutex lo_ctl_mutex;
59 struct task_struct *lo_thread;
60 wait_queue_head_t lo_event;
61 /* wait queue for incoming requests */
62 wait_queue_head_t lo_req_wait;
63 62
64 struct request_queue *lo_queue; 63 struct request_queue *lo_queue;
64 struct blk_mq_tag_set tag_set;
65 struct gendisk *lo_disk; 65 struct gendisk *lo_disk;
66}; 66};
67 67
68struct loop_cmd {
69 struct work_struct read_work;
70 struct request *rq;
71 struct list_head list;
72};
73
68/* Support for loadable transfer modules */ 74/* Support for loadable transfer modules */
69struct loop_func_table { 75struct loop_func_table {
70 int number; /* filter type */ 76 int number; /* filter type */
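
struct loop_cmd above is the per-request payload ("pdu") that blk-mq allocates alongside every request once loop_add() sets tag_set.cmd_size = sizeof(struct loop_cmd); loop_queue_rq() and loop_init_request() then recover it with blk_mq_rq_to_pdu(). A tiny userspace analogue of that layout trick, with stand-in types, showing why the conversion is plain pointer arithmetic:

/*
 * Analogue only: blk-mq sizes each request as sizeof(struct request) +
 * cmd_size, so the pdu is simply the memory right after the request.
 */
#include <stdio.h>
#include <stdlib.h>

struct request {                        /* stand-in for the block layer's request */
        int tag;
};

struct loop_cmd {                       /* the driver's per-command data (pdu) */
        struct request *rq;
};

static void *rq_to_pdu(struct request *rq)
{
        return rq + 1;                  /* pdu sits immediately after the request */
}

int main(void)
{
        struct request *rq = calloc(1, sizeof(*rq) + sizeof(struct loop_cmd));
        struct loop_cmd *cmd;

        if (!rq)
                return 1;

        cmd = rq_to_pdu(rq);            /* what loop_queue_rq() does per request */
        cmd->rq = rq;

        printf("request at %p, loop_cmd at %p\n", (void *)rq, (void *)cmd);
        free(rq);
        return 0;
}
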
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index aa2224aa7caa..65cd61a4145e 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -579,7 +579,7 @@ static int null_add_dev(void)
579 sector_div(size, bs); 579 sector_div(size, bs);
580 set_capacity(disk, size); 580 set_capacity(disk, size);
581 581
582 disk->flags |= GENHD_FL_EXT_DEVT; 582 disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
583 disk->major = null_major; 583 disk->major = null_major;
584 disk->first_minor = nullb->index; 584 disk->first_minor = nullb->index;
585 disk->fops = &null_fops; 585 disk->fops = &null_fops;
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index f7d083bb3bd5..f4aa64160838 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1108,21 +1108,14 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
1108 1108
1109static void nvme_free_queues(struct nvme_dev *dev, int lowest) 1109static void nvme_free_queues(struct nvme_dev *dev, int lowest)
1110{ 1110{
1111 LLIST_HEAD(q_list);
1112 struct nvme_queue *nvmeq, *next;
1113 struct llist_node *entry;
1114 int i; 1111 int i;
1115 1112
1116 for (i = dev->queue_count - 1; i >= lowest; i--) { 1113 for (i = dev->queue_count - 1; i >= lowest; i--) {
1117 struct nvme_queue *nvmeq = dev->queues[i]; 1114 struct nvme_queue *nvmeq = dev->queues[i];
1118 llist_add(&nvmeq->node, &q_list);
1119 dev->queue_count--; 1115 dev->queue_count--;
1120 dev->queues[i] = NULL; 1116 dev->queues[i] = NULL;
1121 }
1122 synchronize_rcu();
1123 entry = llist_del_all(&q_list);
1124 llist_for_each_entry_safe(nvmeq, next, entry, node)
1125 nvme_free_queue(nvmeq); 1117 nvme_free_queue(nvmeq);
1118 }
1126} 1119}
1127 1120
1128/** 1121/**