author		Bart Van Assche <bart.vanassche@wdc.com>	2018-04-16 21:04:41 -0400
committer	Martin K. Petersen <martin.petersen@oracle.com>	2018-04-19 00:04:10 -0400
commit		ccce20fc7968d546fb1e8e147bf5cdc8afc4278a (patch)
tree		03eb832a953db5bfb9801adb4e6a08a25494c6b1
parent		505aa4b6a8834a2300971c5220c380c3271ebde3 (diff)
scsi: sd_zbc: Avoid that resetting a zone fails sporadically
Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is
called from sd_probe_async() and since sd_revalidate_disk() calls
sd_zbc_read_zones(), it can happen that sd_zbc_read_zones() is called
concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That
can cause these functions to fail with -EIO because sd_zbc_read_zones()
e.g. sets q->nr_zones to zero before restoring it to the actual value,
even if no drive characteristics have changed. Avoid that this can happen
by making the following changes:

- Protect the code that updates zone information with blk_queue_enter()
  and blk_queue_exit().
- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
  these functions do not modify struct scsi_disk before all zone
  information has been obtained.

Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track
legacy requests"; kernel v4.15) the request queue freezing mechanism also
affects legacy request queues.

Fixes: 89d947561077 ("sd: Implement support for ZBC devices")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: stable@vger.kernel.org # v4.16
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
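The heart of the fix is the publication step in sd_zbc_setup() below: the
new zone bitmaps are built first and only swapped into the request queue
while the queue is frozen, so blkdev_report_zones() and
blkdev_reset_zones() can never observe a half-updated state. A condensed
sketch of that pattern, with identifiers as in the patch and error
handling omitted:

	/* Build the new bitmaps before touching published queue state. */
	seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
	seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, zone_shift,
							 nr_zones);

	blk_mq_freeze_queue(q);		/* drain in-flight requests */
	q->nr_zones = nr_zones;
	swap(q->seq_zones_wlock, seq_zones_wlock);
	swap(q->seq_zones_bitmap, seq_zones_bitmap);
	blk_mq_unfreeze_queue(q);

	kfree(seq_zones_wlock);		/* now hold the old bitmaps (or NULL) */
	kfree(seq_zones_bitmap);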
-rw-r--r--	drivers/scsi/sd_zbc.c	140
-rw-r--r--	include/linux/blkdev.h	5
2 files changed, 87 insertions(+), 58 deletions(-)
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 41df75eea57b..210407cd2341 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
  *
  * Check that all zones of the device are equal. The last zone can however
  * be smaller. The zone size must also be a power of two number of LBAs.
+ *
+ * Returns the zone size in number of blocks upon success or an error code upon failure.
  */
-static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
+static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 {
 	u64 zone_blocks = 0;
 	sector_t block = 0;
@@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 	int ret;
 	u8 same;
 
-	sdkp->zone_blocks = 0;
-
 	/* Get a buffer */
 	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
 	/* Parse zone descriptors */
 	while (rec < buf + buf_len) {
-		zone_blocks = get_unaligned_be64(&rec[8]);
-		if (sdkp->zone_blocks == 0) {
-			sdkp->zone_blocks = zone_blocks;
-		} else if (zone_blocks != sdkp->zone_blocks &&
-			   (block + zone_blocks < sdkp->capacity
-			    || zone_blocks > sdkp->zone_blocks)) {
-			zone_blocks = 0;
+		u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
+
+		if (zone_blocks == 0) {
+			zone_blocks = this_zone_blocks;
+		} else if (this_zone_blocks != zone_blocks &&
+			   (block + this_zone_blocks < sdkp->capacity
+			    || this_zone_blocks > zone_blocks)) {
+			this_zone_blocks = 0;
 			goto out;
 		}
-		block += zone_blocks;
+		block += this_zone_blocks;
 		rec += 64;
 	}
 
@@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp)
 
 	} while (block < sdkp->capacity);
 
-	zone_blocks = sdkp->zone_blocks;
-
 out:
 	if (!zone_blocks) {
 		if (sdkp->first_scan)
@@ -488,8 +487,7 @@ out:
 				  "Zone size too large\n");
 		ret = -ENODEV;
 	} else {
-		sdkp->zone_blocks = zone_blocks;
-		sdkp->zone_shift = ilog2(zone_blocks);
+		ret = zone_blocks;
 	}
 
 out_free:
@@ -500,15 +498,14 @@ out_free:
 
 /**
  * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @sdkp: The disk of the bitmap
+ * @nr_zones: Number of zones to allocate space for.
+ * @numa_node: NUMA node to allocate the memory from.
  */
-static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
+static inline unsigned long *
+sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
 {
-	struct request_queue *q = sdkp->disk->queue;
-
-	return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones)
-			    * sizeof(unsigned long),
-			    GFP_KERNEL, q->node);
+	return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long),
+			    GFP_KERNEL, numa_node);
 }
 
 /**
@@ -516,6 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
  * @sdkp: disk used
  * @buf: report reply buffer
 * @buflen: length of @buf
+ * @zone_shift: logarithm base 2 of the number of blocks in a zone
  * @seq_zones_bitmap: bitmap of sequential zones to set
 *
 * Parse reported zone descriptors in @buf to identify sequential zones and
@@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp)
  * Return the LBA after the last zone reported.
  */
 static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
-				     unsigned int buflen,
+				     unsigned int buflen, u32 zone_shift,
 				     unsigned long *seq_zones_bitmap)
 {
 	sector_t lba, next_lba = sdkp->capacity;
@@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
 		if (type != ZBC_ZONE_TYPE_CONV &&
 		    cond != ZBC_ZONE_COND_READONLY &&
 		    cond != ZBC_ZONE_COND_OFFLINE)
-			set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap);
+			set_bit(lba >> zone_shift, seq_zones_bitmap);
 		next_lba = lba + get_unaligned_be64(&rec[8]);
 		rec += 64;
 	}
@@ -553,12 +551,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
 }
 
 /**
- * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap.
+ * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
  * @sdkp: target disk
+ * @zone_shift: logarithm base 2 of the number of blocks in a zone
+ * @nr_zones: number of zones to set up a seq zone bitmap for
  *
  * Allocate a zone bitmap and initialize it by identifying sequential zones.
  */
-static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
+static unsigned long *
+sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
+			      u32 nr_zones)
 {
 	struct request_queue *q = sdkp->disk->queue;
 	unsigned long *seq_zones_bitmap;
@@ -566,9 +568,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
 	unsigned char *buf;
 	int ret = -ENOMEM;
 
-	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp);
+	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
 	if (!seq_zones_bitmap)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp)
 		if (ret)
 			goto out;
 		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
-					   seq_zones_bitmap);
+					   zone_shift, seq_zones_bitmap);
 	}
 
 	if (lba != sdkp->capacity) {
@@ -591,12 +593,9 @@ out:
 	kfree(buf);
 	if (ret) {
 		kfree(seq_zones_bitmap);
-		return ret;
+		return ERR_PTR(ret);
 	}
-
-	q->seq_zones_bitmap = seq_zones_bitmap;
-
-	return 0;
+	return seq_zones_bitmap;
 }
 
 static void sd_zbc_cleanup(struct scsi_disk *sdkp)
@@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp)
 	q->nr_zones = 0;
 }
 
-static int sd_zbc_setup(struct scsi_disk *sdkp)
+static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
 {
 	struct request_queue *q = sdkp->disk->queue;
+	u32 zone_shift = ilog2(zone_blocks);
+	u32 nr_zones;
 	int ret;
 
-	/* READ16/WRITE16 is mandatory for ZBC disks */
-	sdkp->device->use_16_for_rw = 1;
-	sdkp->device->use_10_for_rw = 0;
-
 	/* chunk_sectors indicates the zone size */
-	blk_queue_chunk_sectors(sdkp->disk->queue,
-			logical_to_sectors(sdkp->device, sdkp->zone_blocks));
-	sdkp->nr_zones =
-		round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift;
+	blk_queue_chunk_sectors(q,
+			logical_to_sectors(sdkp->device, zone_blocks));
+	nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
 
 	/*
 	 * Initialize the device request queue information if the number
 	 * of zones changed.
 	 */
-	if (sdkp->nr_zones != q->nr_zones) {
-
-		sd_zbc_cleanup(sdkp);
-
-		q->nr_zones = sdkp->nr_zones;
-		if (sdkp->nr_zones) {
-			q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp);
-			if (!q->seq_zones_wlock) {
+	if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
+		unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+		size_t zone_bitmap_size;
+
+		if (nr_zones) {
+			seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
+								   q->node);
+			if (!seq_zones_wlock) {
 				ret = -ENOMEM;
 				goto err;
 			}
 
-			ret = sd_zbc_setup_seq_zones_bitmap(sdkp);
-			if (ret) {
-				sd_zbc_cleanup(sdkp);
+			seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
+						zone_shift, nr_zones);
+			if (IS_ERR(seq_zones_bitmap)) {
+				ret = PTR_ERR(seq_zones_bitmap);
+				kfree(seq_zones_wlock);
 				goto err;
 			}
 		}
-
+		zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
+			sizeof(unsigned long);
+		blk_mq_freeze_queue(q);
+		if (q->nr_zones != nr_zones) {
+			/* READ16/WRITE16 is mandatory for ZBC disks */
+			sdkp->device->use_16_for_rw = 1;
+			sdkp->device->use_10_for_rw = 0;
+
+			sdkp->zone_blocks = zone_blocks;
+			sdkp->zone_shift = zone_shift;
+			sdkp->nr_zones = nr_zones;
+			q->nr_zones = nr_zones;
+			swap(q->seq_zones_wlock, seq_zones_wlock);
+			swap(q->seq_zones_bitmap, seq_zones_bitmap);
+		} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
+				  zone_bitmap_size) != 0) {
+			memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
+			       zone_bitmap_size);
+		}
+		blk_mq_unfreeze_queue(q);
+		kfree(seq_zones_wlock);
+		kfree(seq_zones_bitmap);
 	}
 
 	return 0;
@@ -661,6 +680,7 @@ err:
 
 int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 {
+	int64_t zone_blocks;
 	int ret;
 
 	if (!sd_is_zoned(sdkp))
@@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 	 * Check zone size: only devices with a constant zone size (except
 	 * an eventual last runt zone) that is a power of 2 are supported.
 	 */
-	ret = sd_zbc_check_zone_size(sdkp);
-	if (ret)
+	zone_blocks = sd_zbc_check_zone_size(sdkp);
+	ret = -EFBIG;
+	if (zone_blocks != (u32)zone_blocks)
+		goto err;
+	ret = zone_blocks;
+	if (ret < 0)
 		goto err;
 
 	/* The drive satisfies the kernel restrictions: set it up */
-	ret = sd_zbc_setup(sdkp);
+	ret = sd_zbc_setup(sdkp, zone_blocks);
 	if (ret)
 		goto err;
 
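Note the return-convention change above: sd_zbc_setup_seq_zones_bitmap()
now hands back either a valid bitmap or an errno encoded in the pointer
itself. A minimal, self-contained sketch of this standard kernel ERR_PTR
convention (err_ptr_demo() and its caller are hypothetical illustrations,
not part of the patch):

	#include <linux/err.h>
	#include <linux/slab.h>

	/* Hypothetical: return a buffer, or an errno encoded in the pointer. */
	static unsigned long *err_ptr_demo(size_t bytes)
	{
		unsigned long *p = kzalloc(bytes, GFP_KERNEL);

		if (!p)
			return ERR_PTR(-ENOMEM);
		return p;
	}

	/* Caller: IS_ERR() tests for an error, PTR_ERR() recovers the errno. */
	static int err_ptr_demo_caller(void)
	{
		unsigned long *p = err_ptr_demo(64);

		if (IS_ERR(p))
			return PTR_ERR(p);	/* e.g. -ENOMEM */
		kfree(p);
		return 0;
	}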
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9af3e0f430bc..21e21f273a21 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -605,6 +605,11 @@ struct request_queue {
 	 * initialized by the low level device driver (e.g. scsi/sd.c).
 	 * Stacking drivers (device mappers) may or may not initialize
 	 * these fields.
+	 *
+	 * Reads of this information must be protected with blk_queue_enter() /
+	 * blk_queue_exit(). Modifying this information is only allowed while
+	 * no requests are being processed. See also blk_mq_freeze_queue() and
+	 * blk_mq_unfreeze_queue().
 	 */
 	unsigned int nr_zones;
 	unsigned long *seq_zones_bitmap;
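The comment added to struct request_queue above implies a usage contract
for readers of the zoned-queue fields. A minimal sketch of a compliant
reader, assuming the blk_queue_enter() signature of this kernel era (it
has changed across versions); example_nr_zones() is a hypothetical
helper, not part of the patch:

	/* Hypothetical: sample q->nr_zones without racing against updates. */
	static unsigned int example_nr_zones(struct request_queue *q)
	{
		unsigned int nr_zones;

		if (blk_queue_enter(q, 0))	/* waits while the queue is frozen */
			return 0;		/* queue is being torn down */
		nr_zones = q->nr_zones;		/* stable until blk_queue_exit() */
		blk_queue_exit(q);

		return nr_zones;
	}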