diff options
| author | Bart Van Assche <bart.vanassche@wdc.com> | 2018-04-16 21:04:41 -0400 |
|---|---|---|
| committer | Martin K. Petersen <martin.petersen@oracle.com> | 2018-04-19 00:04:10 -0400 |
| commit | ccce20fc7968d546fb1e8e147bf5cdc8afc4278a (patch) | |
| tree | 03eb832a953db5bfb9801adb4e6a08a25494c6b1 | |
| parent | 505aa4b6a8834a2300971c5220c380c3271ebde3 (diff) | |
scsi: sd_zbc: Avoid that resetting a zone fails sporadically
Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is
called from sd_probe_async() and since sd_revalidate_disk() calls
sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called
concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can
cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets
q->nr_zones to zero before restoring it to the actual value, even if no drive
characteristics have changed. Avoid that this can happen by making the
following changes:
- Protect the code that updates zone information with blk_queue_enter()
and blk_queue_exit().
- Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that
these functions do not modify struct scsi_disk before all zone
information has been obtained.
Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track
legacy requests"; kernel v4.15) the request queue freezing mechanism also
affects legacy request queues.
Fixes: 89d947561077 ("sd: Implement support for ZBC devices")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: stable@vger.kernel.org # v4.16
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
| -rw-r--r-- | drivers/scsi/sd_zbc.c | 140 | ||||
| -rw-r--r-- | include/linux/blkdev.h | 5 |
2 files changed, 87 insertions, 58 deletions
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 41df75eea57b..210407cd2341 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c | |||
| @@ -400,8 +400,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf) | |||
| 400 | * | 400 | * |
| 401 | * Check that all zones of the device are equal. The last zone can however | 401 | * Check that all zones of the device are equal. The last zone can however |
| 402 | * be smaller. The zone size must also be a power of two number of LBAs. | 402 | * be smaller. The zone size must also be a power of two number of LBAs. |
| 403 | * | ||
| 404 | * Returns the zone size in bytes upon success or an error code upon failure. | ||
| 403 | */ | 405 | */ |
| 404 | static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) | 406 | static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) |
| 405 | { | 407 | { |
| 406 | u64 zone_blocks = 0; | 408 | u64 zone_blocks = 0; |
| 407 | sector_t block = 0; | 409 | sector_t block = 0; |
| @@ -412,8 +414,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) | |||
| 412 | int ret; | 414 | int ret; |
| 413 | u8 same; | 415 | u8 same; |
| 414 | 416 | ||
| 415 | sdkp->zone_blocks = 0; | ||
| 416 | |||
| 417 | /* Get a buffer */ | 417 | /* Get a buffer */ |
| 418 | buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); | 418 | buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); |
| 419 | if (!buf) | 419 | if (!buf) |
| @@ -445,16 +445,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) | |||
| 445 | 445 | ||
| 446 | /* Parse zone descriptors */ | 446 | /* Parse zone descriptors */ |
| 447 | while (rec < buf + buf_len) { | 447 | while (rec < buf + buf_len) { |
| 448 | zone_blocks = get_unaligned_be64(&rec[8]); | 448 | u64 this_zone_blocks = get_unaligned_be64(&rec[8]); |
| 449 | if (sdkp->zone_blocks == 0) { | 449 | |
| 450 | sdkp->zone_blocks = zone_blocks; | 450 | if (zone_blocks == 0) { |
| 451 | } else if (zone_blocks != sdkp->zone_blocks && | 451 | zone_blocks = this_zone_blocks; |
| 452 | (block + zone_blocks < sdkp->capacity | 452 | } else if (this_zone_blocks != zone_blocks && |
| 453 | || zone_blocks > sdkp->zone_blocks)) { | 453 | (block + this_zone_blocks < sdkp->capacity |
| 454 | zone_blocks = 0; | 454 | || this_zone_blocks > zone_blocks)) { |
| 455 | this_zone_blocks = 0; | ||
| 455 | goto out; | 456 | goto out; |
| 456 | } | 457 | } |
| 457 | block += zone_blocks; | 458 | block += this_zone_blocks; |
| 458 | rec += 64; | 459 | rec += 64; |
| 459 | } | 460 | } |
| 460 | 461 | ||
| @@ -467,8 +468,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) | |||
| 467 | 468 | ||
| 468 | } while (block < sdkp->capacity); | 469 | } while (block < sdkp->capacity); |
| 469 | 470 | ||
| 470 | zone_blocks = sdkp->zone_blocks; | ||
| 471 | |||
| 472 | out: | 471 | out: |
| 473 | if (!zone_blocks) { | 472 | if (!zone_blocks) { |
| 474 | if (sdkp->first_scan) | 473 | if (sdkp->first_scan) |
| @@ -488,8 +487,7 @@ out: | |||
| 488 | "Zone size too large\n"); | 487 | "Zone size too large\n"); |
| 489 | ret = -ENODEV; | 488 | ret = -ENODEV; |
| 490 | } else { | 489 | } else { |
| 491 | sdkp->zone_blocks = zone_blocks; | 490 | ret = zone_blocks; |
| 492 | sdkp->zone_shift = ilog2(zone_blocks); | ||
| 493 | } | 491 | } |
| 494 | 492 | ||
| 495 | out_free: | 493 | out_free: |
| @@ -500,15 +498,14 @@ out_free: | |||
| 500 | 498 | ||
| 501 | /** | 499 | /** |
| 502 | * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). | 500 | * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). |
| 503 | * @sdkp: The disk of the bitmap | 501 | * @nr_zones: Number of zones to allocate space for. |
| 502 | * @numa_node: NUMA node to allocate the memory from. | ||
| 504 | */ | 503 | */ |
| 505 | static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) | 504 | static inline unsigned long * |
| 505 | sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node) | ||
| 506 | { | 506 | { |
| 507 | struct request_queue *q = sdkp->disk->queue; | 507 | return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long), |
| 508 | 508 | GFP_KERNEL, numa_node); | |
| 509 | return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones) | ||
| 510 | * sizeof(unsigned long), | ||
| 511 | GFP_KERNEL, q->node); | ||
| 512 | } | 509 | } |
| 513 | 510 | ||
| 514 | /** | 511 | /** |
| @@ -516,6 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) | |||
| 516 | * @sdkp: disk used | 513 | * @sdkp: disk used |
| 517 | * @buf: report reply buffer | 514 | * @buf: report reply buffer |
| 518 | * @buflen: length of @buf | 515 | * @buflen: length of @buf |
| 516 | * @zone_shift: logarithm base 2 of the number of blocks in a zone | ||
| 519 | * @seq_zones_bitmap: bitmap of sequential zones to set | 517 | * @seq_zones_bitmap: bitmap of sequential zones to set |
| 520 | * | 518 | * |
| 521 | * Parse reported zone descriptors in @buf to identify sequential zones and | 519 | * Parse reported zone descriptors in @buf to identify sequential zones and |
| @@ -525,7 +523,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) | |||
| 525 | * Return the LBA after the last zone reported. | 523 | * Return the LBA after the last zone reported. |
| 526 | */ | 524 | */ |
| 527 | static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, | 525 | static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, |
| 528 | unsigned int buflen, | 526 | unsigned int buflen, u32 zone_shift, |
| 529 | unsigned long *seq_zones_bitmap) | 527 | unsigned long *seq_zones_bitmap) |
| 530 | { | 528 | { |
| 531 | sector_t lba, next_lba = sdkp->capacity; | 529 | sector_t lba, next_lba = sdkp->capacity; |
| @@ -544,7 +542,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, | |||
| 544 | if (type != ZBC_ZONE_TYPE_CONV && | 542 | if (type != ZBC_ZONE_TYPE_CONV && |
| 545 | cond != ZBC_ZONE_COND_READONLY && | 543 | cond != ZBC_ZONE_COND_READONLY && |
| 546 | cond != ZBC_ZONE_COND_OFFLINE) | 544 | cond != ZBC_ZONE_COND_OFFLINE) |
| 547 | set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); | 545 | set_bit(lba >> zone_shift, seq_zones_bitmap); |
| 548 | next_lba = lba + get_unaligned_be64(&rec[8]); | 546 | next_lba = lba + get_unaligned_be64(&rec[8]); |
| 549 | rec += 64; | 547 | rec += 64; |
| 550 | } | 548 | } |
| @@ -553,12 +551,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, | |||
| 553 | } | 551 | } |
| 554 | 552 | ||
| 555 | /** | 553 | /** |
| 556 | * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. | 554 | * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. |
| 557 | * @sdkp: target disk | 555 | * @sdkp: target disk |
| 556 | * @zone_shift: logarithm base 2 of the number of blocks in a zone | ||
| 557 | * @nr_zones: number of zones to set up a seq zone bitmap for | ||
| 558 | * | 558 | * |
| 559 | * Allocate a zone bitmap and initialize it by identifying sequential zones. | 559 | * Allocate a zone bitmap and initialize it by identifying sequential zones. |
| 560 | */ | 560 | */ |
| 561 | static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) | 561 | static unsigned long * |
| 562 | sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, | ||
| 563 | u32 nr_zones) | ||
| 562 | { | 564 | { |
| 563 | struct request_queue *q = sdkp->disk->queue; | 565 | struct request_queue *q = sdkp->disk->queue; |
| 564 | unsigned long *seq_zones_bitmap; | 566 | unsigned long *seq_zones_bitmap; |
| @@ -566,9 +568,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) | |||
| 566 | unsigned char *buf; | 568 | unsigned char *buf; |
| 567 | int ret = -ENOMEM; | 569 | int ret = -ENOMEM; |
| 568 | 570 | ||
| 569 | seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); | 571 | seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node); |
| 570 | if (!seq_zones_bitmap) | 572 | if (!seq_zones_bitmap) |
| 571 | return -ENOMEM; | 573 | return ERR_PTR(-ENOMEM); |
| 572 | 574 | ||
| 573 | buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); | 575 | buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); |
| 574 | if (!buf) | 576 | if (!buf) |
| @@ -579,7 +581,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) | |||
| 579 | if (ret) | 581 | if (ret) |
| 580 | goto out; | 582 | goto out; |
| 581 | lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, | 583 | lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, |
| 582 | seq_zones_bitmap); | 584 | zone_shift, seq_zones_bitmap); |
| 583 | } | 585 | } |
| 584 | 586 | ||
| 585 | if (lba != sdkp->capacity) { | 587 | if (lba != sdkp->capacity) { |
| @@ -591,12 +593,9 @@ out: | |||
| 591 | kfree(buf); | 593 | kfree(buf); |
| 592 | if (ret) { | 594 | if (ret) { |
| 593 | kfree(seq_zones_bitmap); | 595 | kfree(seq_zones_bitmap); |
| 594 | return ret; | 596 | return ERR_PTR(ret); |
| 595 | } | 597 | } |
| 596 | 598 | return seq_zones_bitmap; | |
| 597 | q->seq_zones_bitmap = seq_zones_bitmap; | ||
| 598 | |||
| 599 | return 0; | ||
| 600 | } | 599 | } |
| 601 | 600 | ||
| 602 | static void sd_zbc_cleanup(struct scsi_disk *sdkp) | 601 | static void sd_zbc_cleanup(struct scsi_disk *sdkp) |
| @@ -612,44 +611,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp) | |||
| 612 | q->nr_zones = 0; | 611 | q->nr_zones = 0; |
| 613 | } | 612 | } |
| 614 | 613 | ||
| 615 | static int sd_zbc_setup(struct scsi_disk *sdkp) | 614 | static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks) |
| 616 | { | 615 | { |
| 617 | struct request_queue *q = sdkp->disk->queue; | 616 | struct request_queue *q = sdkp->disk->queue; |
| 617 | u32 zone_shift = ilog2(zone_blocks); | ||
| 618 | u32 nr_zones; | ||
| 618 | int ret; | 619 | int ret; |
| 619 | 620 | ||
| 620 | /* READ16/WRITE16 is mandatory for ZBC disks */ | ||
| 621 | sdkp->device->use_16_for_rw = 1; | ||
| 622 | sdkp->device->use_10_for_rw = 0; | ||
| 623 | |||
| 624 | /* chunk_sectors indicates the zone size */ | 621 | /* chunk_sectors indicates the zone size */ |
| 625 | blk_queue_chunk_sectors(sdkp->disk->queue, | 622 | blk_queue_chunk_sectors(q, |
| 626 | logical_to_sectors(sdkp->device, sdkp->zone_blocks)); | 623 | logical_to_sectors(sdkp->device, zone_blocks)); |
| 627 | sdkp->nr_zones = | 624 | nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift; |
| 628 | round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift; | ||
| 629 | 625 | ||
| 630 | /* | 626 | /* |
| 631 | * Initialize the device request queue information if the number | 627 | * Initialize the device request queue information if the number |
| 632 | * of zones changed. | 628 | * of zones changed. |
| 633 | */ | 629 | */ |
| 634 | if (sdkp->nr_zones != q->nr_zones) { | 630 | if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) { |
| 635 | 631 | unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; | |
| 636 | sd_zbc_cleanup(sdkp); | 632 | size_t zone_bitmap_size; |
| 637 | 633 | ||
| 638 | q->nr_zones = sdkp->nr_zones; | 634 | if (nr_zones) { |
| 639 | if (sdkp->nr_zones) { | 635 | seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, |
| 640 | q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); | 636 | q->node); |
| 641 | if (!q->seq_zones_wlock) { | 637 | if (!seq_zones_wlock) { |
| 642 | ret = -ENOMEM; | 638 | ret = -ENOMEM; |
| 643 | goto err; | 639 | goto err; |
| 644 | } | 640 | } |
| 645 | 641 | ||
| 646 | ret = sd_zbc_setup_seq_zones_bitmap(sdkp); | 642 | seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, |
| 647 | if (ret) { | 643 | zone_shift, nr_zones); |
| 648 | sd_zbc_cleanup(sdkp); | 644 | if (IS_ERR(seq_zones_bitmap)) { |
| 645 | ret = PTR_ERR(seq_zones_bitmap); | ||
| 646 | kfree(seq_zones_wlock); | ||
| 649 | goto err; | 647 | goto err; |
| 650 | } | 648 | } |
| 651 | } | 649 | } |
| 652 | 650 | zone_bitmap_size = BITS_TO_LONGS(nr_zones) * | |
| 651 | sizeof(unsigned long); | ||
| 652 | blk_mq_freeze_queue(q); | ||
| 653 | if (q->nr_zones != nr_zones) { | ||
| 654 | /* READ16/WRITE16 is mandatory for ZBC disks */ | ||
| 655 | sdkp->device->use_16_for_rw = 1; | ||
| 656 | sdkp->device->use_10_for_rw = 0; | ||
| 657 | |||
| 658 | sdkp->zone_blocks = zone_blocks; | ||
| 659 | sdkp->zone_shift = zone_shift; | ||
| 660 | sdkp->nr_zones = nr_zones; | ||
| 661 | q->nr_zones = nr_zones; | ||
| 662 | swap(q->seq_zones_wlock, seq_zones_wlock); | ||
| 663 | swap(q->seq_zones_bitmap, seq_zones_bitmap); | ||
| 664 | } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, | ||
| 665 | zone_bitmap_size) != 0) { | ||
| 666 | memcpy(q->seq_zones_bitmap, seq_zones_bitmap, | ||
| 667 | zone_bitmap_size); | ||
| 668 | } | ||
| 669 | blk_mq_unfreeze_queue(q); | ||
| 670 | kfree(seq_zones_wlock); | ||
| 671 | kfree(seq_zones_bitmap); | ||
| 653 | } | 672 | } |
| 654 | 673 | ||
| 655 | return 0; | 674 | return 0; |
| @@ -661,6 +680,7 @@ err: | |||
| 661 | 680 | ||
| 662 | int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) | 681 | int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) |
| 663 | { | 682 | { |
| 683 | int64_t zone_blocks; | ||
| 664 | int ret; | 684 | int ret; |
| 665 | 685 | ||
| 666 | if (!sd_is_zoned(sdkp)) | 686 | if (!sd_is_zoned(sdkp)) |
| @@ -697,12 +717,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) | |||
| 697 | * Check zone size: only devices with a constant zone size (except | 717 | * Check zone size: only devices with a constant zone size (except |
| 698 | * an eventual last runt zone) that is a power of 2 are supported. | 718 | * an eventual last runt zone) that is a power of 2 are supported. |
| 699 | */ | 719 | */ |
| 700 | ret = sd_zbc_check_zone_size(sdkp); | 720 | zone_blocks = sd_zbc_check_zone_size(sdkp); |
| 701 | if (ret) | 721 | ret = -EFBIG; |
| 722 | if (zone_blocks != (u32)zone_blocks) | ||
| 723 | goto err; | ||
| 724 | ret = zone_blocks; | ||
| 725 | if (ret < 0) | ||
| 702 | goto err; | 726 | goto err; |
| 703 | 727 | ||
| 704 | /* The drive satisfies the kernel restrictions: set it up */ | 728 | /* The drive satisfies the kernel restrictions: set it up */ |
| 705 | ret = sd_zbc_setup(sdkp); | 729 | ret = sd_zbc_setup(sdkp, zone_blocks); |
| 706 | if (ret) | 730 | if (ret) |
| 707 | goto err; | 731 | goto err; |
| 708 | 732 | ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..21e21f273a21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -605,6 +605,11 @@ struct request_queue { | |||
| 605 | * initialized by the low level device driver (e.g. scsi/sd.c). | 605 | * initialized by the low level device driver (e.g. scsi/sd.c). |
| 606 | * Stacking drivers (device mappers) may or may not initialize | 606 | * Stacking drivers (device mappers) may or may not initialize |
| 607 | * these fields. | 607 | * these fields. |
| 608 | * | ||
| 609 | * Reads of this information must be protected with blk_queue_enter() / | ||
| 610 | * blk_queue_exit(). Modifying this information is only allowed while | ||
| 611 | * no requests are being processed. See also blk_mq_freeze_queue() and | ||
| 612 | * blk_mq_unfreeze_queue(). | ||
| 608 | */ | 613 | */ |
| 609 | unsigned int nr_zones; | 614 | unsigned int nr_zones; |
| 610 | unsigned long *seq_zones_bitmap; | 615 | unsigned long *seq_zones_bitmap; |
