about summary refs log tree commit diff stats
path: root/drivers/scsi/hpsa.c
diff options
context:
space:
mode:
author	Webb Scales <webbnh@hp.com>	2015-01-23 17:43:35 -0500
committer	James Bottomley <JBottomley@Parallels.com>	2015-02-02 12:57:40 -0500
commit	281a7fd03ea37c979bbba4d8376595c0288e3252 (patch)
tree	0394dd2685426d192027fb5b1f0c4dbb5fd6da27 /drivers/scsi/hpsa.c
parent	03383736348bb73a45f8460afca3c5f5bd1be172 (diff)
hpsa: fix race between abort handler and main i/o path
This means changing the allocator to reference count commands. The reference count is now the authoritative indicator of whether a command is allocated or not. The h->cmd_pool_bits bitmap is now only a heuristic hint to speed up the allocation process, it is no longer the authoritative record of allocated commands. Since we changed the command allocator to use reference counting as the authoritative indicator of whether a command is allocated, fail_all_outstanding_cmds needs to use the reference count not h->cmd_pool_bits for this purpose. Fix hpsa_drain_accel_commands to use the reference count as the authoritative indicator of whether a command is allocated instead of the h->cmd_pool_bits bitmap. Reviewed-by: Scott Teel <scott.teel@pmcs.com> Signed-off-by: Don Brace <don.brace@pmcs.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers/scsi/hpsa.c')
-rw-r--r--	drivers/scsi/hpsa.c	109
1 file changed, 62 insertions(+), 47 deletions(-)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 60f57347d53b..c95a20c5269b 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4552,6 +4552,7 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
4552 char msg[256]; /* For debug messaging. */ 4552 char msg[256]; /* For debug messaging. */
4553 int ml = 0; 4553 int ml = 0;
4554 __le32 tagupper, taglower; 4554 __le32 tagupper, taglower;
4555 int refcount;
4555 4556
4556 /* Find the controller of the command to be aborted */ 4557 /* Find the controller of the command to be aborted */
4557 h = sdev_to_hba(sc->device); 4558 h = sdev_to_hba(sc->device);
@@ -4580,9 +4581,13 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
4580 /* Get SCSI command to be aborted */ 4581 /* Get SCSI command to be aborted */
4581 abort = (struct CommandList *) sc->host_scribble; 4582 abort = (struct CommandList *) sc->host_scribble;
4582 if (abort == NULL) { 4583 if (abort == NULL) {
4583 dev_err(&h->pdev->dev, "%s FAILED, Command to abort is NULL.\n", 4584 /* This can happen if the command already completed. */
4584 msg); 4585 return SUCCESS;
4585 return FAILED; 4586 }
4587 refcount = atomic_inc_return(&abort->refcount);
4588 if (refcount == 1) { /* Command is done already. */
4589 cmd_free(h, abort);
4590 return SUCCESS;
4586 } 4591 }
4587 hpsa_get_tag(h, abort, &taglower, &tagupper); 4592 hpsa_get_tag(h, abort, &taglower, &tagupper);
4588 ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower); 4593 ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
@@ -4604,6 +4609,7 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
4604 dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n", 4609 dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n",
4605 h->scsi_host->host_no, 4610 h->scsi_host->host_no,
4606 dev->bus, dev->target, dev->lun); 4611 dev->bus, dev->target, dev->lun);
4612 cmd_free(h, abort);
4607 return FAILED; 4613 return FAILED;
4608 } 4614 }
4609 dev_info(&h->pdev->dev, "%s REQUEST SUCCEEDED.\n", msg); 4615 dev_info(&h->pdev->dev, "%s REQUEST SUCCEEDED.\n", msg);
@@ -4615,32 +4621,35 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
4615 */ 4621 */
4616#define ABORT_COMPLETE_WAIT_SECS 30 4622#define ABORT_COMPLETE_WAIT_SECS 30
4617 for (i = 0; i < ABORT_COMPLETE_WAIT_SECS * 10; i++) { 4623 for (i = 0; i < ABORT_COMPLETE_WAIT_SECS * 10; i++) {
4618 if (test_bit(abort->cmdindex & (BITS_PER_LONG - 1), 4624 refcount = atomic_read(&abort->refcount);
4619 h->cmd_pool_bits + 4625 if (refcount < 2) {
4620 (abort->cmdindex / BITS_PER_LONG))) 4626 cmd_free(h, abort);
4621 msleep(100);
4622 else
4623 return SUCCESS; 4627 return SUCCESS;
4628 } else {
4629 msleep(100);
4630 }
4624 } 4631 }
4625 dev_warn(&h->pdev->dev, "%s FAILED. Aborted command has not completed after %d seconds.\n", 4632 dev_warn(&h->pdev->dev, "%s FAILED. Aborted command has not completed after %d seconds.\n",
4626 msg, ABORT_COMPLETE_WAIT_SECS); 4633 msg, ABORT_COMPLETE_WAIT_SECS);
4634 cmd_free(h, abort);
4627 return FAILED; 4635 return FAILED;
4628} 4636}
4629 4637
4630
4631/* 4638/*
4632 * For operations that cannot sleep, a command block is allocated at init, 4639 * For operations that cannot sleep, a command block is allocated at init,
4633 * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track 4640 * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
4634 * which ones are free or in use. Lock must be held when calling this. 4641 * which ones are free or in use. Lock must be held when calling this.
4635 * cmd_free() is the complement. 4642 * cmd_free() is the complement.
4636 */ 4643 */
4644
4637static struct CommandList *cmd_alloc(struct ctlr_info *h) 4645static struct CommandList *cmd_alloc(struct ctlr_info *h)
4638{ 4646{
4639 struct CommandList *c; 4647 struct CommandList *c;
4640 int i; 4648 int i;
4641 union u64bit temp64; 4649 union u64bit temp64;
4642 dma_addr_t cmd_dma_handle, err_dma_handle; 4650 dma_addr_t cmd_dma_handle, err_dma_handle;
4643 int loopcount; 4651 int refcount;
4652 unsigned long offset = 0;
4644 4653
4645 /* There is some *extremely* small but non-zero chance that that 4654 /* There is some *extremely* small but non-zero chance that that
4646 * multiple threads could get in here, and one thread could 4655 * multiple threads could get in here, and one thread could
@@ -4653,23 +4662,27 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
4653 * infrequently as to be indistinguishable from never. 4662 * infrequently as to be indistinguishable from never.
4654 */ 4663 */
4655 4664
4656 loopcount = 0; 4665 for (;;) {
4657 do { 4666 i = find_next_zero_bit(h->cmd_pool_bits, h->nr_cmds, offset);
4658 i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds); 4667 if (unlikely(i == h->nr_cmds)) {
4659 if (i == h->nr_cmds) 4668 offset = 0;
4660 i = 0; 4669 continue;
4661 loopcount++; 4670 }
4662 } while (test_and_set_bit(i & (BITS_PER_LONG - 1), 4671 c = h->cmd_pool + i;
4663 h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0 && 4672 refcount = atomic_inc_return(&c->refcount);
4664 loopcount < 10); 4673 if (unlikely(refcount > 1)) {
4665 4674 cmd_free(h, c); /* already in use */
4666 /* Thread got starved? We do not expect this to ever happen. */ 4675 offset = (i + 1) % h->nr_cmds;
4667 if (loopcount >= 10) 4676 continue;
4668 return NULL; 4677 }
4669 4678 set_bit(i & (BITS_PER_LONG - 1),
4670 c = h->cmd_pool + i; 4679 h->cmd_pool_bits + (i / BITS_PER_LONG));
4671 memset(c, 0, sizeof(*c)); 4680 break; /* it's ours now. */
4672 c->Header.tag = cpu_to_le64((u64) i << DIRECT_LOOKUP_SHIFT); 4681 }
4682
4683 /* Zero out all of commandlist except the last field, refcount */
4684 memset(c, 0, offsetof(struct CommandList, refcount));
4685 c->Header.tag = cpu_to_le64((u64) (i << DIRECT_LOOKUP_SHIFT));
4673 cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(*c); 4686 cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(*c);
4674 c->err_info = h->errinfo_pool + i; 4687 c->err_info = h->errinfo_pool + i;
4675 memset(c->err_info, 0, sizeof(*c->err_info)); 4688 memset(c->err_info, 0, sizeof(*c->err_info));
@@ -4680,8 +4693,8 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
4680 4693
4681 c->busaddr = (u32) cmd_dma_handle; 4694 c->busaddr = (u32) cmd_dma_handle;
4682 temp64.val = (u64) err_dma_handle; 4695 temp64.val = (u64) err_dma_handle;
4683 c->ErrDesc.Addr = cpu_to_le64(err_dma_handle); 4696 c->ErrDesc.Addr = cpu_to_le64((u64) err_dma_handle);
4684 c->ErrDesc.Len = cpu_to_le32(sizeof(*c->err_info)); 4697 c->ErrDesc.Len = cpu_to_le32((u32) sizeof(*c->err_info));
4685 4698
4686 c->h = h; 4699 c->h = h;
4687 return c; 4700 return c;
@@ -4689,11 +4702,13 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
4689 4702
4690static void cmd_free(struct ctlr_info *h, struct CommandList *c) 4703static void cmd_free(struct ctlr_info *h, struct CommandList *c)
4691{ 4704{
4692 int i; 4705 if (atomic_dec_and_test(&c->refcount)) {
4706 int i;
4693 4707
4694 i = c - h->cmd_pool; 4708 i = c - h->cmd_pool;
4695 clear_bit(i & (BITS_PER_LONG - 1), 4709 clear_bit(i & (BITS_PER_LONG - 1),
4696 h->cmd_pool_bits + (i / BITS_PER_LONG)); 4710 h->cmd_pool_bits + (i / BITS_PER_LONG));
4711 }
4697} 4712}
4698 4713
4699#ifdef CONFIG_COMPAT 4714#ifdef CONFIG_COMPAT
@@ -6598,17 +6613,18 @@ static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h)
6598/* Called when controller lockup detected. */ 6613/* Called when controller lockup detected. */
6599static void fail_all_outstanding_cmds(struct ctlr_info *h) 6614static void fail_all_outstanding_cmds(struct ctlr_info *h)
6600{ 6615{
6601 int i; 6616 int i, refcount;
6602 struct CommandList *c = NULL; 6617 struct CommandList *c;
6603 6618
6604 flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */ 6619 flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */
6605 for (i = 0; i < h->nr_cmds; i++) { 6620 for (i = 0; i < h->nr_cmds; i++) {
6606 if (!test_bit(i & (BITS_PER_LONG - 1),
6607 h->cmd_pool_bits + (i / BITS_PER_LONG)))
6608 continue;
6609 c = h->cmd_pool + i; 6621 c = h->cmd_pool + i;
6610 c->err_info->CommandStatus = CMD_HARDWARE_ERR; 6622 refcount = atomic_inc_return(&c->refcount);
6611 finish_cmd(c); 6623 if (refcount > 1) {
6624 c->err_info->CommandStatus = CMD_HARDWARE_ERR;
6625 finish_cmd(c);
6626 }
6627 cmd_free(h, c);
6612 } 6628 }
6613} 6629}
6614 6630
@@ -6645,9 +6661,7 @@ static void controller_lockup_detected(struct ctlr_info *h)
6645 dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n", 6661 dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
6646 lockup_detected); 6662 lockup_detected);
6647 pci_disable_device(h->pdev); 6663 pci_disable_device(h->pdev);
6648 spin_lock_irqsave(&h->lock, flags);
6649 fail_all_outstanding_cmds(h); 6664 fail_all_outstanding_cmds(h);
6650 spin_unlock_irqrestore(&h->lock, flags);
6651} 6665}
6652 6666
6653static void detect_controller_lockup(struct ctlr_info *h) 6667static void detect_controller_lockup(struct ctlr_info *h)
@@ -7449,18 +7463,19 @@ static void hpsa_drain_accel_commands(struct ctlr_info *h)
7449{ 7463{
7450 struct CommandList *c = NULL; 7464 struct CommandList *c = NULL;
7451 int i, accel_cmds_out; 7465 int i, accel_cmds_out;
7466 int refcount;
7452 7467
7453 do { /* wait for all outstanding ioaccel commands to drain out */ 7468 do { /* wait for all outstanding ioaccel commands to drain out */
7454 accel_cmds_out = 0; 7469 accel_cmds_out = 0;
7455 for (i = 0; i < h->nr_cmds; i++) { 7470 for (i = 0; i < h->nr_cmds; i++) {
7456 if (!test_bit(i & (BITS_PER_LONG - 1),
7457 h->cmd_pool_bits + (i / BITS_PER_LONG)))
7458 continue;
7459 c = h->cmd_pool + i; 7471 c = h->cmd_pool + i;
7460 accel_cmds_out += is_accelerated_cmd(c); 7472 refcount = atomic_inc_return(&c->refcount);
7473 if (refcount > 1) /* Command is allocated */
7474 accel_cmds_out += is_accelerated_cmd(c);
7475 cmd_free(h, c);
7461 } 7476 }
7462 if (accel_cmds_out <= 0) 7477 if (accel_cmds_out <= 0)
7463 break; 7478 break;
7464 msleep(100); 7479 msleep(100);
7465 } while (1); 7480 } while (1);
7466} 7481}