diff options
author | Stephen M. Cameron <scameron@beardog.cce.hp.com> | 2012-05-01 12:43:42 -0400 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2012-05-10 04:19:39 -0400 |
commit | e85c59746957fd6e3595d02cf614370056b5816e (patch) | |
tree | 2ddf892543ebd651eb98a7671d0398e07f7c160c /drivers/scsi/hpsa.c | |
parent | 21334ea9086c31db38e76152a1e31001a0ed288a (diff) |
[SCSI] hpsa: dial down lockup detection during firmware flash
Dial back the aggressiveness of the controller lockup detection thread.
Currently it will declare the controller to be locked up if it goes
for 10 seconds with no interrupts and no change in the heartbeat
register. Dial this back to 30 seconds with no heartbeat change, and
also snoop the ioctl path and if a firmware flash command is detected,
dial it back further to 4 minutes until the firmware flash command
completes. The reason for this is that during the firmware flash
operation, the controller apparently doesn't update the heartbeat
register as frequently as it is supposed to, and we can get a false
positive.
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi/hpsa.c')
-rw-r--r-- | drivers/scsi/hpsa.c | 39 |
1 files changed, 34 insertions, 5 deletions
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0f0aac9f2581..796482badf13 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c | |||
@@ -569,12 +569,42 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c) | |||
569 | } | 569 | } |
570 | } | 570 | } |
571 | 571 | ||
572 | static int is_firmware_flash_cmd(u8 *cdb) | ||
573 | { | ||
574 | return cdb[0] == BMIC_WRITE && cdb[6] == BMIC_FLASH_FIRMWARE; | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * During firmware flash, the heartbeat register may not update as frequently | ||
579 | * as it should. So we dial down lockup detection during firmware flash, and | ||
580 | * dial it back up when firmware flash completes. | ||
581 | */ | ||
582 | #define HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH (240 * HZ) | ||
583 | #define HEARTBEAT_SAMPLE_INTERVAL (30 * HZ) | ||
584 | static void dial_down_lockup_detection_during_fw_flash(struct ctlr_info *h, | ||
585 | struct CommandList *c) | ||
586 | { | ||
587 | if (!is_firmware_flash_cmd(c->Request.CDB)) | ||
588 | return; | ||
589 | atomic_inc(&h->firmware_flash_in_progress); | ||
590 | h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH; | ||
591 | } | ||
592 | |||
593 | static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, | ||
594 | struct CommandList *c) | ||
595 | { | ||
596 | if (is_firmware_flash_cmd(c->Request.CDB) && | ||
597 | atomic_dec_and_test(&h->firmware_flash_in_progress)) | ||
598 | h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; | ||
599 | } | ||
600 | |||
572 | static void enqueue_cmd_and_start_io(struct ctlr_info *h, | 601 | static void enqueue_cmd_and_start_io(struct ctlr_info *h, |
573 | struct CommandList *c) | 602 | struct CommandList *c) |
574 | { | 603 | { |
575 | unsigned long flags; | 604 | unsigned long flags; |
576 | 605 | ||
577 | set_performant_mode(h, c); | 606 | set_performant_mode(h, c); |
607 | dial_down_lockup_detection_during_fw_flash(h, c); | ||
578 | spin_lock_irqsave(&h->lock, flags); | 608 | spin_lock_irqsave(&h->lock, flags); |
579 | addQ(&h->reqQ, c); | 609 | addQ(&h->reqQ, c); |
580 | h->Qdepth++; | 610 | h->Qdepth++; |
@@ -3385,6 +3415,7 @@ static inline void finish_cmd(struct CommandList *c) | |||
3385 | spin_lock_irqsave(&c->h->lock, flags); | 3415 | spin_lock_irqsave(&c->h->lock, flags); |
3386 | removeQ(c); | 3416 | removeQ(c); |
3387 | spin_unlock_irqrestore(&c->h->lock, flags); | 3417 | spin_unlock_irqrestore(&c->h->lock, flags); |
3418 | dial_up_lockup_detection_on_fw_flash_complete(c->h, c); | ||
3388 | if (likely(c->cmd_type == CMD_SCSI)) | 3419 | if (likely(c->cmd_type == CMD_SCSI)) |
3389 | complete_scsi_command(c); | 3420 | complete_scsi_command(c); |
3390 | else if (c->cmd_type == CMD_IOCTL_PEND) | 3421 | else if (c->cmd_type == CMD_IOCTL_PEND) |
@@ -4562,9 +4593,6 @@ static void controller_lockup_detected(struct ctlr_info *h) | |||
4562 | spin_unlock_irqrestore(&h->lock, flags); | 4593 | spin_unlock_irqrestore(&h->lock, flags); |
4563 | } | 4594 | } |
4564 | 4595 | ||
4565 | #define HEARTBEAT_SAMPLE_INTERVAL (10 * HZ) | ||
4566 | #define HEARTBEAT_CHECK_MINIMUM_INTERVAL (HEARTBEAT_SAMPLE_INTERVAL / 2) | ||
4567 | |||
4568 | static void detect_controller_lockup(struct ctlr_info *h) | 4596 | static void detect_controller_lockup(struct ctlr_info *h) |
4569 | { | 4597 | { |
4570 | u64 now; | 4598 | u64 now; |
@@ -4575,7 +4603,7 @@ static void detect_controller_lockup(struct ctlr_info *h) | |||
4575 | now = get_jiffies_64(); | 4603 | now = get_jiffies_64(); |
4576 | /* If we've received an interrupt recently, we're ok. */ | 4604 | /* If we've received an interrupt recently, we're ok. */ |
4577 | if (time_after64(h->last_intr_timestamp + | 4605 | if (time_after64(h->last_intr_timestamp + |
4578 | (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now)) | 4606 | (h->heartbeat_sample_interval), now)) |
4579 | return; | 4607 | return; |
4580 | 4608 | ||
4581 | /* | 4609 | /* |
@@ -4584,7 +4612,7 @@ static void detect_controller_lockup(struct ctlr_info *h) | |||
4584 | * otherwise don't care about signals in this thread. | 4612 | * otherwise don't care about signals in this thread. |
4585 | */ | 4613 | */ |
4586 | if (time_after64(h->last_heartbeat_timestamp + | 4614 | if (time_after64(h->last_heartbeat_timestamp + |
4587 | (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now)) | 4615 | (h->heartbeat_sample_interval), now)) |
4588 | return; | 4616 | return; |
4589 | 4617 | ||
4590 | /* If heartbeat has not changed since we last looked, we're not ok. */ | 4618 | /* If heartbeat has not changed since we last looked, we're not ok. */ |
@@ -4626,6 +4654,7 @@ static void add_ctlr_to_lockup_detector_list(struct ctlr_info *h) | |||
4626 | { | 4654 | { |
4627 | unsigned long flags; | 4655 | unsigned long flags; |
4628 | 4656 | ||
4657 | h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; | ||
4629 | spin_lock_irqsave(&lockup_detector_lock, flags); | 4658 | spin_lock_irqsave(&lockup_detector_lock, flags); |
4630 | list_add_tail(&h->lockup_list, &hpsa_ctlr_list); | 4659 | list_add_tail(&h->lockup_list, &hpsa_ctlr_list); |
4631 | spin_unlock_irqrestore(&lockup_detector_lock, flags); | 4660 | spin_unlock_irqrestore(&lockup_detector_lock, flags); |