aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi
diff options
context:
space:
mode:
authorStephen M. Cameron <scameron@beardog.cce.hp.com>2012-05-01 12:43:42 -0400
committerJames Bottomley <JBottomley@Parallels.com>2012-05-10 04:19:39 -0400
commite85c59746957fd6e3595d02cf614370056b5816e (patch)
tree2ddf892543ebd651eb98a7671d0398e07f7c160c /drivers/scsi
parent21334ea9086c31db38e76152a1e31001a0ed288a (diff)
[SCSI] hpsa: dial down lockup detection during firmware flash
Dial back the aggressiveness of the controller lockup detection thread. Currently it will declare the controller to be locked up if it goes for 10 seconds with no interrupts and no change in the heartbeat register. Dial this back to 30 seconds with no heartbeat change, and also snoop the ioctl path; if a firmware flash command is detected, dial it back further to 4 minutes until the firmware flash command completes. The reason for this is that during the firmware flash operation, the controller apparently doesn't update the heartbeat register as frequently as it is supposed to, and we can get a false positive. Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi')
-rw-r--r--drivers/scsi/hpsa.c39
-rw-r--r--drivers/scsi/hpsa.h2
-rw-r--r--drivers/scsi/hpsa_cmd.h1
3 files changed, 37 insertions, 5 deletions
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 0f0aac9f2581..796482badf13 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -569,12 +569,42 @@ static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
569 } 569 }
570} 570}
571 571
572static int is_firmware_flash_cmd(u8 *cdb)
573{
574 return cdb[0] == BMIC_WRITE && cdb[6] == BMIC_FLASH_FIRMWARE;
575}
576
577/*
578 * During firmware flash, the heartbeat register may not update as frequently
579 * as it should. So we dial down lockup detection during firmware flash, and
580 * dial it back up when firmware flash completes.
581 */
582#define HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH (240 * HZ)
583#define HEARTBEAT_SAMPLE_INTERVAL (30 * HZ)
584static void dial_down_lockup_detection_during_fw_flash(struct ctlr_info *h,
585 struct CommandList *c)
586{
587 if (!is_firmware_flash_cmd(c->Request.CDB))
588 return;
589 atomic_inc(&h->firmware_flash_in_progress);
590 h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL_DURING_FLASH;
591}
592
593static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h,
594 struct CommandList *c)
595{
596 if (is_firmware_flash_cmd(c->Request.CDB) &&
597 atomic_dec_and_test(&h->firmware_flash_in_progress))
598 h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
599}
600
572static void enqueue_cmd_and_start_io(struct ctlr_info *h, 601static void enqueue_cmd_and_start_io(struct ctlr_info *h,
573 struct CommandList *c) 602 struct CommandList *c)
574{ 603{
575 unsigned long flags; 604 unsigned long flags;
576 605
577 set_performant_mode(h, c); 606 set_performant_mode(h, c);
607 dial_down_lockup_detection_during_fw_flash(h, c);
578 spin_lock_irqsave(&h->lock, flags); 608 spin_lock_irqsave(&h->lock, flags);
579 addQ(&h->reqQ, c); 609 addQ(&h->reqQ, c);
580 h->Qdepth++; 610 h->Qdepth++;
@@ -3385,6 +3415,7 @@ static inline void finish_cmd(struct CommandList *c)
3385 spin_lock_irqsave(&c->h->lock, flags); 3415 spin_lock_irqsave(&c->h->lock, flags);
3386 removeQ(c); 3416 removeQ(c);
3387 spin_unlock_irqrestore(&c->h->lock, flags); 3417 spin_unlock_irqrestore(&c->h->lock, flags);
3418 dial_up_lockup_detection_on_fw_flash_complete(c->h, c);
3388 if (likely(c->cmd_type == CMD_SCSI)) 3419 if (likely(c->cmd_type == CMD_SCSI))
3389 complete_scsi_command(c); 3420 complete_scsi_command(c);
3390 else if (c->cmd_type == CMD_IOCTL_PEND) 3421 else if (c->cmd_type == CMD_IOCTL_PEND)
@@ -4562,9 +4593,6 @@ static void controller_lockup_detected(struct ctlr_info *h)
4562 spin_unlock_irqrestore(&h->lock, flags); 4593 spin_unlock_irqrestore(&h->lock, flags);
4563} 4594}
4564 4595
4565#define HEARTBEAT_SAMPLE_INTERVAL (10 * HZ)
4566#define HEARTBEAT_CHECK_MINIMUM_INTERVAL (HEARTBEAT_SAMPLE_INTERVAL / 2)
4567
4568static void detect_controller_lockup(struct ctlr_info *h) 4596static void detect_controller_lockup(struct ctlr_info *h)
4569{ 4597{
4570 u64 now; 4598 u64 now;
@@ -4575,7 +4603,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
4575 now = get_jiffies_64(); 4603 now = get_jiffies_64();
4576 /* If we've received an interrupt recently, we're ok. */ 4604 /* If we've received an interrupt recently, we're ok. */
4577 if (time_after64(h->last_intr_timestamp + 4605 if (time_after64(h->last_intr_timestamp +
4578 (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now)) 4606 (h->heartbeat_sample_interval), now))
4579 return; 4607 return;
4580 4608
4581 /* 4609 /*
@@ -4584,7 +4612,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
4584 * otherwise don't care about signals in this thread. 4612 * otherwise don't care about signals in this thread.
4585 */ 4613 */
4586 if (time_after64(h->last_heartbeat_timestamp + 4614 if (time_after64(h->last_heartbeat_timestamp +
4587 (HEARTBEAT_CHECK_MINIMUM_INTERVAL), now)) 4615 (h->heartbeat_sample_interval), now))
4588 return; 4616 return;
4589 4617
4590 /* If heartbeat has not changed since we last looked, we're not ok. */ 4618 /* If heartbeat has not changed since we last looked, we're not ok. */
@@ -4626,6 +4654,7 @@ static void add_ctlr_to_lockup_detector_list(struct ctlr_info *h)
4626{ 4654{
4627 unsigned long flags; 4655 unsigned long flags;
4628 4656
4657 h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
4629 spin_lock_irqsave(&lockup_detector_lock, flags); 4658 spin_lock_irqsave(&lockup_detector_lock, flags);
4630 list_add_tail(&h->lockup_list, &hpsa_ctlr_list); 4659 list_add_tail(&h->lockup_list, &hpsa_ctlr_list);
4631 spin_unlock_irqrestore(&lockup_detector_lock, flags); 4660 spin_unlock_irqrestore(&lockup_detector_lock, flags);
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index fb51ef7d48cc..981647989bfd 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -129,6 +129,8 @@ struct ctlr_info {
129 u64 last_intr_timestamp; 129 u64 last_intr_timestamp;
130 u32 last_heartbeat; 130 u32 last_heartbeat;
131 u64 last_heartbeat_timestamp; 131 u64 last_heartbeat_timestamp;
132 u32 heartbeat_sample_interval;
133 atomic_t firmware_flash_in_progress;
132 u32 lockup_detected; 134 u32 lockup_detected;
133 struct list_head lockup_list; 135 struct list_head lockup_list;
134 /* Address of h->q[x] is passed to intr handler to know which queue */ 136 /* Address of h->q[x] is passed to intr handler to know which queue */
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 43f163164b24..a894f2eca7ac 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -186,6 +186,7 @@ struct SenseSubsystem_info {
186#define BMIC_WRITE 0x27 186#define BMIC_WRITE 0x27
187#define BMIC_CACHE_FLUSH 0xc2 187#define BMIC_CACHE_FLUSH 0xc2
188#define HPSA_CACHE_FLUSH 0x01 /* C2 was already being used by HPSA */ 188#define HPSA_CACHE_FLUSH 0x01 /* C2 was already being used by HPSA */
189#define BMIC_FLASH_FIRMWARE 0xF7
189 190
190/* Command List Structure */ 191/* Command List Structure */
191union SCSI3Addr { 192union SCSI3Addr {