aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/aacraid/commsup.c
diff options
context:
space:
mode:
authorMark Haverkamp <markh@osdl.org>2006-08-03 11:03:30 -0400
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2006-08-19 16:35:11 -0400
commit8c867b257d159ca04602d7087fa29f846785f9ea (patch)
tree4c95fc920744556d718b8d63371a6b525524cfbe /drivers/scsi/aacraid/commsup.c
parent90ee346651524eb275405d410f5d3bb6765a2d93 (diff)
[SCSI] aacraid: Reset adapter in recovery timeout
Received from Mark Salyzyn If the adapter is in blinkled (Firmware Assert) when error recovery timeout actions have been triggered, perform an adapter warm reset and restart the initialization. Signed-off-by: Mark Haverkamp <markh@osdl.org> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'drivers/scsi/aacraid/commsup.c')
-rw-r--r--drivers/scsi/aacraid/commsup.c258
1 files changed, 258 insertions, 0 deletions
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index c67da1321133..53add53be0bd 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -40,8 +40,10 @@
40#include <linux/blkdev.h> 40#include <linux/blkdev.h>
41#include <linux/delay.h> 41#include <linux/delay.h>
42#include <linux/kthread.h> 42#include <linux/kthread.h>
43#include <scsi/scsi.h>
43#include <scsi/scsi_host.h> 44#include <scsi/scsi_host.h>
44#include <scsi/scsi_device.h> 45#include <scsi/scsi_device.h>
46#include <scsi/scsi_cmnd.h>
45#include <asm/semaphore.h> 47#include <asm/semaphore.h>
46 48
47#include "aacraid.h" 49#include "aacraid.h"
@@ -1054,6 +1056,262 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
1054 1056
1055} 1057}
1056 1058
1059static int _aac_reset_adapter(struct aac_dev *aac)
1060{
1061 int index, quirks;
1062 u32 ret;
1063 int retval;
1064 struct Scsi_Host *host;
1065 struct scsi_device *dev;
1066 struct scsi_cmnd *command;
1067 struct scsi_cmnd *command_list;
1068
1069 /*
1070 * Assumptions:
1071 * - host is locked.
1072 * - in_reset is asserted, so no new i/o is getting to the
1073 * card.
1074 * - The card is dead.
1075 */
1076 host = aac->scsi_host_ptr;
1077 scsi_block_requests(host);
1078 aac_adapter_disable_int(aac);
1079 spin_unlock_irq(host->host_lock);
1080 kthread_stop(aac->thread);
1081
1082 /*
1083 * If a positive health, means in a known DEAD PANIC
1084 * state and the adapter could be reset to `try again'.
1085 */
1086 retval = aac_adapter_check_health(aac);
1087 if (retval == 0)
1088 retval = aac_adapter_sync_cmd(aac, IOP_RESET_ALWAYS,
1089 0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL);
1090 if (retval)
1091 retval = aac_adapter_sync_cmd(aac, IOP_RESET,
1092 0, 0, 0, 0, 0, 0, &ret, NULL, NULL, NULL, NULL);
1093
1094 if (retval)
1095 goto out;
1096 if (ret != 0x00000001) {
1097 retval = -ENODEV;
1098 goto out;
1099 }
1100
1101 index = aac->cardtype;
1102
1103 /*
1104 * Re-initialize the adapter, first free resources, then carefully
1105 * apply the initialization sequence to come back again. Only risk
1106 * is a change in Firmware dropping cache, it is assumed the caller
1107 * will ensure that i/o is queisced and the card is flushed in that
1108 * case.
1109 */
1110 aac_fib_map_free(aac);
1111 aac->hw_fib_va = NULL;
1112 aac->hw_fib_pa = 0;
1113 pci_free_consistent(aac->pdev, aac->comm_size, aac->comm_addr, aac->comm_phys);
1114 aac->comm_addr = NULL;
1115 aac->comm_phys = 0;
1116 kfree(aac->queues);
1117 aac->queues = NULL;
1118 free_irq(aac->pdev->irq, aac);
1119 kfree(aac->fsa_dev);
1120 aac->fsa_dev = NULL;
1121 if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) {
1122 if (((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) ||
1123 ((retval = pci_set_consistent_dma_mask(aac->pdev, DMA_32BIT_MASK))))
1124 goto out;
1125 } else {
1126 if (((retval = pci_set_dma_mask(aac->pdev, 0x7FFFFFFFULL))) ||
1127 ((retval = pci_set_consistent_dma_mask(aac->pdev, 0x7FFFFFFFULL))))
1128 goto out;
1129 }
1130 if ((retval = (*(aac_get_driver_ident(index)->init))(aac)))
1131 goto out;
1132 if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
1133 if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
1134 goto out;
1135 aac->thread = kthread_run(aac_command_thread, aac, aac->name);
1136 if (IS_ERR(aac->thread)) {
1137 retval = PTR_ERR(aac->thread);
1138 goto out;
1139 }
1140 (void)aac_get_adapter_info(aac);
1141 quirks = aac_get_driver_ident(index)->quirks;
1142 if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) {
1143 host->sg_tablesize = 34;
1144 host->max_sectors = (host->sg_tablesize * 8) + 112;
1145 }
1146 if ((quirks & AAC_QUIRK_17SG) && (host->sg_tablesize > 17)) {
1147 host->sg_tablesize = 17;
1148 host->max_sectors = (host->sg_tablesize * 8) + 112;
1149 }
1150 aac_get_config_status(aac, 1);
1151 aac_get_containers(aac);
1152 /*
1153 * This is where the assumption that the Adapter is quiesced
1154 * is important.
1155 */
1156 command_list = NULL;
1157 __shost_for_each_device(dev, host) {
1158 unsigned long flags;
1159 spin_lock_irqsave(&dev->list_lock, flags);
1160 list_for_each_entry(command, &dev->cmd_list, list)
1161 if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
1162 command->SCp.buffer = (struct scatterlist *)command_list;
1163 command_list = command;
1164 }
1165 spin_unlock_irqrestore(&dev->list_lock, flags);
1166 }
1167 while ((command = command_list)) {
1168 command_list = (struct scsi_cmnd *)command->SCp.buffer;
1169 command->SCp.buffer = NULL;
1170 command->result = DID_OK << 16
1171 | COMMAND_COMPLETE << 8
1172 | SAM_STAT_TASK_SET_FULL;
1173 command->SCp.phase = AAC_OWNER_ERROR_HANDLER;
1174 command->scsi_done(command);
1175 }
1176 retval = 0;
1177
1178out:
1179 aac->in_reset = 0;
1180 scsi_unblock_requests(host);
1181 spin_lock_irq(host->host_lock);
1182 return retval;
1183}
1184
1185int aac_check_health(struct aac_dev * aac)
1186{
1187 int BlinkLED;
1188 unsigned long time_now, flagv = 0;
1189 struct list_head * entry;
1190 struct Scsi_Host * host;
1191
1192 /* Extending the scope of fib_lock slightly to protect aac->in_reset */
1193 if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
1194 return 0;
1195
1196 if (aac->in_reset || !(BlinkLED = aac_adapter_check_health(aac))) {
1197 spin_unlock_irqrestore(&aac->fib_lock, flagv);
1198 return 0; /* OK */
1199 }
1200
1201 aac->in_reset = 1;
1202
1203 /* Fake up an AIF:
1204 * aac_aifcmd.command = AifCmdEventNotify = 1
1205 * aac_aifcmd.seqnum = 0xFFFFFFFF
1206 * aac_aifcmd.data[0] = AifEnExpEvent = 23
1207 * aac_aifcmd.data[1] = AifExeFirmwarePanic = 3
1208 * aac.aifcmd.data[2] = AifHighPriority = 3
1209 * aac.aifcmd.data[3] = BlinkLED
1210 */
1211
1212 time_now = jiffies/HZ;
1213 entry = aac->fib_list.next;
1214
1215 /*
1216 * For each Context that is on the
1217 * fibctxList, make a copy of the
1218 * fib, and then set the event to wake up the
1219 * thread that is waiting for it.
1220 */
1221 while (entry != &aac->fib_list) {
1222 /*
1223 * Extract the fibctx
1224 */
1225 struct aac_fib_context *fibctx = list_entry(entry, struct aac_fib_context, next);
1226 struct hw_fib * hw_fib;
1227 struct fib * fib;
1228 /*
1229 * Check if the queue is getting
1230 * backlogged
1231 */
1232 if (fibctx->count > 20) {
1233 /*
1234 * It's *not* jiffies folks,
1235 * but jiffies / HZ, so do not
1236 * panic ...
1237 */
1238 u32 time_last = fibctx->jiffies;
1239 /*
1240 * Has it been > 2 minutes
1241 * since the last read off
1242 * the queue?
1243 */
1244 if ((time_now - time_last) > aif_timeout) {
1245 entry = entry->next;
1246 aac_close_fib_context(aac, fibctx);
1247 continue;
1248 }
1249 }
1250 /*
1251 * Warning: no sleep allowed while
1252 * holding spinlock
1253 */
1254 hw_fib = kmalloc(sizeof(struct hw_fib), GFP_ATOMIC);
1255 fib = kmalloc(sizeof(struct fib), GFP_ATOMIC);
1256 if (fib && hw_fib) {
1257 struct aac_aifcmd * aif;
1258
1259 memset(hw_fib, 0, sizeof(struct hw_fib));
1260 memset(fib, 0, sizeof(struct fib));
1261 fib->hw_fib = hw_fib;
1262 fib->dev = aac;
1263 aac_fib_init(fib);
1264 fib->type = FSAFS_NTC_FIB_CONTEXT;
1265 fib->size = sizeof (struct fib);
1266 fib->data = hw_fib->data;
1267 aif = (struct aac_aifcmd *)hw_fib->data;
1268 aif->command = cpu_to_le32(AifCmdEventNotify);
1269 aif->seqnum = cpu_to_le32(0xFFFFFFFF);
1270 aif->data[0] = cpu_to_le32(AifEnExpEvent);
1271 aif->data[1] = cpu_to_le32(AifExeFirmwarePanic);
1272 aif->data[2] = cpu_to_le32(AifHighPriority);
1273 aif->data[3] = cpu_to_le32(BlinkLED);
1274
1275 /*
1276 * Put the FIB onto the
1277 * fibctx's fibs
1278 */
1279 list_add_tail(&fib->fiblink, &fibctx->fib_list);
1280 fibctx->count++;
1281 /*
1282 * Set the event to wake up the
1283 * thread that will waiting.
1284 */
1285 up(&fibctx->wait_sem);
1286 } else {
1287 printk(KERN_WARNING "aifd: didn't allocate NewFib.\n");
1288 kfree(fib);
1289 kfree(hw_fib);
1290 }
1291 entry = entry->next;
1292 }
1293
1294 spin_unlock_irqrestore(&aac->fib_lock, flagv);
1295
1296 if (BlinkLED < 0) {
1297 printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED);
1298 goto out;
1299 }
1300
1301 printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
1302
1303 host = aac->scsi_host_ptr;
1304 spin_lock_irqsave(host->host_lock, flagv);
1305 BlinkLED = _aac_reset_adapter(aac);
1306 spin_unlock_irqrestore(host->host_lock, flagv);
1307 return BlinkLED;
1308
1309out:
1310 aac->in_reset = 0;
1311 return BlinkLED;
1312}
1313
1314
1057/** 1315/**
1058 * aac_command_thread - command processing thread 1316 * aac_command_thread - command processing thread
1059 * @dev: Adapter to monitor 1317 * @dev: Adapter to monitor