aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mark Haverkamp <markh@linux-foundation.org> 2007-03-15 13:27:45 -0400
committer James Bottomley <jejb@mulgrave.il.steeleye.com> 2007-03-20 11:56:03 -0400
commit 03d4433721880bf1972c924b168e4e1dd3c59d53 (patch)
tree 820ad2f8aea551d03e7ff8751aba7dfe1f674bcd
parent f2b1a06ad46209c6e631e3099138d1fa3f14d3a8 (diff)
[SCSI] aacraid: Improved error handling
Received from Mark Salyzyn. This set of fixes improves the error-handling stability of the driver. A common manifestation of the problems is a NULL pointer dereference in the interrupt handler when referencing portions of the scsi command context, or in the scsi_done handling when an offlined device is referenced. The aacraid driver currently does not get notification of orphaned command completions due to devices going offline. The driver also fails to handle commands that are finished by the error handler, which can therefore complete again later at the hands of the adapter, resulting in completion of an invalid scsi command context. Test Unit Ready calls are aborted on the assumption that the abort was successful, but it was not, and thus when the interrupt from the adapter occurs, they reference invalid command contexts. We add a TIMED_OUT flag to inform the aacraid FIB context that the interrupt service should merely release the driver resources and not complete the command up to the upper layers. We take advantage of this in the abort handler as well, for select abortable commands. We also detect and react if a command that cannot be aborted is still outstanding to the controller when it is reissued by the retry mechanism. Signed-off-by: Mark Haverkamp <markh@linux-foundation.org> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r-- drivers/scsi/aacraid/aachba.c 51
-rw-r--r-- drivers/scsi/aacraid/aacraid.h 1
-rw-r--r-- drivers/scsi/aacraid/commsup.c 23
-rw-r--r-- drivers/scsi/aacraid/dpcsup.c 24
-rw-r--r-- drivers/scsi/aacraid/linit.c 64
5 files changed, 130 insertions, 33 deletions
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 3d21d7dd2e5f..0c4e27eb6520 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -5,7 +5,7 @@
5 * based on the old aacraid driver that is.. 5 * based on the old aacraid driver that is..
6 * Adaptec aacraid device driver for Linux. 6 * Adaptec aacraid device driver for Linux.
7 * 7 *
8 * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com) 8 * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 11 * it under the terms of the GNU General Public License as published by
@@ -172,6 +172,30 @@ MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size.
172int expose_physicals = -1; 172int expose_physicals = -1;
173module_param(expose_physicals, int, S_IRUGO|S_IWUSR); 173module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
174MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on"); 174MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on");
175
176
177static inline int aac_valid_context(struct scsi_cmnd *scsicmd,
178 struct fib *fibptr) {
179 struct scsi_device *device;
180
181 if (unlikely(!scsicmd || !scsicmd->scsi_done )) {
182 dprintk((KERN_WARNING "aac_valid_context: scsi command corrupt\n"))
183;
184 aac_fib_complete(fibptr);
185 aac_fib_free(fibptr);
186 return 0;
187 }
188 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
189 device = scsicmd->device;
190 if (unlikely(!device || !scsi_device_online(device))) {
191 dprintk((KERN_WARNING "aac_valid_context: scsi device corrupt\n"));
192 aac_fib_complete(fibptr);
193 aac_fib_free(fibptr);
194 return 0;
195 }
196 return 1;
197}
198
175/** 199/**
176 * aac_get_config_status - check the adapter configuration 200 * aac_get_config_status - check the adapter configuration
177 * @common: adapter to query 201 * @common: adapter to query
@@ -342,6 +366,9 @@ static void get_container_name_callback(void *context, struct fib * fibptr)
342 scsicmd = (struct scsi_cmnd *) context; 366 scsicmd = (struct scsi_cmnd *) context;
343 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; 367 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
344 368
369 if (!aac_valid_context(scsicmd, fibptr))
370 return;
371
345 dprintk((KERN_DEBUG "get_container_name_callback[cpu %d]: t = %ld.\n", smp_processor_id(), jiffies)); 372 dprintk((KERN_DEBUG "get_container_name_callback[cpu %d]: t = %ld.\n", smp_processor_id(), jiffies));
346 BUG_ON(fibptr == NULL); 373 BUG_ON(fibptr == NULL);
347 374
@@ -431,9 +458,14 @@ static int aac_probe_container_callback2(struct scsi_cmnd * scsicmd)
431 458
432static int _aac_probe_container2(void * context, struct fib * fibptr) 459static int _aac_probe_container2(void * context, struct fib * fibptr)
433{ 460{
434 struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context; 461 struct fsa_dev_info *fsa_dev_ptr;
435 struct fsa_dev_info *fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev;
436 int (*callback)(struct scsi_cmnd *); 462 int (*callback)(struct scsi_cmnd *);
463 struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context;
464
465 if (!aac_valid_context(scsicmd, fibptr))
466 return 0;
467
468 fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev;
437 469
438 scsicmd->SCp.Status = 0; 470 scsicmd->SCp.Status = 0;
439 if (fsa_dev_ptr) { 471 if (fsa_dev_ptr) {
@@ -477,6 +509,9 @@ static int _aac_probe_container1(void * context, struct fib * fibptr)
477 scsicmd = (struct scsi_cmnd *) context; 509 scsicmd = (struct scsi_cmnd *) context;
478 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; 510 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
479 511
512 if (!aac_valid_context(scsicmd, fibptr))
513 return 0;
514
480 aac_fib_init(fibptr); 515 aac_fib_init(fibptr);
481 516
482 dinfo = (struct aac_query_mount *)fib_data(fibptr); 517 dinfo = (struct aac_query_mount *)fib_data(fibptr);
@@ -1287,6 +1322,9 @@ static void io_callback(void *context, struct fib * fibptr)
1287 scsicmd = (struct scsi_cmnd *) context; 1322 scsicmd = (struct scsi_cmnd *) context;
1288 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; 1323 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
1289 1324
1325 if (!aac_valid_context(scsicmd, fibptr))
1326 return;
1327
1290 dev = (struct aac_dev *)scsicmd->device->host->hostdata; 1328 dev = (struct aac_dev *)scsicmd->device->host->hostdata;
1291 cid = scmd_id(scsicmd); 1329 cid = scmd_id(scsicmd);
1292 1330
@@ -1534,6 +1572,9 @@ static void synchronize_callback(void *context, struct fib *fibptr)
1534 cmd = context; 1572 cmd = context;
1535 cmd->SCp.phase = AAC_OWNER_MIDLEVEL; 1573 cmd->SCp.phase = AAC_OWNER_MIDLEVEL;
1536 1574
1575 if (!aac_valid_context(cmd, fibptr))
1576 return;
1577
1537 dprintk((KERN_DEBUG "synchronize_callback[cpu %d]: t = %ld.\n", 1578 dprintk((KERN_DEBUG "synchronize_callback[cpu %d]: t = %ld.\n",
1538 smp_processor_id(), jiffies)); 1579 smp_processor_id(), jiffies));
1539 BUG_ON(fibptr == NULL); 1580 BUG_ON(fibptr == NULL);
@@ -2086,6 +2127,10 @@ static void aac_srb_callback(void *context, struct fib * fibptr)
2086 2127
2087 scsicmd = (struct scsi_cmnd *) context; 2128 scsicmd = (struct scsi_cmnd *) context;
2088 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; 2129 scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
2130
2131 if (!aac_valid_context(scsicmd, fibptr))
2132 return;
2133
2089 dev = (struct aac_dev *)scsicmd->device->host->hostdata; 2134 dev = (struct aac_dev *)scsicmd->device->host->hostdata;
2090 2135
2091 BUG_ON(fibptr == NULL); 2136 BUG_ON(fibptr == NULL);
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 9ca594b33d1c..2c0994b448d8 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -971,7 +971,6 @@ struct aac_dev
971 struct fib *fibs; 971 struct fib *fibs;
972 972
973 struct fib *free_fib; 973 struct fib *free_fib;
974 struct fib *timeout_fib;
975 spinlock_t fib_lock; 974 spinlock_t fib_lock;
976 975
977 struct aac_queue_block *queues; 976 struct aac_queue_block *queues;
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index c933df30f589..a23d7e5de5b0 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -5,7 +5,7 @@
5 * based on the old aacraid driver that is.. 5 * based on the old aacraid driver that is..
6 * Adaptec aacraid device driver for Linux. 6 * Adaptec aacraid device driver for Linux.
7 * 7 *
8 * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com) 8 * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 11 * it under the terms of the GNU General Public License as published by
@@ -178,7 +178,6 @@ struct fib *aac_fib_alloc(struct aac_dev *dev)
178 * @fibptr: fib to free up 178 * @fibptr: fib to free up
179 * 179 *
180 * Frees up a fib and places it on the appropriate queue 180 * Frees up a fib and places it on the appropriate queue
181 * (either free or timed out)
182 */ 181 */
183 182
184void aac_fib_free(struct fib *fibptr) 183void aac_fib_free(struct fib *fibptr)
@@ -186,19 +185,15 @@ void aac_fib_free(struct fib *fibptr)
186 unsigned long flags; 185 unsigned long flags;
187 186
188 spin_lock_irqsave(&fibptr->dev->fib_lock, flags); 187 spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
189 if (fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT) { 188 if (unlikely(fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT))
190 aac_config.fib_timeouts++; 189 aac_config.fib_timeouts++;
191 fibptr->next = fibptr->dev->timeout_fib; 190 if (fibptr->hw_fib_va->header.XferState != 0) {
192 fibptr->dev->timeout_fib = fibptr; 191 printk(KERN_WARNING "aac_fib_free, XferState != 0, fibptr = 0x%p, XferState = 0x%x\n",
193 } else { 192 (void*)fibptr,
194 if (fibptr->hw_fib_va->header.XferState != 0) { 193 le32_to_cpu(fibptr->hw_fib_va->header.XferState));
195 printk(KERN_WARNING "aac_fib_free, XferState != 0, fibptr = 0x%p, XferState = 0x%x\n", 194 }
196 (void*)fibptr, 195 fibptr->next = fibptr->dev->free_fib;
197 le32_to_cpu(fibptr->hw_fib_va->header.XferState)); 196 fibptr->dev->free_fib = fibptr;
198 }
199 fibptr->next = fibptr->dev->free_fib;
200 fibptr->dev->free_fib = fibptr;
201 }
202 spin_unlock_irqrestore(&fibptr->dev->fib_lock, flags); 197 spin_unlock_irqrestore(&fibptr->dev->fib_lock, flags);
203} 198}
204 199
diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c
index 26f4d563d959..4e53f9db1b2c 100644
--- a/drivers/scsi/aacraid/dpcsup.c
+++ b/drivers/scsi/aacraid/dpcsup.c
@@ -5,7 +5,7 @@
5 * based on the old aacraid driver that is.. 5 * based on the old aacraid driver that is..
6 * Adaptec aacraid device driver for Linux. 6 * Adaptec aacraid device driver for Linux.
7 * 7 *
8 * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com) 8 * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 11 * it under the terms of the GNU General Public License as published by
@@ -84,11 +84,13 @@ unsigned int aac_response_normal(struct aac_queue * q)
84 * continue. The caller has already been notified that 84 * continue. The caller has already been notified that
85 * the fib timed out. 85 * the fib timed out.
86 */ 86 */
87 if (!(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) 87 dev->queues->queue[AdapNormCmdQueue].numpending--;
88 dev->queues->queue[AdapNormCmdQueue].numpending--; 88
89 else { 89 if (unlikely(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) {
90 printk(KERN_WARNING "aacraid: FIB timeout (%x).\n", fib->flags); 90 spin_unlock_irqrestore(q->lock, flags);
91 printk(KERN_DEBUG"aacraid: hwfib=%p fib index=%i fib=%p\n",hwfib, hwfib->header.SenderData,fib); 91 aac_fib_complete(fib);
92 aac_fib_free(fib);
93 spin_lock_irqsave(q->lock, flags);
92 continue; 94 continue;
93 } 95 }
94 spin_unlock_irqrestore(q->lock, flags); 96 spin_unlock_irqrestore(q->lock, flags);
@@ -281,14 +283,14 @@ unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index)
281 * continue. The caller has already been notified that 283 * continue. The caller has already been notified that
282 * the fib timed out. 284 * the fib timed out.
283 */ 285 */
284 if ((fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) { 286 dev->queues->queue[AdapNormCmdQueue].numpending--;
285 printk(KERN_WARNING "aacraid: FIB timeout (%x).\n", fib->flags); 287
286 printk(KERN_DEBUG"aacraid: hwfib=%p index=%i fib=%p\n",hwfib, hwfib->header.SenderData,fib); 288 if (unlikely(fib->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) {
289 aac_fib_complete(fib);
290 aac_fib_free(fib);
287 return 0; 291 return 0;
288 } 292 }
289 293
290 dev->queues->queue[AdapNormCmdQueue].numpending--;
291
292 if (fast) { 294 if (fast) {
293 /* 295 /*
294 * Doctor the fib 296 * Doctor the fib
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 0f948c2fb609..3cf3f6472e94 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -5,7 +5,7 @@
5 * based on the old aacraid driver that is.. 5 * based on the old aacraid driver that is..
6 * Adaptec aacraid device driver for Linux. 6 * Adaptec aacraid device driver for Linux.
7 * 7 *
8 * Copyright (c) 2000 Adaptec, Inc. (aacraid@adaptec.com) 8 * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 11 * it under the terms of the GNU General Public License as published by
@@ -247,6 +247,19 @@ static struct aac_driver_ident aac_drivers[] = {
247 247
248static int aac_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) 248static int aac_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
249{ 249{
250 struct Scsi_Host *host = cmd->device->host;
251 struct aac_dev *dev = (struct aac_dev *)host->hostdata;
252 u32 count = 0;
253 cmd->scsi_done = done;
254 for (; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
255 struct fib * fib = &dev->fibs[count];
256 struct scsi_cmnd * command;
257 if (fib->hw_fib_va->header.XferState &&
258 ((command = fib->callback_data)) &&
259 (command == cmd) &&
260 (cmd->SCp.phase == AAC_OWNER_FIRMWARE))
261 return 0; /* Already owned by Adapter */
262 }
250 cmd->scsi_done = done; 263 cmd->scsi_done = done;
251 cmd->SCp.phase = AAC_OWNER_LOWLEVEL; 264 cmd->SCp.phase = AAC_OWNER_LOWLEVEL;
252 return (aac_scsi_cmd(cmd) ? FAILED : 0); 265 return (aac_scsi_cmd(cmd) ? FAILED : 0);
@@ -446,6 +459,40 @@ static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg)
446 return aac_do_ioctl(dev, cmd, arg); 459 return aac_do_ioctl(dev, cmd, arg);
447} 460}
448 461
462static int aac_eh_abort(struct scsi_cmnd* cmd)
463{
464 struct Scsi_Host * host = cmd->device->host;
465 struct aac_dev * aac = (struct aac_dev *)host->hostdata;
466 int count;
467 int ret = FAILED;
468
469 printk(KERN_ERR "%s: Host adapter abort request (%d,%d,%d,%d)\n",
470 AAC_DRIVERNAME,
471 cmd->device->host->host_no, sdev_channel(cmd->device),
472 sdev_id(cmd->device), cmd->device->lun);
473 switch (cmd->cmnd[0]) {
474 case SERVICE_ACTION_IN:
475 if (!(aac->raw_io_interface) ||
476 !(aac->raw_io_64) ||
477 ((cmd->cmnd[1] & 0x1f) != SAI_READ_CAPACITY_16))
478 break;
479 case INQUIRY:
480 case READ_CAPACITY:
481 case TEST_UNIT_READY:
482 /* Mark associated FIB to not complete, eh handler does this */
483 for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
484 struct fib * fib = &aac->fibs[count];
485 if (fib->hw_fib_va->header.XferState &&
486 (fib->callback_data == cmd)) {
487 fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
488 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
489 ret = SUCCESS;
490 }
491 }
492 }
493 return ret;
494}
495
449/* 496/*
450 * aac_eh_reset - Reset command handling 497 * aac_eh_reset - Reset command handling
451 * @scsi_cmd: SCSI command block causing the reset 498 * @scsi_cmd: SCSI command block causing the reset
@@ -457,12 +504,20 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
457 struct Scsi_Host * host = dev->host; 504 struct Scsi_Host * host = dev->host;
458 struct scsi_cmnd * command; 505 struct scsi_cmnd * command;
459 int count; 506 int count;
460 struct aac_dev * aac; 507 struct aac_dev * aac = (struct aac_dev *)host->hostdata;
461 unsigned long flags; 508 unsigned long flags;
462 509
510 /* Mark the associated FIB to not complete, eh handler does this */
511 for (count = 0; count < (host->can_queue + AAC_NUM_MGT_FIB); ++count) {
512 struct fib * fib = &aac->fibs[count];
513 if (fib->hw_fib_va->header.XferState &&
514 (fib->callback_data == cmd)) {
515 fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
516 cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
517 }
518 }
463 printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n", 519 printk(KERN_ERR "%s: Host adapter reset request. SCSI hang ?\n",
464 AAC_DRIVERNAME); 520 AAC_DRIVERNAME);
465 aac = (struct aac_dev *)host->hostdata;
466 521
467 if ((count = aac_check_health(aac))) 522 if ((count = aac_check_health(aac)))
468 return count; 523 return count;
@@ -496,7 +551,7 @@ static int aac_eh_reset(struct scsi_cmnd* cmd)
496 ssleep(1); 551 ssleep(1);
497 } 552 }
498 printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME); 553 printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
499 return -ETIMEDOUT; 554 return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */
500} 555}
501 556
502/** 557/**
@@ -796,6 +851,7 @@ static struct scsi_host_template aac_driver_template = {
796 .bios_param = aac_biosparm, 851 .bios_param = aac_biosparm,
797 .shost_attrs = aac_attrs, 852 .shost_attrs = aac_attrs,
798 .slave_configure = aac_slave_configure, 853 .slave_configure = aac_slave_configure,
854 .eh_abort_handler = aac_eh_abort,
799 .eh_host_reset_handler = aac_eh_reset, 855 .eh_host_reset_handler = aac_eh_reset,
800 .can_queue = AAC_NUM_IO_FIB, 856 .can_queue = AAC_NUM_IO_FIB,
801 .this_id = MAXIMUM_NUM_CONTAINERS, 857 .this_id = MAXIMUM_NUM_CONTAINERS,