aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/aacraid/commsup.c
diff options
context:
space:
mode:
authorPenchala Narasimha Reddy Chilakala, ERS-HCLTech <narasimhareddyc@hcl.in>2009-12-21 08:09:27 -0500
committerJames Bottomley <James.Bottomley@suse.de>2010-01-17 13:16:17 -0500
commitcacb6dc3d7fea751879a225c15e48228415e6359 (patch)
treef0d1b3792febab8910274e15f0076053a825e392 /drivers/scsi/aacraid/commsup.c
parente6622df3bb1a8e1135f4b84928e24d4c6802f6b5 (diff)
[SCSI] aacraid: fix File System going into read-only mode
These particular problems were reported by Cisco and SAP and customers as well. Cisco reported on RHEL4 U6 and SAP reported on SLES9 SP4 and SLES10 SP2. We added these fixes on RHEL4 U6 and gave a private build to IBM and Cisco. Cisco and IBM tested it for more than 15 days and they reported that they did not see the issue so far. Before the fix, Cisco used to see the issue within 5 days. We generated a patch for SLES9 SP4 and SLES10 SP2 and submitted to Novell. Novell applied the patch and gave a test build to SAP. SAP tested and reported that the build is working properly. We also tested in our lab using the tools "dishogsync", which is IO stress tool and the tool was provided by Cisco. Issue1: File System going into read-only mode Root cause: The driver tends to not free the memory (FIB) when the management request exits prematurely. The accumulation of such un-freed memory causes the driver to fail to allocate anymore memory (FIB) and hence return 0x70000 value to the upper layer, which puts the file system into read only mode. Fix details: The fix makes sure to free the memory (FIB) even if the request exits prematurely hence ensuring the driver wouldn't run out of memory (FIBs). Issue2: False Raid Alert occurs When the Physical Drives and Logical drives are reported as deleted or added, even though there is no change done on the system Root cause: Driver IOCTLs is signaled with EINTR while waiting on response from the lower layers. Returning "EINTR" will never initiate internal retry. Fix details: The issue was fixed by replacing "EINTR" with "ERESTARTSYS" for mid-layer retries. Signed-off-by: Penchala Narasimha Reddy <ServeRAIDDriver@hcl.in> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/aacraid/commsup.c')
-rw-r--r--drivers/scsi/aacraid/commsup.c72
1 files changed, 61 insertions, 11 deletions
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 956261f25181..94d2954d79ae 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -189,7 +189,14 @@ struct fib *aac_fib_alloc(struct aac_dev *dev)
189 189
190void aac_fib_free(struct fib *fibptr) 190void aac_fib_free(struct fib *fibptr)
191{ 191{
192 unsigned long flags; 192 unsigned long flags, flagsv;
193
194 spin_lock_irqsave(&fibptr->event_lock, flagsv);
195 if (fibptr->done == 2) {
196 spin_unlock_irqrestore(&fibptr->event_lock, flagsv);
197 return;
198 }
199 spin_unlock_irqrestore(&fibptr->event_lock, flagsv);
193 200
194 spin_lock_irqsave(&fibptr->dev->fib_lock, flags); 201 spin_lock_irqsave(&fibptr->dev->fib_lock, flags);
195 if (unlikely(fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT)) 202 if (unlikely(fibptr->flags & FIB_CONTEXT_FLAG_TIMED_OUT))
@@ -390,6 +397,8 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
390 struct hw_fib * hw_fib = fibptr->hw_fib_va; 397 struct hw_fib * hw_fib = fibptr->hw_fib_va;
391 unsigned long flags = 0; 398 unsigned long flags = 0;
392 unsigned long qflags; 399 unsigned long qflags;
400 unsigned long mflags = 0;
401
393 402
394 if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned))) 403 if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned)))
395 return -EBUSY; 404 return -EBUSY;
@@ -471,9 +480,31 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
471 if (!dev->queues) 480 if (!dev->queues)
472 return -EBUSY; 481 return -EBUSY;
473 482
474 if(wait) 483 if (wait) {
484
485 spin_lock_irqsave(&dev->manage_lock, mflags);
486 if (dev->management_fib_count >= AAC_NUM_MGT_FIB) {
487 printk(KERN_INFO "No management Fibs Available:%d\n",
488 dev->management_fib_count);
489 spin_unlock_irqrestore(&dev->manage_lock, mflags);
490 return -EBUSY;
491 }
492 dev->management_fib_count++;
493 spin_unlock_irqrestore(&dev->manage_lock, mflags);
475 spin_lock_irqsave(&fibptr->event_lock, flags); 494 spin_lock_irqsave(&fibptr->event_lock, flags);
476 aac_adapter_deliver(fibptr); 495 }
496
497 if (aac_adapter_deliver(fibptr) != 0) {
498 printk(KERN_ERR "aac_fib_send: returned -EBUSY\n");
499 if (wait) {
500 spin_unlock_irqrestore(&fibptr->event_lock, flags);
501 spin_lock_irqsave(&dev->manage_lock, mflags);
502 dev->management_fib_count--;
503 spin_unlock_irqrestore(&dev->manage_lock, mflags);
504 }
505 return -EBUSY;
506 }
507
477 508
478 /* 509 /*
479 * If the caller wanted us to wait for response wait now. 510 * If the caller wanted us to wait for response wait now.
@@ -516,14 +547,15 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size,
516 udelay(5); 547 udelay(5);
517 } 548 }
518 } else if (down_interruptible(&fibptr->event_wait)) { 549 } else if (down_interruptible(&fibptr->event_wait)) {
519 fibptr->done = 2; 550 /* Do nothing ... satisfy
520 up(&fibptr->event_wait); 551 * down_interruptible must_check */
521 } 552 }
553
522 spin_lock_irqsave(&fibptr->event_lock, flags); 554 spin_lock_irqsave(&fibptr->event_lock, flags);
523 if ((fibptr->done == 0) || (fibptr->done == 2)) { 555 if (fibptr->done == 0) {
524 fibptr->done = 2; /* Tell interrupt we aborted */ 556 fibptr->done = 2; /* Tell interrupt we aborted */
525 spin_unlock_irqrestore(&fibptr->event_lock, flags); 557 spin_unlock_irqrestore(&fibptr->event_lock, flags);
526 return -EINTR; 558 return -ERESTARTSYS;
527 } 559 }
528 spin_unlock_irqrestore(&fibptr->event_lock, flags); 560 spin_unlock_irqrestore(&fibptr->event_lock, flags);
529 BUG_ON(fibptr->done == 0); 561 BUG_ON(fibptr->done == 0);
@@ -689,6 +721,7 @@ int aac_fib_adapter_complete(struct fib *fibptr, unsigned short size)
689 721
690int aac_fib_complete(struct fib *fibptr) 722int aac_fib_complete(struct fib *fibptr)
691{ 723{
724 unsigned long flags;
692 struct hw_fib * hw_fib = fibptr->hw_fib_va; 725 struct hw_fib * hw_fib = fibptr->hw_fib_va;
693 726
694 /* 727 /*
@@ -709,6 +742,13 @@ int aac_fib_complete(struct fib *fibptr)
709 * command is complete that we had sent to the adapter and this 742 * command is complete that we had sent to the adapter and this
710 * cdb could be reused. 743 * cdb could be reused.
711 */ 744 */
745 spin_lock_irqsave(&fibptr->event_lock, flags);
746 if (fibptr->done == 2) {
747 spin_unlock_irqrestore(&fibptr->event_lock, flags);
748 return 0;
749 }
750 spin_unlock_irqrestore(&fibptr->event_lock, flags);
751
712 if((hw_fib->header.XferState & cpu_to_le32(SentFromHost)) && 752 if((hw_fib->header.XferState & cpu_to_le32(SentFromHost)) &&
713 (hw_fib->header.XferState & cpu_to_le32(AdapterProcessed))) 753 (hw_fib->header.XferState & cpu_to_le32(AdapterProcessed)))
714 { 754 {
@@ -1355,7 +1395,10 @@ int aac_reset_adapter(struct aac_dev * aac, int forced)
1355 1395
1356 if (status >= 0) 1396 if (status >= 0)
1357 aac_fib_complete(fibctx); 1397 aac_fib_complete(fibctx);
1358 aac_fib_free(fibctx); 1398 /* FIB should be freed only after getting
1399 * the response from the F/W */
1400 if (status != -ERESTARTSYS)
1401 aac_fib_free(fibctx);
1359 } 1402 }
1360 } 1403 }
1361 1404
@@ -1759,6 +1802,7 @@ int aac_command_thread(void *data)
1759 struct fib *fibptr; 1802 struct fib *fibptr;
1760 1803
1761 if ((fibptr = aac_fib_alloc(dev))) { 1804 if ((fibptr = aac_fib_alloc(dev))) {
1805 int status;
1762 __le32 *info; 1806 __le32 *info;
1763 1807
1764 aac_fib_init(fibptr); 1808 aac_fib_init(fibptr);
@@ -1769,15 +1813,21 @@ int aac_command_thread(void *data)
1769 1813
1770 *info = cpu_to_le32(now.tv_sec); 1814 *info = cpu_to_le32(now.tv_sec);
1771 1815
1772 (void)aac_fib_send(SendHostTime, 1816 status = aac_fib_send(SendHostTime,
1773 fibptr, 1817 fibptr,
1774 sizeof(*info), 1818 sizeof(*info),
1775 FsaNormal, 1819 FsaNormal,
1776 1, 1, 1820 1, 1,
1777 NULL, 1821 NULL,
1778 NULL); 1822 NULL);
1779 aac_fib_complete(fibptr); 1823 /* Do not set XferState to zero unless
1780 aac_fib_free(fibptr); 1824 * receives a response from F/W */
1825 if (status >= 0)
1826 aac_fib_complete(fibptr);
1827 /* FIB should be freed only after
1828 * getting the response from the F/W */
1829 if (status != -ERESTARTSYS)
1830 aac_fib_free(fibptr);
1781 } 1831 }
1782 difference = (long)(unsigned)update_interval*HZ; 1832 difference = (long)(unsigned)update_interval*HZ;
1783 } else { 1833 } else {