aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVivek Goyal <vgoyal@in.ibm.com>2006-04-27 05:33:13 -0400
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2006-05-31 11:33:49 -0400
commit9bf0a28c9a24e2cee5deecf89d118254374c75ba (patch)
tree1a422b6e38ab0ad291f00dfb061ae3c0d9e4d1e5
parent2b89dad0c7e3b03d45d9674ee9a7b49670df098e (diff)
[SCSI] kdump: mpt fusion driver initialization failure fix
MPT fusion driver initialization fails while the second kernel is booting after a system crash (if a kdump kernel is configured). The Oops message is pasted below.

*****************************************************************************
Fusion MPT base driver 3.03.08
Copyright (c) 1999-2005 LSI Logic Corporation
Fusion MPT SAS Host driver 3.03.08
ACPI: PCI Interrupt 0000:01:00.0[A] -> Link [LNKA] -> GSI 5 (level, low) -> IRQ 5
mptbase: Initiating ioc0 bringup
BUG: unable to handle kernel paging request at virtual address 00002608
 printing eip:
c11782fd
*pde = 00000000
Oops: 0000 [#1]
Modules linked in:
CPU: 0
EIP: 0060:[<c11782fd>] Not tainted VLI
EFLAGS: 00010046 (2.6.17-rc1-16M #2)
EIP is at mptscsih_io_done+0x27/0x3a3
eax: c4fed000 ebx: c4fed000 ecx: 00002600 edx: 00000298
esi: c11782d6 edi: 00002600 ebp: 00000000 esp: c1332f74
ds: 007b es: 007b ss: 0068
Process swapper (pid: 0, threadinfo=c1332000 task=c128f9c0)
Stack: <0>0000006c 00000020 00000298 00002600 c4fed000 c4fed000 c11782d6 0000260 0
 00000000 c1172c49 c4fed000 c1305b40 00000005 00000000 c1172d75 c48877e0
 c1029687 00000000 c1307fb8 00000000 c1305a00 00000001 00000000 c1307fb8
Call Trace:
 <c11782d6> mptscsih_io_done+0x0/0x3a3
 <c1172c49> mpt_turbo_reply+0xbb/0xd3
 <c1172d75> mpt_interrupt+0x22/0x2b
 <c1029687> misrouted_irq+0x63/0xcb
 <c10297b3> note_interrupt+0x43/0x98
 <c10292f9> __do_IRQ+0x68/0x8f
 <c1003fac> do_IRQ+0x36/0x4e
 =======================
 <c1002aa6> common_interrupt+0x1a/0x20
 <c1001150> mwait_idle+0x1a/0x2a
 <c10010bf> cpu_idle+0x40/0x5c
 <c1308610> start_kernel+0x17a/0x17c
Code: 5e 5f 5d c3 55 89 cd 57 56 53 83 ec 14 89 54 24 0c 89 44 24 10 8b 90 cc 00 00 00 8b 4c 24 0c 81 c2 98 02 00 00 85 ed 89 54 24 08 <0f> b7 79 08 89 fe 74 04 0f b7 75 08 66 39 f7 75 0d 8b 44 24 0c
*******************************************************************************

o Kdump capture kernel boot fails during initialization of MPT fusion driver. 
(LSI Logic / Symbios Logic SAS1064E PCI-Express Fusion-MPT SAS (rev 01))

o The problem is easily reproducible if the system crashes while some disk activity, such as a cp operation, is in progress.

o After a system crash, devices are not shut down, and the capture kernel starts booting while skipping the BIOS. Hence the underlying device is left in an operational state. In this case the SCSI controller was left with its interrupt line asserted and its reply FIFO not empty. When the driver starts initializing in the second kernel, it receives an interrupt the moment request_irq() is called. The interrupt handler reads the message from the reply FIFO, tries to access the associated message frame, and panics, because in the new kernel's context that message frame is not valid at all.

o In this scenario, we should probably delay the request_irq() call: first bring up the IOC, reset it if needed, and only then register for the IRQ.

o I have tested the patch with SAS1064E and 53c1030 controllers.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Acked-by: "Moore, Eric Dean" <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r--drivers/message/fusion/mptbase.c88
1 files changed, 50 insertions, 38 deletions
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 266414ca2814..bb7ad4d0678a 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -1387,39 +1387,6 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
1387 /* Set lookup ptr. */ 1387 /* Set lookup ptr. */
1388 list_add_tail(&ioc->list, &ioc_list); 1388 list_add_tail(&ioc->list, &ioc_list);
1389 1389
1390 ioc->pci_irq = -1;
1391 if (pdev->irq) {
1392 if (mpt_msi_enable && !pci_enable_msi(pdev))
1393 printk(MYIOC_s_INFO_FMT "PCI-MSI enabled\n", ioc->name);
1394
1395 r = request_irq(pdev->irq, mpt_interrupt, SA_SHIRQ, ioc->name, ioc);
1396
1397 if (r < 0) {
1398#ifndef __sparc__
1399 printk(MYIOC_s_ERR_FMT "Unable to allocate interrupt %d!\n",
1400 ioc->name, pdev->irq);
1401#else
1402 printk(MYIOC_s_ERR_FMT "Unable to allocate interrupt %s!\n",
1403 ioc->name, __irq_itoa(pdev->irq));
1404#endif
1405 list_del(&ioc->list);
1406 iounmap(mem);
1407 kfree(ioc);
1408 return -EBUSY;
1409 }
1410
1411 ioc->pci_irq = pdev->irq;
1412
1413 pci_set_master(pdev); /* ?? */
1414 pci_set_drvdata(pdev, ioc);
1415
1416#ifndef __sparc__
1417 dprintk((KERN_INFO MYNAM ": %s installed at interrupt %d\n", ioc->name, pdev->irq));
1418#else
1419 dprintk((KERN_INFO MYNAM ": %s installed at interrupt %s\n", ioc->name, __irq_itoa(pdev->irq)));
1420#endif
1421 }
1422
1423 /* Check for "bound ports" (929, 929X, 1030, 1035) to reduce redundant resets. 1390 /* Check for "bound ports" (929, 929X, 1030, 1035) to reduce redundant resets.
1424 */ 1391 */
1425 mpt_detect_bound_ports(ioc, pdev); 1392 mpt_detect_bound_ports(ioc, pdev);
@@ -1429,11 +1396,7 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
1429 printk(KERN_WARNING MYNAM 1396 printk(KERN_WARNING MYNAM
1430 ": WARNING - %s did not initialize properly! (%d)\n", 1397 ": WARNING - %s did not initialize properly! (%d)\n",
1431 ioc->name, r); 1398 ioc->name, r);
1432
1433 list_del(&ioc->list); 1399 list_del(&ioc->list);
1434 free_irq(ioc->pci_irq, ioc);
1435 if (mpt_msi_enable)
1436 pci_disable_msi(pdev);
1437 if (ioc->alt_ioc) 1400 if (ioc->alt_ioc)
1438 ioc->alt_ioc->alt_ioc = NULL; 1401 ioc->alt_ioc->alt_ioc = NULL;
1439 iounmap(mem); 1402 iounmap(mem);
@@ -1637,6 +1600,7 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
1637 int handlers; 1600 int handlers;
1638 int ret = 0; 1601 int ret = 0;
1639 int reset_alt_ioc_active = 0; 1602 int reset_alt_ioc_active = 0;
1603 int irq_allocated = 0;
1640 1604
1641 printk(KERN_INFO MYNAM ": Initiating %s %s\n", 1605 printk(KERN_INFO MYNAM ": Initiating %s %s\n",
1642 ioc->name, reason==MPT_HOSTEVENT_IOC_BRINGUP ? "bringup" : "recovery"); 1606 ioc->name, reason==MPT_HOSTEVENT_IOC_BRINGUP ? "bringup" : "recovery");
@@ -1720,6 +1684,48 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
1720 } 1684 }
1721 } 1685 }
1722 1686
1687 /*
1688 * Device is reset now. It must have de-asserted the interrupt line
1689 * (if it was asserted) and it should be safe to register for the
1690 * interrupt now.
1691 */
1692 if ((ret == 0) && (reason == MPT_HOSTEVENT_IOC_BRINGUP)) {
1693 ioc->pci_irq = -1;
1694 if (ioc->pcidev->irq) {
1695 if (mpt_msi_enable && !pci_enable_msi(ioc->pcidev))
1696 printk(MYIOC_s_INFO_FMT "PCI-MSI enabled\n",
1697 ioc->name);
1698 rc = request_irq(ioc->pcidev->irq, mpt_interrupt,
1699 SA_SHIRQ, ioc->name, ioc);
1700 if (rc < 0) {
1701#ifndef __sparc__
1702 printk(MYIOC_s_ERR_FMT "Unable to allocate "
1703 "interrupt %d!\n", ioc->name,
1704 ioc->pcidev->irq);
1705#else
1706 printk(MYIOC_s_ERR_FMT "Unable to allocate "
1707 "interrupt %s!\n", ioc->name,
1708 __irq_itoa(ioc->pcidev->irq));
1709#endif
1710 if (mpt_msi_enable)
1711 pci_disable_msi(ioc->pcidev);
1712 return -EBUSY;
1713 }
1714 irq_allocated = 1;
1715 ioc->pci_irq = ioc->pcidev->irq;
1716 pci_set_master(ioc->pcidev); /* ?? */
1717 pci_set_drvdata(ioc->pcidev, ioc);
1718#ifndef __sparc__
1719 dprintk((KERN_INFO MYNAM ": %s installed at interrupt "
1720 "%d\n", ioc->name, ioc->pcidev->irq));
1721#else
1722 dprintk((KERN_INFO MYNAM ": %s installed at interrupt "
1723 "%s\n", ioc->name,
1724 __irq_itoa(ioc->pcidev->irq)));
1725#endif
1726 }
1727 }
1728
1723 /* Prime reply & request queues! 1729 /* Prime reply & request queues!
1724 * (mucho alloc's) Must be done prior to 1730 * (mucho alloc's) Must be done prior to
1725 * init as upper addresses are needed for init. 1731 * init as upper addresses are needed for init.
@@ -1819,7 +1825,7 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
1819 ret = mptbase_sas_persist_operation(ioc, 1825 ret = mptbase_sas_persist_operation(ioc,
1820 MPI_SAS_OP_CLEAR_NOT_PRESENT); 1826 MPI_SAS_OP_CLEAR_NOT_PRESENT);
1821 if(ret != 0) 1827 if(ret != 0)
1822 return -1; 1828 goto out;
1823 } 1829 }
1824 1830
1825 /* Find IM volumes 1831 /* Find IM volumes
@@ -1900,6 +1906,12 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
1900 /* FIXME? Examine results here? */ 1906 /* FIXME? Examine results here? */
1901 } 1907 }
1902 1908
1909out:
1910 if ((ret != 0) && irq_allocated) {
1911 free_irq(ioc->pci_irq, ioc);
1912 if (mpt_msi_enable)
1913 pci_disable_msi(ioc->pcidev);
1914 }
1903 return ret; 1915 return ret;
1904} 1916}
1905 1917