aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Manoj N. Kumar <manoj@linux.vnet.ibm.com> 2016-03-25 15:26:34 -0400
committer: Martin K. Petersen <martin.petersen@oracle.com> 2016-03-28 20:43:17 -0400
commit: 9526f36026f778e82b5175249443854c03b2e660 (patch)
tree: 5f2e14c46a6f336253d66566d436379a83f7c0a3
parent: 5ec8a1753bc29efa7e4b1391d691c9c719b30257 (diff)
cxlflash: Fix regression issue with re-ordering patch
While running 'sg_reset -H' back to back the following exception was seen:

[ 735.115695] Faulting instruction address: 0xd0000000098c0864
cpu 0x0: Vector: 300 (Data Access) at [c000000ffffafa80]
pc: d0000000098c0864: cxlflash_async_err_irq+0x84/0x5c0 [cxlflash]
lr: c00000000013aed0: handle_irq_event_percpu+0xa0/0x310
sp: c000000ffffafd00
msr: 9000000000009033
dar: 2010000
dsisr: 40000000
current = 0xc000000001510880
paca = 0xc00000000fb80000 softe: 0 irq_happened: 0x01
pid = 0, comm = swapper/0
Linux version 4.5.0-491-26f710d+
enter ? for help
[c000000ffffafe10] c00000000013aed0 handle_irq_event_percpu+0xa0/0x310
[c000000ffffafed0] c00000000013b1a8 handle_irq_event+0x68/0xc0
[c000000ffffaff00] c0000000001404ec handle_fasteoi_irq+0xec/0x2a0
[c000000ffffaff30] c00000000013a084 generic_handle_irq+0x54/0x80
[c000000ffffaff60] c000000000011130 __do_irq+0x80/0x1d0
[c000000ffffaff90] c000000000024d40 call_do_irq+0x14/0x24
[c000000001573a20] c000000000011318 do_IRQ+0x98/0x140
[c000000001573a70] c000000000002594 hardware_interrupt_common+0x114/0x180

This exception is being hit because the async_err interrupt path performs an MMIO to read the interrupt status register. The MMIO region in this case is not available.

Commit 6ded8b3cbd9a ("cxlflash: Unmap problem state area before detaching master context") re-ordered the sequence in which term_mc() and stop_afu() are called. This introduces a window for interrupts to come in with the problem space area unmapped, that did not exist previously.

The fix is to separate the disabling of all AFU interrupts to a distinct function, term_intr() so that it is the first thing that is done in the tear down process. To keep the initialization process symmetric, separate the AFU interrupt setup also to a distinct function: init_intr().

Fixes: 6ded8b3cbd9a ("cxlflash: Unmap problem state area before detaching master context")
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r-- drivers/scsi/cxlflash/main.c 130
-rw-r--r-- drivers/scsi/cxlflash/main.h 5
2 files changed, 93 insertions, 42 deletions
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 3879b46d79e1..1d418073d621 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -683,28 +683,23 @@ static void stop_afu(struct cxlflash_cfg *cfg)
683} 683}
684 684
685/** 685/**
686 * term_mc() - terminates the master context 686 * term_intr() - disables all AFU interrupts
687 * @cfg: Internal structure associated with the host. 687 * @cfg: Internal structure associated with the host.
688 * @level: Depth of allocation, where to begin waterfall tear down. 688 * @level: Depth of allocation, where to begin waterfall tear down.
689 * 689 *
690 * Safe to call with AFU/MC in partially allocated/initialized state. 690 * Safe to call with AFU/MC in partially allocated/initialized state.
691 */ 691 */
692static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) 692static void term_intr(struct cxlflash_cfg *cfg, enum undo_level level)
693{ 693{
694 int rc = 0;
695 struct afu *afu = cfg->afu; 694 struct afu *afu = cfg->afu;
696 struct device *dev = &cfg->dev->dev; 695 struct device *dev = &cfg->dev->dev;
697 696
698 if (!afu || !cfg->mcctx) { 697 if (!afu || !cfg->mcctx) {
699 dev_err(dev, "%s: returning from term_mc with NULL afu or MC\n", 698 dev_err(dev, "%s: returning with NULL afu or MC\n", __func__);
700 __func__);
701 return; 699 return;
702 } 700 }
703 701
704 switch (level) { 702 switch (level) {
705 case UNDO_START:
706 rc = cxl_stop_context(cfg->mcctx);
707 BUG_ON(rc);
708 case UNMAP_THREE: 703 case UNMAP_THREE:
709 cxl_unmap_afu_irq(cfg->mcctx, 3, afu); 704 cxl_unmap_afu_irq(cfg->mcctx, 3, afu);
710 case UNMAP_TWO: 705 case UNMAP_TWO:
@@ -713,9 +708,34 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level)
713 cxl_unmap_afu_irq(cfg->mcctx, 1, afu); 708 cxl_unmap_afu_irq(cfg->mcctx, 1, afu);
714 case FREE_IRQ: 709 case FREE_IRQ:
715 cxl_free_afu_irqs(cfg->mcctx); 710 cxl_free_afu_irqs(cfg->mcctx);
716 case RELEASE_CONTEXT: 711 /* fall through */
717 cfg->mcctx = NULL; 712 case UNDO_NOOP:
713 /* No action required */
714 break;
715 }
716}
717
718/**
719 * term_mc() - terminates the master context
720 * @cfg: Internal structure associated with the host.
721 * @level: Depth of allocation, where to begin waterfall tear down.
722 *
723 * Safe to call with AFU/MC in partially allocated/initialized state.
724 */
725static void term_mc(struct cxlflash_cfg *cfg)
726{
727 int rc = 0;
728 struct afu *afu = cfg->afu;
729 struct device *dev = &cfg->dev->dev;
730
731 if (!afu || !cfg->mcctx) {
732 dev_err(dev, "%s: returning with NULL afu or MC\n", __func__);
733 return;
718 } 734 }
735
736 rc = cxl_stop_context(cfg->mcctx);
737 WARN_ON(rc);
738 cfg->mcctx = NULL;
719} 739}
720 740
721/** 741/**
@@ -726,10 +746,20 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level)
726 */ 746 */
727static void term_afu(struct cxlflash_cfg *cfg) 747static void term_afu(struct cxlflash_cfg *cfg)
728{ 748{
749 /*
750 * Tear down is carefully orchestrated to ensure
751 * no interrupts can come in when the problem state
752 * area is unmapped.
753 *
754 * 1) Disable all AFU interrupts
755 * 2) Unmap the problem state area
756 * 3) Stop the master context
757 */
758 term_intr(cfg, UNMAP_THREE);
729 if (cfg->afu) 759 if (cfg->afu)
730 stop_afu(cfg); 760 stop_afu(cfg);
731 761
732 term_mc(cfg, UNDO_START); 762 term_mc(cfg);
733 763
734 pr_debug("%s: returning\n", __func__); 764 pr_debug("%s: returning\n", __func__);
735} 765}
@@ -1597,41 +1627,24 @@ static int start_afu(struct cxlflash_cfg *cfg)
1597} 1627}
1598 1628
1599/** 1629/**
1600 * init_mc() - create and register as the master context 1630 * init_intr() - setup interrupt handlers for the master context
1601 * @cfg: Internal structure associated with the host. 1631 * @cfg: Internal structure associated with the host.
1602 * 1632 *
1603 * Return: 0 on success, -errno on failure 1633 * Return: 0 on success, -errno on failure
1604 */ 1634 */
1605static int init_mc(struct cxlflash_cfg *cfg) 1635static enum undo_level init_intr(struct cxlflash_cfg *cfg,
1636 struct cxl_context *ctx)
1606{ 1637{
1607 struct cxl_context *ctx;
1608 struct device *dev = &cfg->dev->dev;
1609 struct afu *afu = cfg->afu; 1638 struct afu *afu = cfg->afu;
1639 struct device *dev = &cfg->dev->dev;
1610 int rc = 0; 1640 int rc = 0;
1611 enum undo_level level; 1641 enum undo_level level = UNDO_NOOP;
1612
1613 ctx = cxl_get_context(cfg->dev);
1614 if (unlikely(!ctx))
1615 return -ENOMEM;
1616 cfg->mcctx = ctx;
1617
1618 /* Set it up as a master with the CXL */
1619 cxl_set_master(ctx);
1620
1621 /* During initialization reset the AFU to start from a clean slate */
1622 rc = cxl_afu_reset(cfg->mcctx);
1623 if (unlikely(rc)) {
1624 dev_err(dev, "%s: initial AFU reset failed rc=%d\n",
1625 __func__, rc);
1626 level = RELEASE_CONTEXT;
1627 goto out;
1628 }
1629 1642
1630 rc = cxl_allocate_afu_irqs(ctx, 3); 1643 rc = cxl_allocate_afu_irqs(ctx, 3);
1631 if (unlikely(rc)) { 1644 if (unlikely(rc)) {
1632 dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n", 1645 dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n",
1633 __func__, rc); 1646 __func__, rc);
1634 level = RELEASE_CONTEXT; 1647 level = UNDO_NOOP;
1635 goto out; 1648 goto out;
1636 } 1649 }
1637 1650
@@ -1661,8 +1674,47 @@ static int init_mc(struct cxlflash_cfg *cfg)
1661 level = UNMAP_TWO; 1674 level = UNMAP_TWO;
1662 goto out; 1675 goto out;
1663 } 1676 }
1677out:
1678 return level;
1679}
1664 1680
1665 rc = 0; 1681/**
1682 * init_mc() - create and register as the master context
1683 * @cfg: Internal structure associated with the host.
1684 *
1685 * Return: 0 on success, -errno on failure
1686 */
1687static int init_mc(struct cxlflash_cfg *cfg)
1688{
1689 struct cxl_context *ctx;
1690 struct device *dev = &cfg->dev->dev;
1691 int rc = 0;
1692 enum undo_level level;
1693
1694 ctx = cxl_get_context(cfg->dev);
1695 if (unlikely(!ctx)) {
1696 rc = -ENOMEM;
1697 goto ret;
1698 }
1699 cfg->mcctx = ctx;
1700
1701 /* Set it up as a master with the CXL */
1702 cxl_set_master(ctx);
1703
1704 /* During initialization reset the AFU to start from a clean slate */
1705 rc = cxl_afu_reset(cfg->mcctx);
1706 if (unlikely(rc)) {
1707 dev_err(dev, "%s: initial AFU reset failed rc=%d\n",
1708 __func__, rc);
1709 goto ret;
1710 }
1711
1712 level = init_intr(cfg, ctx);
1713 if (unlikely(level)) {
1714 dev_err(dev, "%s: setting up interrupts failed rc=%d\n",
1715 __func__, rc);
1716 goto out;
1717 }
1666 1718
1667 /* This performs the equivalent of the CXL_IOCTL_START_WORK. 1719 /* This performs the equivalent of the CXL_IOCTL_START_WORK.
1668 * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process 1720 * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process
@@ -1678,7 +1730,7 @@ ret:
1678 pr_debug("%s: returning rc=%d\n", __func__, rc); 1730 pr_debug("%s: returning rc=%d\n", __func__, rc);
1679 return rc; 1731 return rc;
1680out: 1732out:
1681 term_mc(cfg, level); 1733 term_intr(cfg, level);
1682 goto ret; 1734 goto ret;
1683} 1735}
1684 1736
@@ -1751,7 +1803,8 @@ out:
1751err2: 1803err2:
1752 kref_put(&afu->mapcount, afu_unmap); 1804 kref_put(&afu->mapcount, afu_unmap);
1753err1: 1805err1:
1754 term_mc(cfg, UNDO_START); 1806 term_intr(cfg, UNMAP_THREE);
1807 term_mc(cfg);
1755 goto out; 1808 goto out;
1756} 1809}
1757 1810
@@ -2502,8 +2555,7 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
2502 if (unlikely(rc)) 2555 if (unlikely(rc))
2503 dev_err(dev, "%s: Failed to mark user contexts!(%d)\n", 2556 dev_err(dev, "%s: Failed to mark user contexts!(%d)\n",
2504 __func__, rc); 2557 __func__, rc);
2505 stop_afu(cfg); 2558 term_afu(cfg);
2506 term_mc(cfg, UNDO_START);
2507 return PCI_ERS_RESULT_NEED_RESET; 2559 return PCI_ERS_RESULT_NEED_RESET;
2508 case pci_channel_io_perm_failure: 2560 case pci_channel_io_perm_failure:
2509 cfg->state = STATE_FAILTERM; 2561 cfg->state = STATE_FAILTERM;
diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h
index 0faed422c7f4..eb9d8f730b38 100644
--- a/drivers/scsi/cxlflash/main.h
+++ b/drivers/scsi/cxlflash/main.h
@@ -79,12 +79,11 @@
79#define WWPN_BUF_LEN (WWPN_LEN + 1) 79#define WWPN_BUF_LEN (WWPN_LEN + 1)
80 80
81enum undo_level { 81enum undo_level {
82 RELEASE_CONTEXT = 0, 82 UNDO_NOOP = 0,
83 FREE_IRQ, 83 FREE_IRQ,
84 UNMAP_ONE, 84 UNMAP_ONE,
85 UNMAP_TWO, 85 UNMAP_TWO,
86 UNMAP_THREE, 86 UNMAP_THREE
87 UNDO_START
88}; 87};
89 88
90struct dev_dependent_vals { 89struct dev_dependent_vals {