diff options
author | Manoj N. Kumar <manoj@linux.vnet.ibm.com> | 2016-03-25 15:26:34 -0400 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2016-03-28 20:43:17 -0400 |
commit | 9526f36026f778e82b5175249443854c03b2e660 (patch) | |
tree | 5f2e14c46a6f336253d66566d436379a83f7c0a3 | |
parent | 5ec8a1753bc29efa7e4b1391d691c9c719b30257 (diff) |
cxlflash: Fix regression issue with re-ordering patch
While running 'sg_reset -H' back to back, the following exception was seen:
[ 735.115695] Faulting instruction address: 0xd0000000098c0864
cpu 0x0: Vector: 300 (Data Access) at [c000000ffffafa80]
pc: d0000000098c0864: cxlflash_async_err_irq+0x84/0x5c0 [cxlflash]
lr: c00000000013aed0: handle_irq_event_percpu+0xa0/0x310
sp: c000000ffffafd00
msr: 9000000000009033
dar: 2010000
dsisr: 40000000
current = 0xc000000001510880
paca = 0xc00000000fb80000 softe: 0 irq_happened: 0x01
pid = 0, comm = swapper/0
Linux version 4.5.0-491-26f710d+
enter ? for help
[c000000ffffafe10] c00000000013aed0 handle_irq_event_percpu+0xa0/0x310
[c000000ffffafed0] c00000000013b1a8 handle_irq_event+0x68/0xc0
[c000000ffffaff00] c0000000001404ec handle_fasteoi_irq+0xec/0x2a0
[c000000ffffaff30] c00000000013a084 generic_handle_irq+0x54/0x80
[c000000ffffaff60] c000000000011130 __do_irq+0x80/0x1d0
[c000000ffffaff90] c000000000024d40 call_do_irq+0x14/0x24
[c000000001573a20] c000000000011318 do_IRQ+0x98/0x140
[c000000001573a70] c000000000002594 hardware_interrupt_common+0x114/0x180
This exception is being hit because the async_err interrupt path performs
an MMIO to read the interrupt status register. The MMIO region in this
case is not available.
Commit 6ded8b3cbd9a ("cxlflash: Unmap problem state area before detaching
master context") re-ordered the sequence in which term_mc() and stop_afu()
are called. This introduces a window, which did not exist previously, in
which interrupts can come in while the problem state area is unmapped.
The fix is to separate the disabling of all AFU interrupts into a distinct
function, term_intr(), so that it is the first thing that is done in the
tear down process.
To keep the initialization process symmetric, the AFU interrupt setup is
also separated into a distinct function: init_intr().
Fixes: 6ded8b3cbd9a ("cxlflash: Unmap problem state area before detaching master context")
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r-- | drivers/scsi/cxlflash/main.c | 130 | ||||
-rw-r--r-- | drivers/scsi/cxlflash/main.h | 5 |
2 files changed, 93 insertions, 42 deletions
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 3879b46d79e1..1d418073d621 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c | |||
@@ -683,28 +683,23 @@ static void stop_afu(struct cxlflash_cfg *cfg) | |||
683 | } | 683 | } |
684 | 684 | ||
685 | /** | 685 | /** |
686 | * term_mc() - terminates the master context | 686 | * term_intr() - disables all AFU interrupts |
687 | * @cfg: Internal structure associated with the host. | 687 | * @cfg: Internal structure associated with the host. |
688 | * @level: Depth of allocation, where to begin waterfall tear down. | 688 | * @level: Depth of allocation, where to begin waterfall tear down. |
689 | * | 689 | * |
690 | * Safe to call with AFU/MC in partially allocated/initialized state. | 690 | * Safe to call with AFU/MC in partially allocated/initialized state. |
691 | */ | 691 | */ |
692 | static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) | 692 | static void term_intr(struct cxlflash_cfg *cfg, enum undo_level level) |
693 | { | 693 | { |
694 | int rc = 0; | ||
695 | struct afu *afu = cfg->afu; | 694 | struct afu *afu = cfg->afu; |
696 | struct device *dev = &cfg->dev->dev; | 695 | struct device *dev = &cfg->dev->dev; |
697 | 696 | ||
698 | if (!afu || !cfg->mcctx) { | 697 | if (!afu || !cfg->mcctx) { |
699 | dev_err(dev, "%s: returning from term_mc with NULL afu or MC\n", | 698 | dev_err(dev, "%s: returning with NULL afu or MC\n", __func__); |
700 | __func__); | ||
701 | return; | 699 | return; |
702 | } | 700 | } |
703 | 701 | ||
704 | switch (level) { | 702 | switch (level) { |
705 | case UNDO_START: | ||
706 | rc = cxl_stop_context(cfg->mcctx); | ||
707 | BUG_ON(rc); | ||
708 | case UNMAP_THREE: | 703 | case UNMAP_THREE: |
709 | cxl_unmap_afu_irq(cfg->mcctx, 3, afu); | 704 | cxl_unmap_afu_irq(cfg->mcctx, 3, afu); |
710 | case UNMAP_TWO: | 705 | case UNMAP_TWO: |
@@ -713,9 +708,34 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) | |||
713 | cxl_unmap_afu_irq(cfg->mcctx, 1, afu); | 708 | cxl_unmap_afu_irq(cfg->mcctx, 1, afu); |
714 | case FREE_IRQ: | 709 | case FREE_IRQ: |
715 | cxl_free_afu_irqs(cfg->mcctx); | 710 | cxl_free_afu_irqs(cfg->mcctx); |
716 | case RELEASE_CONTEXT: | 711 | /* fall through */ |
717 | cfg->mcctx = NULL; | 712 | case UNDO_NOOP: |
713 | /* No action required */ | ||
714 | break; | ||
715 | } | ||
716 | } | ||
717 | |||
718 | /** | ||
719 | * term_mc() - terminates the master context | ||
720 | * @cfg: Internal structure associated with the host. | ||
721 | * @level: Depth of allocation, where to begin waterfall tear down. | ||
722 | * | ||
723 | * Safe to call with AFU/MC in partially allocated/initialized state. | ||
724 | */ | ||
725 | static void term_mc(struct cxlflash_cfg *cfg) | ||
726 | { | ||
727 | int rc = 0; | ||
728 | struct afu *afu = cfg->afu; | ||
729 | struct device *dev = &cfg->dev->dev; | ||
730 | |||
731 | if (!afu || !cfg->mcctx) { | ||
732 | dev_err(dev, "%s: returning with NULL afu or MC\n", __func__); | ||
733 | return; | ||
718 | } | 734 | } |
735 | |||
736 | rc = cxl_stop_context(cfg->mcctx); | ||
737 | WARN_ON(rc); | ||
738 | cfg->mcctx = NULL; | ||
719 | } | 739 | } |
720 | 740 | ||
721 | /** | 741 | /** |
@@ -726,10 +746,20 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) | |||
726 | */ | 746 | */ |
727 | static void term_afu(struct cxlflash_cfg *cfg) | 747 | static void term_afu(struct cxlflash_cfg *cfg) |
728 | { | 748 | { |
749 | /* | ||
750 | * Tear down is carefully orchestrated to ensure | ||
751 | * no interrupts can come in when the problem state | ||
752 | * area is unmapped. | ||
753 | * | ||
754 | * 1) Disable all AFU interrupts | ||
755 | * 2) Unmap the problem state area | ||
756 | * 3) Stop the master context | ||
757 | */ | ||
758 | term_intr(cfg, UNMAP_THREE); | ||
729 | if (cfg->afu) | 759 | if (cfg->afu) |
730 | stop_afu(cfg); | 760 | stop_afu(cfg); |
731 | 761 | ||
732 | term_mc(cfg, UNDO_START); | 762 | term_mc(cfg); |
733 | 763 | ||
734 | pr_debug("%s: returning\n", __func__); | 764 | pr_debug("%s: returning\n", __func__); |
735 | } | 765 | } |
@@ -1597,41 +1627,24 @@ static int start_afu(struct cxlflash_cfg *cfg) | |||
1597 | } | 1627 | } |
1598 | 1628 | ||
1599 | /** | 1629 | /** |
1600 | * init_mc() - create and register as the master context | 1630 | * init_intr() - setup interrupt handlers for the master context |
1601 | * @cfg: Internal structure associated with the host. | 1631 | * @cfg: Internal structure associated with the host. |
1602 | * | 1632 | * |
1603 | * Return: 0 on success, -errno on failure | 1633 | * Return: 0 on success, -errno on failure |
1604 | */ | 1634 | */ |
1605 | static int init_mc(struct cxlflash_cfg *cfg) | 1635 | static enum undo_level init_intr(struct cxlflash_cfg *cfg, |
1636 | struct cxl_context *ctx) | ||
1606 | { | 1637 | { |
1607 | struct cxl_context *ctx; | ||
1608 | struct device *dev = &cfg->dev->dev; | ||
1609 | struct afu *afu = cfg->afu; | 1638 | struct afu *afu = cfg->afu; |
1639 | struct device *dev = &cfg->dev->dev; | ||
1610 | int rc = 0; | 1640 | int rc = 0; |
1611 | enum undo_level level; | 1641 | enum undo_level level = UNDO_NOOP; |
1612 | |||
1613 | ctx = cxl_get_context(cfg->dev); | ||
1614 | if (unlikely(!ctx)) | ||
1615 | return -ENOMEM; | ||
1616 | cfg->mcctx = ctx; | ||
1617 | |||
1618 | /* Set it up as a master with the CXL */ | ||
1619 | cxl_set_master(ctx); | ||
1620 | |||
1621 | /* During initialization reset the AFU to start from a clean slate */ | ||
1622 | rc = cxl_afu_reset(cfg->mcctx); | ||
1623 | if (unlikely(rc)) { | ||
1624 | dev_err(dev, "%s: initial AFU reset failed rc=%d\n", | ||
1625 | __func__, rc); | ||
1626 | level = RELEASE_CONTEXT; | ||
1627 | goto out; | ||
1628 | } | ||
1629 | 1642 | ||
1630 | rc = cxl_allocate_afu_irqs(ctx, 3); | 1643 | rc = cxl_allocate_afu_irqs(ctx, 3); |
1631 | if (unlikely(rc)) { | 1644 | if (unlikely(rc)) { |
1632 | dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n", | 1645 | dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n", |
1633 | __func__, rc); | 1646 | __func__, rc); |
1634 | level = RELEASE_CONTEXT; | 1647 | level = UNDO_NOOP; |
1635 | goto out; | 1648 | goto out; |
1636 | } | 1649 | } |
1637 | 1650 | ||
@@ -1661,8 +1674,47 @@ static int init_mc(struct cxlflash_cfg *cfg) | |||
1661 | level = UNMAP_TWO; | 1674 | level = UNMAP_TWO; |
1662 | goto out; | 1675 | goto out; |
1663 | } | 1676 | } |
1677 | out: | ||
1678 | return level; | ||
1679 | } | ||
1664 | 1680 | ||
1665 | rc = 0; | 1681 | /** |
1682 | * init_mc() - create and register as the master context | ||
1683 | * @cfg: Internal structure associated with the host. | ||
1684 | * | ||
1685 | * Return: 0 on success, -errno on failure | ||
1686 | */ | ||
1687 | static int init_mc(struct cxlflash_cfg *cfg) | ||
1688 | { | ||
1689 | struct cxl_context *ctx; | ||
1690 | struct device *dev = &cfg->dev->dev; | ||
1691 | int rc = 0; | ||
1692 | enum undo_level level; | ||
1693 | |||
1694 | ctx = cxl_get_context(cfg->dev); | ||
1695 | if (unlikely(!ctx)) { | ||
1696 | rc = -ENOMEM; | ||
1697 | goto ret; | ||
1698 | } | ||
1699 | cfg->mcctx = ctx; | ||
1700 | |||
1701 | /* Set it up as a master with the CXL */ | ||
1702 | cxl_set_master(ctx); | ||
1703 | |||
1704 | /* During initialization reset the AFU to start from a clean slate */ | ||
1705 | rc = cxl_afu_reset(cfg->mcctx); | ||
1706 | if (unlikely(rc)) { | ||
1707 | dev_err(dev, "%s: initial AFU reset failed rc=%d\n", | ||
1708 | __func__, rc); | ||
1709 | goto ret; | ||
1710 | } | ||
1711 | |||
1712 | level = init_intr(cfg, ctx); | ||
1713 | if (unlikely(level)) { | ||
1714 | dev_err(dev, "%s: setting up interrupts failed rc=%d\n", | ||
1715 | __func__, rc); | ||
1716 | goto out; | ||
1717 | } | ||
1666 | 1718 | ||
1667 | /* This performs the equivalent of the CXL_IOCTL_START_WORK. | 1719 | /* This performs the equivalent of the CXL_IOCTL_START_WORK. |
1668 | * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process | 1720 | * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process |
@@ -1678,7 +1730,7 @@ ret: | |||
1678 | pr_debug("%s: returning rc=%d\n", __func__, rc); | 1730 | pr_debug("%s: returning rc=%d\n", __func__, rc); |
1679 | return rc; | 1731 | return rc; |
1680 | out: | 1732 | out: |
1681 | term_mc(cfg, level); | 1733 | term_intr(cfg, level); |
1682 | goto ret; | 1734 | goto ret; |
1683 | } | 1735 | } |
1684 | 1736 | ||
@@ -1751,7 +1803,8 @@ out: | |||
1751 | err2: | 1803 | err2: |
1752 | kref_put(&afu->mapcount, afu_unmap); | 1804 | kref_put(&afu->mapcount, afu_unmap); |
1753 | err1: | 1805 | err1: |
1754 | term_mc(cfg, UNDO_START); | 1806 | term_intr(cfg, UNMAP_THREE); |
1807 | term_mc(cfg); | ||
1755 | goto out; | 1808 | goto out; |
1756 | } | 1809 | } |
1757 | 1810 | ||
@@ -2502,8 +2555,7 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, | |||
2502 | if (unlikely(rc)) | 2555 | if (unlikely(rc)) |
2503 | dev_err(dev, "%s: Failed to mark user contexts!(%d)\n", | 2556 | dev_err(dev, "%s: Failed to mark user contexts!(%d)\n", |
2504 | __func__, rc); | 2557 | __func__, rc); |
2505 | stop_afu(cfg); | 2558 | term_afu(cfg); |
2506 | term_mc(cfg, UNDO_START); | ||
2507 | return PCI_ERS_RESULT_NEED_RESET; | 2559 | return PCI_ERS_RESULT_NEED_RESET; |
2508 | case pci_channel_io_perm_failure: | 2560 | case pci_channel_io_perm_failure: |
2509 | cfg->state = STATE_FAILTERM; | 2561 | cfg->state = STATE_FAILTERM; |
diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h index 0faed422c7f4..eb9d8f730b38 100644 --- a/drivers/scsi/cxlflash/main.h +++ b/drivers/scsi/cxlflash/main.h | |||
@@ -79,12 +79,11 @@ | |||
79 | #define WWPN_BUF_LEN (WWPN_LEN + 1) | 79 | #define WWPN_BUF_LEN (WWPN_LEN + 1) |
80 | 80 | ||
81 | enum undo_level { | 81 | enum undo_level { |
82 | RELEASE_CONTEXT = 0, | 82 | UNDO_NOOP = 0, |
83 | FREE_IRQ, | 83 | FREE_IRQ, |
84 | UNMAP_ONE, | 84 | UNMAP_ONE, |
85 | UNMAP_TWO, | 85 | UNMAP_TWO, |
86 | UNMAP_THREE, | 86 | UNMAP_THREE |
87 | UNDO_START | ||
88 | }; | 87 | }; |
89 | 88 | ||
90 | struct dev_dependent_vals { | 89 | struct dev_dependent_vals { |