diff options
author | Jerome Glisse <jglisse@redhat.com> | 2010-03-09 09:45:12 -0500 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2010-04-05 21:21:11 -0400 |
commit | 90aca4d2740255bd130ea71a91530b9920c70abe (patch) | |
tree | acf9b8a4353e6727cd6cba5b71caaf9f067e465d /drivers/gpu/drm/radeon/r100.c | |
parent | a2d07b7438f015a0349bc9af3c96a8164549bbc5 (diff) |
drm/radeon/kms: simplify & improve GPU reset V2
This simplifies and improves GPU reset for R1XX-R6XX hw. It's
not 100% reliable; here are the results:
- R1XX/R2XX works a bunch of times in a row, sometimes it
seems it can work indefinitely
- R3XX/R4XX is the most unreliable one, sometimes you will be
able to reset a few times, sometimes not even once
- R5XX is more reliable than previous hw, seems to work most
of the time but once in a while it fails for no obvious
reason (same status as the previous reset, just not the same
happy ending)
- R6XX/R7XX are a lot more reliable with this patch, still
it seems that it can fail after a bunch of resets (a reset every
2 sec for 3 hours brings down the GPU & computer)
This has been tested on various hw; for some odd reason
I wasn't able to lock up RS480/RS690 (while they used to
love locking up).
Note that on R1XX-R5XX the cursor will disappear after a
lockup (haven't checked why); switching to console and back
to X will restore the cursor.
Next step is to record the bogus command that led to
the lockup.
V2: Fix r6xx resume path to avoid reinitializing the blit
module, use the gpu_lockup boolean to avoid entering an
infinite waiting loop on fence while reinitializing the GPU
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r100.c')
-rw-r--r-- | drivers/gpu/drm/radeon/r100.c | 180 |
1 files changed, 64 insertions, 116 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 8bb91092bffc..7a4a4fc276b3 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c | |||
@@ -662,26 +662,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) | |||
662 | if (r100_debugfs_cp_init(rdev)) { | 662 | if (r100_debugfs_cp_init(rdev)) { |
663 | DRM_ERROR("Failed to register debugfs file for CP !\n"); | 663 | DRM_ERROR("Failed to register debugfs file for CP !\n"); |
664 | } | 664 | } |
665 | /* Reset CP */ | ||
666 | tmp = RREG32(RADEON_CP_CSQ_STAT); | ||
667 | if ((tmp & (1 << 31))) { | ||
668 | DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp); | ||
669 | WREG32(RADEON_CP_CSQ_MODE, 0); | ||
670 | WREG32(RADEON_CP_CSQ_CNTL, 0); | ||
671 | WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); | ||
672 | tmp = RREG32(RADEON_RBBM_SOFT_RESET); | ||
673 | mdelay(2); | ||
674 | WREG32(RADEON_RBBM_SOFT_RESET, 0); | ||
675 | tmp = RREG32(RADEON_RBBM_SOFT_RESET); | ||
676 | mdelay(2); | ||
677 | tmp = RREG32(RADEON_CP_CSQ_STAT); | ||
678 | if ((tmp & (1 << 31))) { | ||
679 | DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp); | ||
680 | } | ||
681 | } else { | ||
682 | DRM_INFO("radeon: cp idle (0x%08X)\n", tmp); | ||
683 | } | ||
684 | |||
685 | if (!rdev->me_fw) { | 665 | if (!rdev->me_fw) { |
686 | r = r100_cp_init_microcode(rdev); | 666 | r = r100_cp_init_microcode(rdev); |
687 | if (r) { | 667 | if (r) { |
@@ -786,39 +766,6 @@ void r100_cp_disable(struct radeon_device *rdev) | |||
786 | } | 766 | } |
787 | } | 767 | } |
788 | 768 | ||
789 | int r100_cp_reset(struct radeon_device *rdev) | ||
790 | { | ||
791 | uint32_t tmp; | ||
792 | bool reinit_cp; | ||
793 | int i; | ||
794 | |||
795 | reinit_cp = rdev->cp.ready; | ||
796 | rdev->cp.ready = false; | ||
797 | WREG32(RADEON_CP_CSQ_MODE, 0); | ||
798 | WREG32(RADEON_CP_CSQ_CNTL, 0); | ||
799 | WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); | ||
800 | (void)RREG32(RADEON_RBBM_SOFT_RESET); | ||
801 | udelay(200); | ||
802 | WREG32(RADEON_RBBM_SOFT_RESET, 0); | ||
803 | /* Wait to prevent race in RBBM_STATUS */ | ||
804 | mdelay(1); | ||
805 | for (i = 0; i < rdev->usec_timeout; i++) { | ||
806 | tmp = RREG32(RADEON_RBBM_STATUS); | ||
807 | if (!(tmp & (1 << 16))) { | ||
808 | DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n", | ||
809 | tmp); | ||
810 | if (reinit_cp) { | ||
811 | return r100_cp_init(rdev, rdev->cp.ring_size); | ||
812 | } | ||
813 | return 0; | ||
814 | } | ||
815 | DRM_UDELAY(1); | ||
816 | } | ||
817 | tmp = RREG32(RADEON_RBBM_STATUS); | ||
818 | DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp); | ||
819 | return -1; | ||
820 | } | ||
821 | |||
822 | void r100_cp_commit(struct radeon_device *rdev) | 769 | void r100_cp_commit(struct radeon_device *rdev) |
823 | { | 770 | { |
824 | WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); | 771 | WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr); |
@@ -1732,51 +1679,6 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev) | |||
1732 | return -1; | 1679 | return -1; |
1733 | } | 1680 | } |
1734 | 1681 | ||
1735 | void r100_gpu_init(struct radeon_device *rdev) | ||
1736 | { | ||
1737 | /* TODO: anythings to do here ? pipes ? */ | ||
1738 | r100_hdp_reset(rdev); | ||
1739 | } | ||
1740 | |||
1741 | void r100_hdp_reset(struct radeon_device *rdev) | ||
1742 | { | ||
1743 | uint32_t tmp; | ||
1744 | |||
1745 | tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL; | ||
1746 | tmp |= (7 << 28); | ||
1747 | WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE); | ||
1748 | (void)RREG32(RADEON_HOST_PATH_CNTL); | ||
1749 | udelay(200); | ||
1750 | WREG32(RADEON_RBBM_SOFT_RESET, 0); | ||
1751 | WREG32(RADEON_HOST_PATH_CNTL, tmp); | ||
1752 | (void)RREG32(RADEON_HOST_PATH_CNTL); | ||
1753 | } | ||
1754 | |||
1755 | int r100_rb2d_reset(struct radeon_device *rdev) | ||
1756 | { | ||
1757 | uint32_t tmp; | ||
1758 | int i; | ||
1759 | |||
1760 | WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2); | ||
1761 | (void)RREG32(RADEON_RBBM_SOFT_RESET); | ||
1762 | udelay(200); | ||
1763 | WREG32(RADEON_RBBM_SOFT_RESET, 0); | ||
1764 | /* Wait to prevent race in RBBM_STATUS */ | ||
1765 | mdelay(1); | ||
1766 | for (i = 0; i < rdev->usec_timeout; i++) { | ||
1767 | tmp = RREG32(RADEON_RBBM_STATUS); | ||
1768 | if (!(tmp & (1 << 26))) { | ||
1769 | DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n", | ||
1770 | tmp); | ||
1771 | return 0; | ||
1772 | } | ||
1773 | DRM_UDELAY(1); | ||
1774 | } | ||
1775 | tmp = RREG32(RADEON_RBBM_STATUS); | ||
1776 | DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp); | ||
1777 | return -1; | ||
1778 | } | ||
1779 | |||
1780 | void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp) | 1682 | void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp) |
1781 | { | 1683 | { |
1782 | lockup->last_cp_rptr = cp->rptr; | 1684 | lockup->last_cp_rptr = cp->rptr; |
@@ -1863,31 +1765,77 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev) | |||
1863 | return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp); | 1765 | return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp); |
1864 | } | 1766 | } |
1865 | 1767 | ||
1768 | void r100_bm_disable(struct radeon_device *rdev) | ||
1769 | { | ||
1770 | u32 tmp; | ||
1771 | |||
1772 | /* disable bus mastering */ | ||
1773 | tmp = RREG32(R_000030_BUS_CNTL); | ||
1774 | WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044); | ||
1775 | mdelay(1); | ||
1776 | WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042); | ||
1777 | mdelay(1); | ||
1778 | WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040); | ||
1779 | tmp = RREG32(RADEON_BUS_CNTL); | ||
1780 | mdelay(1); | ||
1781 | pci_read_config_word(rdev->pdev, 0x4, (u16*)&tmp); | ||
1782 | pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB); | ||
1783 | mdelay(1); | ||
1784 | } | ||
1785 | |||
1866 | int r100_asic_reset(struct radeon_device *rdev) | 1786 | int r100_asic_reset(struct radeon_device *rdev) |
1867 | { | 1787 | { |
1868 | uint32_t status; | 1788 | struct r100_mc_save save; |
1789 | u32 status, tmp; | ||
1869 | 1790 | ||
1870 | /* reset order likely matter */ | 1791 | r100_mc_stop(rdev, &save); |
1871 | status = RREG32(RADEON_RBBM_STATUS); | 1792 | status = RREG32(R_000E40_RBBM_STATUS); |
1872 | /* reset HDP */ | 1793 | if (!G_000E40_GUI_ACTIVE(status)) { |
1873 | r100_hdp_reset(rdev); | 1794 | return 0; |
1874 | /* reset rb2d */ | ||
1875 | if (status & ((1 << 17) | (1 << 18) | (1 << 27))) { | ||
1876 | r100_rb2d_reset(rdev); | ||
1877 | } | 1795 | } |
1878 | /* TODO: reset 3D engine */ | 1796 | status = RREG32(R_000E40_RBBM_STATUS); |
1797 | dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); | ||
1798 | /* stop CP */ | ||
1799 | WREG32(RADEON_CP_CSQ_CNTL, 0); | ||
1800 | tmp = RREG32(RADEON_CP_RB_CNTL); | ||
1801 | WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); | ||
1802 | WREG32(RADEON_CP_RB_RPTR_WR, 0); | ||
1803 | WREG32(RADEON_CP_RB_WPTR, 0); | ||
1804 | WREG32(RADEON_CP_RB_CNTL, tmp); | ||
1805 | /* save PCI state */ | ||
1806 | pci_save_state(rdev->pdev); | ||
1807 | /* disable bus mastering */ | ||
1808 | r100_bm_disable(rdev); | ||
1809 | WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) | | ||
1810 | S_0000F0_SOFT_RESET_RE(1) | | ||
1811 | S_0000F0_SOFT_RESET_PP(1) | | ||
1812 | S_0000F0_SOFT_RESET_RB(1)); | ||
1813 | RREG32(R_0000F0_RBBM_SOFT_RESET); | ||
1814 | mdelay(500); | ||
1815 | WREG32(R_0000F0_RBBM_SOFT_RESET, 0); | ||
1816 | mdelay(1); | ||
1817 | status = RREG32(R_000E40_RBBM_STATUS); | ||
1818 | dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); | ||
1879 | /* reset CP */ | 1819 | /* reset CP */ |
1880 | status = RREG32(RADEON_RBBM_STATUS); | 1820 | WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1)); |
1881 | if (status & (1 << 16)) { | 1821 | RREG32(R_0000F0_RBBM_SOFT_RESET); |
1882 | r100_cp_reset(rdev); | 1822 | mdelay(500); |
1883 | } | 1823 | WREG32(R_0000F0_RBBM_SOFT_RESET, 0); |
1824 | mdelay(1); | ||
1825 | status = RREG32(R_000E40_RBBM_STATUS); | ||
1826 | dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status); | ||
1827 | /* restore PCI & busmastering */ | ||
1828 | pci_restore_state(rdev->pdev); | ||
1829 | r100_enable_bm(rdev); | ||
1884 | /* Check if GPU is idle */ | 1830 | /* Check if GPU is idle */ |
1885 | status = RREG32(RADEON_RBBM_STATUS); | 1831 | if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || |
1886 | if (status & RADEON_RBBM_ACTIVE) { | 1832 | G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { |
1887 | DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status); | 1833 | dev_err(rdev->dev, "failed to reset GPU\n"); |
1834 | rdev->gpu_lockup = true; | ||
1888 | return -1; | 1835 | return -1; |
1889 | } | 1836 | } |
1890 | DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status); | 1837 | r100_mc_resume(rdev, &save); |
1838 | dev_info(rdev->dev, "GPU reset succeed\n"); | ||
1891 | return 0; | 1839 | return 0; |
1892 | } | 1840 | } |
1893 | 1841 | ||
@@ -3475,7 +3423,7 @@ static int r100_startup(struct radeon_device *rdev) | |||
3475 | /* Resume clock */ | 3423 | /* Resume clock */ |
3476 | r100_clock_startup(rdev); | 3424 | r100_clock_startup(rdev); |
3477 | /* Initialize GPU configuration (# pipes, ...) */ | 3425 | /* Initialize GPU configuration (# pipes, ...) */ |
3478 | r100_gpu_init(rdev); | 3426 | // r100_gpu_init(rdev); |
3479 | /* Initialize GART (initialize after TTM so we can allocate | 3427 | /* Initialize GART (initialize after TTM so we can allocate |
3480 | * memory through TTM but finalize after TTM) */ | 3428 | * memory through TTM but finalize after TTM) */ |
3481 | r100_enable_bm(rdev); | 3429 | r100_enable_bm(rdev); |