aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon/r100.c
diff options
context:
space:
mode:
authorJerome Glisse <jglisse@redhat.com>2010-03-09 09:45:10 -0500
committerDave Airlie <airlied@redhat.com>2010-04-05 20:42:45 -0400
commit225758d8ba4fdcc1e8c9cf617fd89529bd4a9596 (patch)
treea9ac2f23435d4a6db5aa33774ba94d9f0aeb5c4c /drivers/gpu/drm/radeon/r100.c
parent95beb690170e6ce918fe53c73a0fcc7cf64d704a (diff)
drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V4
This patch cleans up the fence code: it drops the timeout field of the fence, as the time to complete each IB is unpredictable and shouldn't be bounded. The fence cleanup leads to improved GPU lockup detection; this patch introduces a callback that allows ASIC-specific tests for lockup detection. In this patch the CP is used as the first indicator of a GPU lockup. If the CP doesn't make progress for 1 second we assume we are facing a GPU lockup. To avoid the overhead of testing for a GPU lockup too frequently when a fence is merely taking time to be signaled, we query the lockup callback every 500 msec. There are plenty of code comments explaining the design & choices inside the code. This has been tested mostly on R3XX/R5XX hw; on a normally running desktop (compiz, firefox, quake3 running) the lockup callback wasn't called once (1 hour session). Also tested by forcing a GPU lockup, and the lockup was reported after the 1s CP activity timeout. V2 switch to 500ms timeout so the GPU lockup callback gets called at least 2 times in less than 2sec. V3 store last jiffies in the fence struct so on ERESTART, EBUSY we keep track of how long we have already waited for a given fence. V4 make sure we have an up-to-date CP read pointer so we don't get false positives. Signed-off-by: Jerome Glisse <jglisse@redhat.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r100.c')
-rw-r--r--drivers/gpu/drm/radeon/r100.c86
1 files changed, 86 insertions, 0 deletions
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 3ae51ada1abf..845c8f3063fe 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1777,6 +1777,92 @@ int r100_rb2d_reset(struct radeon_device *rdev)
1777 return -1; 1777 return -1;
1778} 1778}
1779 1779
1780void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
1781{
1782 lockup->last_cp_rptr = cp->rptr;
1783 lockup->last_jiffies = jiffies;
1784}
1785
1786/**
1787 * r100_gpu_cp_is_lockup() - check if CP is lockup by recording information
1788 * @rdev: radeon device structure
1789 * @lockup: r100_gpu_lockup structure holding CP lockup tracking informations
1790 * @cp: radeon_cp structure holding CP information
1791 *
1792 * We don't need to initialize the lockup tracking information as we will either
1793 * have CP rptr to a different value of jiffies wrap around which will force
1794 * initialization of the lockup tracking informations.
1795 *
1796 * A possible false positivie is if we get call after while and last_cp_rptr ==
1797 * the current CP rptr, even if it's unlikely it might happen. To avoid this
1798 * if the elapsed time since last call is bigger than 2 second than we return
1799 * false and update the tracking information. Due to this the caller must call
1800 * r100_gpu_cp_is_lockup several time in less than 2sec for lockup to be reported
1801 * the fencing code should be cautious about that.
1802 *
1803 * Caller should write to the ring to force CP to do something so we don't get
1804 * false positive when CP is just gived nothing to do.
1805 *
1806 **/
1807bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp)
1808{
1809 unsigned long cjiffies, elapsed;
1810
1811 cjiffies = jiffies;
1812 if (!time_after(cjiffies, lockup->last_jiffies)) {
1813 /* likely a wrap around */
1814 lockup->last_cp_rptr = cp->rptr;
1815 lockup->last_jiffies = jiffies;
1816 return false;
1817 }
1818 if (cp->rptr != lockup->last_cp_rptr) {
1819 /* CP is still working no lockup */
1820 lockup->last_cp_rptr = cp->rptr;
1821 lockup->last_jiffies = jiffies;
1822 return false;
1823 }
1824 elapsed = jiffies_to_msecs(cjiffies - lockup->last_jiffies);
1825 if (elapsed >= 3000) {
1826 /* very likely the improbable case where current
1827 * rptr is equal to last recorded, a while ago, rptr
1828 * this is more likely a false positive update tracking
1829 * information which should force us to be recall at
1830 * latter point
1831 */
1832 lockup->last_cp_rptr = cp->rptr;
1833 lockup->last_jiffies = jiffies;
1834 return false;
1835 }
1836 if (elapsed >= 1000) {
1837 dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed);
1838 return true;
1839 }
1840 /* give a chance to the GPU ... */
1841 return false;
1842}
1843
1844bool r100_gpu_is_lockup(struct radeon_device *rdev)
1845{
1846 u32 rbbm_status;
1847 int r;
1848
1849 rbbm_status = RREG32(R_000E40_RBBM_STATUS);
1850 if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
1851 r100_gpu_lockup_update(&rdev->config.r100.lockup, &rdev->cp);
1852 return false;
1853 }
1854 /* force CP activities */
1855 r = radeon_ring_lock(rdev, 2);
1856 if (!r) {
1857 /* PACKET2 NOP */
1858 radeon_ring_write(rdev, 0x80000000);
1859 radeon_ring_write(rdev, 0x80000000);
1860 radeon_ring_unlock_commit(rdev);
1861 }
1862 rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
1863 return r100_gpu_cp_is_lockup(rdev, &rdev->config.r100.lockup, &rdev->cp);
1864}
1865
1780int r100_gpu_reset(struct radeon_device *rdev) 1866int r100_gpu_reset(struct radeon_device *rdev)
1781{ 1867{
1782 uint32_t status; 1868 uint32_t status;