diff options
author | Jerome Glisse <jglisse@redhat.com> | 2010-03-09 09:45:10 -0500 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2010-04-05 20:42:45 -0400 |
commit | 225758d8ba4fdcc1e8c9cf617fd89529bd4a9596 (patch) | |
tree | a9ac2f23435d4a6db5aa33774ba94d9f0aeb5c4c /drivers/gpu/drm/radeon/radeon.h | |
parent | 95beb690170e6ce918fe53c73a0fcc7cf64d704a (diff) |
drm/radeon/kms: fence cleanup + more reliable GPU lockup detection V4
This patch cleans up the fence code; it drops the timeout field of
the fence, as the time to complete each IB is unpredictable and
shouldn't be bounded.
The fence cleanup leads to an improvement in GPU lockup detection: this
patch introduces a callback that allows ASIC-specific tests for
lockup detection. In this patch the CP is used as a first indicator
of GPU lockup. If the CP doesn't make progress for 1 second, we assume
we are facing a GPU lockup.
To avoid the overhead of testing for a GPU lockup too frequently when a
fence merely takes time to be signaled, we query the lockup callback
only every 500 ms. There are plenty of code comments explaining the
design and choices inside the code.
This has been tested mostly on R3XX/R5XX hardware; on a normally running
desktop (compiz, firefox, quake3 running) the lockup callback was not
called even once (1 hour session). Also tested by forcing a GPU lockup:
the lockup was reported after the 1 s CP activity timeout.
V2: switch to a 500 ms timeout so the GPU lockup callback gets called at
least 2 times in less than 2 s.
V3: store the last jiffies in the fence struct so that on ERESTART or
EBUSY we keep track of how long we have already waited for a given fence.
V4: make sure we have an up-to-date CP read pointer so we don't get
false positives.
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon.h')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 104 |
1 files changed, 59 insertions, 45 deletions
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 034218c3dbbb..a3d13c367176 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -99,6 +99,7 @@ extern int radeon_hw_i2c; | |||
99 | * symbol; | 99 | * symbol; |
100 | */ | 100 | */ |
101 | #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ | 101 | #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ |
102 | #define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) | ||
102 | /* RADEON_IB_POOL_SIZE must be a power of 2 */ | 103 | /* RADEON_IB_POOL_SIZE must be a power of 2 */ |
103 | #define RADEON_IB_POOL_SIZE 16 | 104 | #define RADEON_IB_POOL_SIZE 16 |
104 | #define RADEON_DEBUGFS_MAX_NUM_FILES 32 | 105 | #define RADEON_DEBUGFS_MAX_NUM_FILES 32 |
@@ -182,7 +183,8 @@ struct radeon_fence_driver { | |||
182 | uint32_t scratch_reg; | 183 | uint32_t scratch_reg; |
183 | atomic_t seq; | 184 | atomic_t seq; |
184 | uint32_t last_seq; | 185 | uint32_t last_seq; |
185 | unsigned long count_timeout; | 186 | unsigned long last_jiffies; |
187 | unsigned long last_timeout; | ||
186 | wait_queue_head_t queue; | 188 | wait_queue_head_t queue; |
187 | rwlock_t lock; | 189 | rwlock_t lock; |
188 | struct list_head created; | 190 | struct list_head created; |
@@ -197,7 +199,6 @@ struct radeon_fence { | |||
197 | struct list_head list; | 199 | struct list_head list; |
198 | /* protected by radeon_fence.lock */ | 200 | /* protected by radeon_fence.lock */ |
199 | uint32_t seq; | 201 | uint32_t seq; |
200 | unsigned long timeout; | ||
201 | bool emited; | 202 | bool emited; |
202 | bool signaled; | 203 | bool signaled; |
203 | }; | 204 | }; |
@@ -746,6 +747,7 @@ struct radeon_asic { | |||
746 | int (*resume)(struct radeon_device *rdev); | 747 | int (*resume)(struct radeon_device *rdev); |
747 | int (*suspend)(struct radeon_device *rdev); | 748 | int (*suspend)(struct radeon_device *rdev); |
748 | void (*vga_set_state)(struct radeon_device *rdev, bool state); | 749 | void (*vga_set_state)(struct radeon_device *rdev, bool state); |
750 | bool (*gpu_is_lockup)(struct radeon_device *rdev); | ||
749 | int (*gpu_reset)(struct radeon_device *rdev); | 751 | int (*gpu_reset)(struct radeon_device *rdev); |
750 | void (*gart_tlb_flush)(struct radeon_device *rdev); | 752 | void (*gart_tlb_flush)(struct radeon_device *rdev); |
751 | int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr); | 753 | int (*gart_set_page)(struct radeon_device *rdev, int i, uint64_t addr); |
@@ -804,59 +806,68 @@ struct radeon_asic { | |||
804 | /* | 806 | /* |
805 | * Asic structures | 807 | * Asic structures |
806 | */ | 808 | */ |
809 | struct r100_gpu_lockup { | ||
810 | unsigned long last_jiffies; | ||
811 | u32 last_cp_rptr; | ||
812 | }; | ||
813 | |||
807 | struct r100_asic { | 814 | struct r100_asic { |
808 | const unsigned *reg_safe_bm; | 815 | const unsigned *reg_safe_bm; |
809 | unsigned reg_safe_bm_size; | 816 | unsigned reg_safe_bm_size; |
810 | u32 hdp_cntl; | 817 | u32 hdp_cntl; |
818 | struct r100_gpu_lockup lockup; | ||
811 | }; | 819 | }; |
812 | 820 | ||
813 | struct r300_asic { | 821 | struct r300_asic { |
814 | const unsigned *reg_safe_bm; | 822 | const unsigned *reg_safe_bm; |
815 | unsigned reg_safe_bm_size; | 823 | unsigned reg_safe_bm_size; |
816 | u32 resync_scratch; | 824 | u32 resync_scratch; |
817 | u32 hdp_cntl; | 825 | u32 hdp_cntl; |
826 | struct r100_gpu_lockup lockup; | ||
818 | }; | 827 | }; |
819 | 828 | ||
820 | struct r600_asic { | 829 | struct r600_asic { |
821 | unsigned max_pipes; | 830 | unsigned max_pipes; |
822 | unsigned max_tile_pipes; | 831 | unsigned max_tile_pipes; |
823 | unsigned max_simds; | 832 | unsigned max_simds; |
824 | unsigned max_backends; | 833 | unsigned max_backends; |
825 | unsigned max_gprs; | 834 | unsigned max_gprs; |
826 | unsigned max_threads; | 835 | unsigned max_threads; |
827 | unsigned max_stack_entries; | 836 | unsigned max_stack_entries; |
828 | unsigned max_hw_contexts; | 837 | unsigned max_hw_contexts; |
829 | unsigned max_gs_threads; | 838 | unsigned max_gs_threads; |
830 | unsigned sx_max_export_size; | 839 | unsigned sx_max_export_size; |
831 | unsigned sx_max_export_pos_size; | 840 | unsigned sx_max_export_pos_size; |
832 | unsigned sx_max_export_smx_size; | 841 | unsigned sx_max_export_smx_size; |
833 | unsigned sq_num_cf_insts; | 842 | unsigned sq_num_cf_insts; |
834 | unsigned tiling_nbanks; | 843 | unsigned tiling_nbanks; |
835 | unsigned tiling_npipes; | 844 | unsigned tiling_npipes; |
836 | unsigned tiling_group_size; | 845 | unsigned tiling_group_size; |
846 | struct r100_gpu_lockup lockup; | ||
837 | }; | 847 | }; |
838 | 848 | ||
839 | struct rv770_asic { | 849 | struct rv770_asic { |
840 | unsigned max_pipes; | 850 | unsigned max_pipes; |
841 | unsigned max_tile_pipes; | 851 | unsigned max_tile_pipes; |
842 | unsigned max_simds; | 852 | unsigned max_simds; |
843 | unsigned max_backends; | 853 | unsigned max_backends; |
844 | unsigned max_gprs; | 854 | unsigned max_gprs; |
845 | unsigned max_threads; | 855 | unsigned max_threads; |
846 | unsigned max_stack_entries; | 856 | unsigned max_stack_entries; |
847 | unsigned max_hw_contexts; | 857 | unsigned max_hw_contexts; |
848 | unsigned max_gs_threads; | 858 | unsigned max_gs_threads; |
849 | unsigned sx_max_export_size; | 859 | unsigned sx_max_export_size; |
850 | unsigned sx_max_export_pos_size; | 860 | unsigned sx_max_export_pos_size; |
851 | unsigned sx_max_export_smx_size; | 861 | unsigned sx_max_export_smx_size; |
852 | unsigned sq_num_cf_insts; | 862 | unsigned sq_num_cf_insts; |
853 | unsigned sx_num_of_sets; | 863 | unsigned sx_num_of_sets; |
854 | unsigned sc_prim_fifo_size; | 864 | unsigned sc_prim_fifo_size; |
855 | unsigned sc_hiz_tile_fifo_size; | 865 | unsigned sc_hiz_tile_fifo_size; |
856 | unsigned sc_earlyz_tile_fifo_fize; | 866 | unsigned sc_earlyz_tile_fifo_fize; |
857 | unsigned tiling_nbanks; | 867 | unsigned tiling_nbanks; |
858 | unsigned tiling_npipes; | 868 | unsigned tiling_npipes; |
859 | unsigned tiling_group_size; | 869 | unsigned tiling_group_size; |
870 | struct r100_gpu_lockup lockup; | ||
860 | }; | 871 | }; |
861 | 872 | ||
862 | union radeon_asic_config { | 873 | union radeon_asic_config { |
@@ -1145,6 +1156,7 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) | |||
1145 | #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) | 1156 | #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) |
1146 | #define radeon_cs_parse(p) rdev->asic->cs_parse((p)) | 1157 | #define radeon_cs_parse(p) rdev->asic->cs_parse((p)) |
1147 | #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) | 1158 | #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) |
1159 | #define radeon_gpu_is_lockup(rdev) (rdev)->asic->gpu_is_lockup((rdev)) | ||
1148 | #define radeon_gpu_reset(rdev) (rdev)->asic->gpu_reset((rdev)) | 1160 | #define radeon_gpu_reset(rdev) (rdev)->asic->gpu_reset((rdev)) |
1149 | #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev)) | 1161 | #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart_tlb_flush((rdev)) |
1150 | #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart_set_page((rdev), (i), (p)) | 1162 | #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart_set_page((rdev), (i), (p)) |
@@ -1200,6 +1212,8 @@ extern int radeon_resume_kms(struct drm_device *dev); | |||
1200 | extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); | 1212 | extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); |
1201 | 1213 | ||
1202 | /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ | 1214 | /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ |
1215 | extern void r100_gpu_lockup_update(struct r100_gpu_lockup *lockup, struct radeon_cp *cp); | ||
1216 | extern bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *lockup, struct radeon_cp *cp); | ||
1203 | 1217 | ||
1204 | /* rv200,rv250,rv280 */ | 1218 | /* rv200,rv250,rv280 */ |
1205 | extern void r200_set_safe_registers(struct radeon_device *rdev); | 1219 | extern void r200_set_safe_registers(struct radeon_device *rdev); |