diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-05-21 11:53:53 -0400 |
---|---|---|
committer | Deepak Nibade <dnibade@nvidia.com> | 2016-12-27 04:52:05 -0500 |
commit | 0c5c1bf61ae1bd3e16a398a7b54e78314c361eb1 (patch) | |
tree | 5b6c043b7d5c63918b27f4d1c3040db9701bd823 /drivers/gpu/nvgpu | |
parent | 94a7c5ff2cbe8a583e9b8fc4777e5debe4c48810 (diff) |
gpu: nvgpu: gp10b: Wait for preempted or empty
ZBC is safe to update, and the GPU is safe to rail-gate, when units are
in the preempted or empty state. Idle may never be reached in case of
graphics preemption, so relax the ZBC update wait condition to accept
units that are either empty or preempted.
Bug 1640378
Change-Id: I40c59e9af22a7a30b777c6b9f87e69d130042e44
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/745655
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 66 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | 24 |
2 files changed, 90 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 1b88112e..03462d5f 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -14,6 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ | 16 | #include "gk20a/gk20a.h" /* FERMI and MAXWELL classes defined here */ |
17 | #include <linux/delay.h> | ||
17 | 18 | ||
18 | #include "gk20a/gr_gk20a.h" | 19 | #include "gk20a/gr_gk20a.h" |
19 | 20 | ||
@@ -23,6 +24,7 @@ | |||
23 | #include "hw_fifo_gp10b.h" | 24 | #include "hw_fifo_gp10b.h" |
24 | #include "hw_proj_gp10b.h" | 25 | #include "hw_proj_gp10b.h" |
25 | #include "hw_ctxsw_prog_gp10b.h" | 26 | #include "hw_ctxsw_prog_gp10b.h" |
27 | #include "hw_mc_gp10b.h" | ||
26 | 28 | ||
27 | static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) | 29 | static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) |
28 | { | 30 | { |
@@ -779,6 +781,69 @@ static int gr_gp10b_dump_gr_status_regs(struct gk20a *g, | |||
779 | return 0; | 781 | return 0; |
780 | } | 782 | } |
781 | 783 | ||
784 | static bool gr_activity_empty_or_preempted(u32 val) | ||
785 | { | ||
786 | while(val) { | ||
787 | u32 v = val & 7; | ||
788 | if (v != gr_activity_4_gpc0_empty_v() && | ||
789 | v != gr_activity_4_gpc0_preempted_v()) | ||
790 | return false; | ||
791 | val >>= 3; | ||
792 | } | ||
793 | |||
794 | return true; | ||
795 | } | ||
796 | |||
797 | static int gr_gp10b_wait_empty(struct gk20a *g, unsigned long end_jiffies, | ||
798 | u32 expect_delay) | ||
799 | { | ||
800 | u32 delay = expect_delay; | ||
801 | bool gr_enabled; | ||
802 | bool ctxsw_active; | ||
803 | bool gr_busy; | ||
804 | u32 gr_status; | ||
805 | u32 activity0, activity1, activity2, activity4; | ||
806 | |||
807 | gk20a_dbg_fn(""); | ||
808 | |||
809 | do { | ||
810 | /* fmodel: host gets fifo_engine_status(gr) from gr | ||
811 | only when gr_status is read */ | ||
812 | gr_status = gk20a_readl(g, gr_status_r()); | ||
813 | |||
814 | gr_enabled = gk20a_readl(g, mc_enable_r()) & | ||
815 | mc_enable_pgraph_enabled_f(); | ||
816 | |||
817 | ctxsw_active = gr_status & 1<<7; | ||
818 | |||
819 | activity0 = gk20a_readl(g, gr_activity_0_r()); | ||
820 | activity1 = gk20a_readl(g, gr_activity_1_r()); | ||
821 | activity2 = gk20a_readl(g, gr_activity_2_r()); | ||
822 | activity4 = gk20a_readl(g, gr_activity_4_r()); | ||
823 | |||
824 | gr_busy = !(gr_activity_empty_or_preempted(activity0) && | ||
825 | gr_activity_empty_or_preempted(activity1) && | ||
826 | activity2 == 0 && | ||
827 | gr_activity_empty_or_preempted(activity4)); | ||
828 | |||
829 | if (!gr_enabled || (!gr_busy && !ctxsw_active)) { | ||
830 | gk20a_dbg_fn("done"); | ||
831 | return 0; | ||
832 | } | ||
833 | |||
834 | usleep_range(delay, delay * 2); | ||
835 | delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); | ||
836 | |||
837 | } while (time_before(jiffies, end_jiffies) | ||
838 | || !tegra_platform_is_silicon()); | ||
839 | |||
840 | gk20a_err(dev_from_gk20a(g), | ||
841 | "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", | ||
842 | ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); | ||
843 | |||
844 | return -EAGAIN; | ||
845 | } | ||
846 | |||
782 | void gp10b_init_gr(struct gpu_ops *gops) | 847 | void gp10b_init_gr(struct gpu_ops *gops) |
783 | { | 848 | { |
784 | gm20b_init_gr(gops); | 849 | gm20b_init_gr(gops); |
@@ -802,4 +867,5 @@ void gp10b_init_gr(struct gpu_ops *gops) | |||
802 | gops->gr.update_ctxsw_preemption_mode = | 867 | gops->gr.update_ctxsw_preemption_mode = |
803 | gr_gp10b_update_ctxsw_preemption_mode; | 868 | gr_gp10b_update_ctxsw_preemption_mode; |
804 | gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs; | 869 | gops->gr.dump_gr_regs = gr_gp10b_dump_gr_status_regs; |
870 | gops->gr.wait_empty = gr_gp10b_wait_empty; | ||
805 | } | 871 | } |
diff --git a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h index 02674d6b..b185604e 100644 --- a/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/hw_gr_gp10b.h | |||
@@ -334,6 +334,30 @@ static inline u32 gr_activity_4_r(void) | |||
334 | { | 334 | { |
335 | return 0x00400390; | 335 | return 0x00400390; |
336 | } | 336 | } |
337 | static inline u32 gr_activity_4_gpc0_s(void) | ||
338 | { | ||
339 | return 3; | ||
340 | } | ||
341 | static inline u32 gr_activity_4_gpc0_f(u32 v) | ||
342 | { | ||
343 | return (v & 0x7) << 0; | ||
344 | } | ||
345 | static inline u32 gr_activity_4_gpc0_m(void) | ||
346 | { | ||
347 | return 0x7 << 0; | ||
348 | } | ||
349 | static inline u32 gr_activity_4_gpc0_v(u32 r) | ||
350 | { | ||
351 | return (r >> 0) & 0x7; | ||
352 | } | ||
353 | static inline u32 gr_activity_4_gpc0_empty_v(void) | ||
354 | { | ||
355 | return 0x00000000; | ||
356 | } | ||
357 | static inline u32 gr_activity_4_gpc0_preempted_v(void) | ||
358 | { | ||
359 | return 0x00000004; | ||
360 | } | ||
337 | static inline u32 gr_pri_gpc0_gcc_dbg_r(void) | 361 | static inline u32 gr_pri_gpc0_gcc_dbg_r(void) |
338 | { | 362 | { |
339 | return 0x00501000; | 363 | return 0x00501000; |