diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-04-25 08:00:54 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:57 -0400 |
commit | 1c9aaa1eafcf91fbc29404b449f2bec072c804a5 (patch) | |
tree | 702f9933600962f05d0d76a9624a67f027b7bea8 | |
parent | 24fc5e36a7f4fe2f36f78c6c91909595964f1645 (diff) |
gpu: nvgpu: Implement ELPG flush for gm20b
The ELPG flush is initiated by writing a common broadcast register, but its
completion must be polled on the per-L2 (per-LTC) registers. Split the flush
into separate gk20a and gm20b versions: gk20a polls only LTC0, gm20b polls every LTC.
Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_common.c | 34 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | 34 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | 20 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 46 |
4 files changed, 97 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index ac46a9a0..72477983 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c | |||
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) | |||
313 | 0, max_comptag_lines - 1); | 313 | 0, max_comptag_lines - 1); |
314 | 314 | ||
315 | } | 315 | } |
316 | |||
317 | /* Flushes the compression bit cache as well as "data". | ||
318 | * Note: the name here is a bit of a misnomer. ELPG uses this | ||
319 | * internally... but ELPG doesn't have to be on to do it manually. | ||
320 | */ | ||
321 | static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | ||
322 | { | ||
323 | u32 data; | ||
324 | s32 retry = 100; | ||
325 | |||
326 | gk20a_dbg_fn(""); | ||
327 | |||
328 | /* Make sure all previous writes are committed to the L2. There's no | ||
329 | guarantee that writes are to DRAM. This will be a sysmembar internal | ||
330 | to the L2. */ | ||
331 | gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(), | ||
332 | ltc_ltcs_ltss_g_elpg_flush_pending_f()); | ||
333 | do { | ||
334 | data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r()); | ||
335 | |||
336 | if (ltc_ltc0_ltss_g_elpg_flush_v(data) == | ||
337 | ltc_ltc0_ltss_g_elpg_flush_pending_v()) { | ||
338 | gk20a_dbg_info("g_elpg_flush 0x%x", data); | ||
339 | retry--; | ||
340 | usleep_range(20, 40); | ||
341 | } else | ||
342 | break; | ||
343 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
344 | |||
345 | if (retry < 0) | ||
346 | gk20a_warn(dev_from_gk20a(g), | ||
347 | "g_elpg_flush too many retries"); | ||
348 | |||
349 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index c1ba2aee..9f5317fc 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c | |||
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g) | |||
212 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); | 212 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); |
213 | } | 213 | } |
214 | 214 | ||
215 | /* Flushes the compression bit cache as well as "data". | ||
216 | * Note: the name here is a bit of a misnomer. ELPG uses this | ||
217 | * internally... but ELPG doesn't have to be on to do it manually. | ||
218 | */ | ||
219 | static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g) | ||
220 | { | ||
221 | u32 data; | ||
222 | s32 retry = 100; | ||
223 | |||
224 | gk20a_dbg_fn(""); | ||
225 | |||
226 | /* Make sure all previous writes are committed to the L2. There's no | ||
227 | guarantee that writes are to DRAM. This will be a sysmembar internal | ||
228 | to the L2. */ | ||
229 | gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(), | ||
230 | ltc_ltcs_ltss_g_elpg_flush_pending_f()); | ||
231 | do { | ||
232 | data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r()); | ||
233 | |||
234 | if (ltc_ltc0_ltss_g_elpg_flush_v(data) == | ||
235 | ltc_ltc0_ltss_g_elpg_flush_pending_v()) { | ||
236 | gk20a_dbg_info("g_elpg_flush 0x%x", data); | ||
237 | retry--; | ||
238 | usleep_range(20, 40); | ||
239 | } else | ||
240 | break; | ||
241 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
242 | |||
243 | if (retry < 0) | ||
244 | gk20a_warn(dev_from_gk20a(g), | ||
245 | "g_elpg_flush too many retries"); | ||
246 | |||
247 | } | ||
248 | |||
215 | void gk20a_init_ltc(struct gpu_ops *gops) | 249 | void gk20a_init_ltc(struct gpu_ops *gops) |
216 | { | 250 | { |
217 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; | 251 | gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h index 28c58f50..9840805d 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | |||
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void) | |||
96 | } | 96 | } |
97 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void) | 97 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void) |
98 | { | 98 | { |
99 | return 0x1; | 99 | return 0x1; |
100 | } | 100 | } |
101 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void) | 101 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void) |
102 | { | 102 | { |
103 | return 0x2; | 103 | return 0x2; |
104 | } | 104 | } |
105 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r) | 105 | static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r) |
106 | { | 106 | { |
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void) | |||
258 | { | 258 | { |
259 | return 0x1; | 259 | return 0x1; |
260 | } | 260 | } |
261 | static inline u32 ltc_ltc1_ltss_g_elpg_r(void) | ||
262 | { | ||
263 | return 0x00142214; | ||
264 | } | ||
265 | static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r) | ||
266 | { | ||
267 | return (r >> 0) & 0x1; | ||
268 | } | ||
269 | static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void) | ||
270 | { | ||
271 | return 0x00000001; | ||
272 | } | ||
273 | static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void) | ||
274 | { | ||
275 | return 0x1; | ||
276 | } | ||
261 | static inline u32 ltc_ltc0_ltss_intr_r(void) | 277 | static inline u32 ltc_ltc0_ltss_intr_r(void) |
262 | { | 278 | { |
263 | return 0x0014020c; | 279 | return 0x0014020c; |
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 5da21c64..43c90970 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g) | |||
193 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); | 193 | gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); |
194 | } | 194 | } |
195 | 195 | ||
196 | static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g) | ||
197 | { | ||
198 | u32 data; | ||
199 | bool done[g->ltc_count]; | ||
200 | s32 retry = 100; | ||
201 | int i; | ||
202 | int num_done = 0; | ||
203 | u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r(); | ||
204 | |||
205 | gk20a_dbg_fn(""); | ||
206 | |||
207 | for (i = 0; i < g->ltc_count; i++) | ||
208 | done[i] = 0; | ||
209 | |||
210 | gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(), | ||
211 | ltc_ltcs_ltss_g_elpg_flush_pending_f()); | ||
212 | do { | ||
213 | for (i = 0; i < g->ltc_count; i++) { | ||
214 | if (done[i]) | ||
215 | continue; | ||
216 | |||
217 | data = gk20a_readl(g, | ||
218 | ltc_ltc0_ltss_g_elpg_r() + ltc_d * i); | ||
219 | |||
220 | if (ltc_ltc0_ltss_g_elpg_flush_v(data)) { | ||
221 | gk20a_dbg_info("g_elpg_flush 0x%x", data); | ||
222 | } else { | ||
223 | done[i] = 1; | ||
224 | num_done++; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | if (num_done < g->ltc_count) { | ||
229 | retry--; | ||
230 | usleep_range(20, 40); | ||
231 | } else | ||
232 | break; | ||
233 | } while (retry >= 0 || !tegra_platform_is_silicon()); | ||
234 | |||
235 | if (retry < 0) | ||
236 | gk20a_warn(dev_from_gk20a(g), | ||
237 | "g_elpg_flush too many retries"); | ||
238 | } | ||
239 | |||
196 | void gm20b_init_ltc(struct gpu_ops *gops) | 240 | void gm20b_init_ltc(struct gpu_ops *gops) |
197 | { | 241 | { |
198 | /* Gk20a reused ops. */ | 242 | /* Gk20a reused ops. */ |
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops) | |||
209 | gops->ltc.init_fs_state = gm20b_ltc_init_fs_state; | 253 | gops->ltc.init_fs_state = gm20b_ltc_init_fs_state; |
210 | gops->ltc.init_comptags = gm20b_ltc_init_comptags; | 254 | gops->ltc.init_comptags = gm20b_ltc_init_comptags; |
211 | gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl; | 255 | gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl; |
212 | gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked; | 256 | gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked; |
213 | gops->ltc.isr = gm20b_ltc_isr; | 257 | gops->ltc.isr = gm20b_ltc_isr; |
214 | } | 258 | } |