summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com> 2014-04-25 08:00:54 -0400
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:09:57 -0400
commit: 1c9aaa1eafcf91fbc29404b449f2bec072c804a5 (patch)
tree: 702f9933600962f05d0d76a9624a67f027b7bea8
parent: 24fc5e36a7f4fe2f36f78c6c91909595964f1645 (diff)
gpu: nvgpu: Implement ELPG flush for gm20b
ELPG flush is initiated from a common broadcast register, but completion must be waited on via the per-L2 registers. Split the flush into separate gk20a and gm20b versions.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_common.c    34
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ltc_gk20a.c     34
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h  20
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c     46
4 files changed, 97 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c
index ac46a9a0..72477983 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_common.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c
@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
313 0, max_comptag_lines - 1); 313 0, max_comptag_lines - 1);
314 314
315} 315}
316
317/* Flushes the compression bit cache as well as "data".
318 * Note: the name here is a bit of a misnomer. ELPG uses this
319 * internally... but ELPG doesn't have to be on to do it manually.
320 */
321static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
322{
323 u32 data;
324 s32 retry = 100;
325
326 gk20a_dbg_fn("");
327
328 /* Make sure all previous writes are committed to the L2. There's no
329 guarantee that writes are to DRAM. This will be a sysmembar internal
330 to the L2. */
331 gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
332 ltc_ltcs_ltss_g_elpg_flush_pending_f());
333 do {
334 data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
335
336 if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
337 ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
338 gk20a_dbg_info("g_elpg_flush 0x%x", data);
339 retry--;
340 usleep_range(20, 40);
341 } else
342 break;
343 } while (retry >= 0 || !tegra_platform_is_silicon());
344
345 if (retry < 0)
346 gk20a_warn(dev_from_gk20a(g),
347 "g_elpg_flush too many retries");
348
349}
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index c1ba2aee..9f5317fc 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
212 gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); 212 gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
213} 213}
214 214
215/* Flushes the compression bit cache as well as "data".
216 * Note: the name here is a bit of a misnomer. ELPG uses this
217 * internally... but ELPG doesn't have to be on to do it manually.
218 */
219static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
220{
221 u32 data;
222 s32 retry = 100;
223
224 gk20a_dbg_fn("");
225
226 /* Make sure all previous writes are committed to the L2. There's no
227 guarantee that writes are to DRAM. This will be a sysmembar internal
228 to the L2. */
229 gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
230 ltc_ltcs_ltss_g_elpg_flush_pending_f());
231 do {
232 data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
233
234 if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
235 ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
236 gk20a_dbg_info("g_elpg_flush 0x%x", data);
237 retry--;
238 usleep_range(20, 40);
239 } else
240 break;
241 } while (retry >= 0 || !tegra_platform_is_silicon());
242
243 if (retry < 0)
244 gk20a_warn(dev_from_gk20a(g),
245 "g_elpg_flush too many retries");
246
247}
248
215void gk20a_init_ltc(struct gpu_ops *gops) 249void gk20a_init_ltc(struct gpu_ops *gops)
216{ 250{
217 gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; 251 gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index 28c58f50..9840805d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
96} 96}
97static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void) 97static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
98{ 98{
99 return 0x1; 99 return 0x1;
100} 100}
101static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void) 101static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
102{ 102{
103 return 0x2; 103 return 0x2;
104} 104}
105static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r) 105static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
106{ 106{
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
258{ 258{
259 return 0x1; 259 return 0x1;
260} 260}
261static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
262{
263 return 0x00142214;
264}
265static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
266{
267 return (r >> 0) & 0x1;
268}
269static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
270{
271 return 0x00000001;
272}
273static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
274{
275 return 0x1;
276}
261static inline u32 ltc_ltc0_ltss_intr_r(void) 277static inline u32 ltc_ltc0_ltss_intr_r(void)
262{ 278{
263 return 0x0014020c; 279 return 0x0014020c;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 5da21c64..43c90970 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
193 gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr); 193 gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
194} 194}
195 195
196static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
197{
198 u32 data;
199 bool done[g->ltc_count];
200 s32 retry = 100;
201 int i;
202 int num_done = 0;
203 u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
204
205 gk20a_dbg_fn("");
206
207 for (i = 0; i < g->ltc_count; i++)
208 done[i] = 0;
209
210 gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
211 ltc_ltcs_ltss_g_elpg_flush_pending_f());
212 do {
213 for (i = 0; i < g->ltc_count; i++) {
214 if (done[i])
215 continue;
216
217 data = gk20a_readl(g,
218 ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
219
220 if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
221 gk20a_dbg_info("g_elpg_flush 0x%x", data);
222 } else {
223 done[i] = 1;
224 num_done++;
225 }
226 }
227
228 if (num_done < g->ltc_count) {
229 retry--;
230 usleep_range(20, 40);
231 } else
232 break;
233 } while (retry >= 0 || !tegra_platform_is_silicon());
234
235 if (retry < 0)
236 gk20a_warn(dev_from_gk20a(g),
237 "g_elpg_flush too many retries");
238}
239
196void gm20b_init_ltc(struct gpu_ops *gops) 240void gm20b_init_ltc(struct gpu_ops *gops)
197{ 241{
198 /* Gk20a reused ops. */ 242 /* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
209 gops->ltc.init_fs_state = gm20b_ltc_init_fs_state; 253 gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
210 gops->ltc.init_comptags = gm20b_ltc_init_comptags; 254 gops->ltc.init_comptags = gm20b_ltc_init_comptags;
211 gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl; 255 gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
212 gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked; 256 gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
213 gops->ltc.isr = gm20b_ltc_isr; 257 gops->ltc.isr = gm20b_ltc_isr;
214} 258}