summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTerje Bergstrom <tbergstrom@nvidia.com>2018-01-19 18:16:44 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-01-24 17:42:16 -0500
commitfb0a23ea168fd0947d9bc1064f91f15ef8a0c057 (patch)
tree001dc993501eec6e83bf83b19752001a9c470a54
parentdcff39ba8ca18a8f2f8fa860118c2757a5370413 (diff)
gpu: nvgpu: Implement gp10b variant of cbc_ctrl
Pascal supports more comptags than Maxwell, but we were using the gm20b definitions for cbc_ctrl on all chips. Specifically, the clear_upper_bound field is one bit wider in Pascal. Implement a gp10b version of cbc_ctrl and use it on Pascal and Volta. Bug 200381317 Change-Id: I7d3cb9e92498e08f8704f156e2afb34404ce587e Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1642574 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Seema Khowala <seemaj@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gp106/hal_gp106.c2
-rw-r--r--drivers/gpu/nvgpu/gp10b/hal_gp10b.c2
-rw-r--r--drivers/gpu/nvgpu/gp10b/ltc_gp10b.c95
-rw-r--r--drivers/gpu/nvgpu/gp10b/ltc_gp10b.h4
-rw-r--r--drivers/gpu/nvgpu/gv100/hal_gv100.c2
-rw-r--r--drivers/gpu/nvgpu/gv11b/hal_gv11b.c2
6 files changed, 102 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 68562955..7a57672e 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -236,7 +236,7 @@ static const struct gpu_ops gp106_ops = {
236 .init_cbc = NULL, 236 .init_cbc = NULL,
237 .init_fs_state = gm20b_ltc_init_fs_state, 237 .init_fs_state = gm20b_ltc_init_fs_state,
238 .init_comptags = gp10b_ltc_init_comptags, 238 .init_comptags = gp10b_ltc_init_comptags,
239 .cbc_ctrl = gm20b_ltc_cbc_ctrl, 239 .cbc_ctrl = gp10b_ltc_cbc_ctrl,
240 .isr = gp10b_ltc_isr, 240 .isr = gp10b_ltc_isr,
241 .cbc_fix_config = NULL, 241 .cbc_fix_config = NULL,
242 .flush = gm20b_flush_ltc, 242 .flush = gm20b_flush_ltc,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index a5b42565..eb3d1f9d 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -199,7 +199,7 @@ static const struct gpu_ops gp10b_ops = {
199 .init_cbc = gm20b_ltc_init_cbc, 199 .init_cbc = gm20b_ltc_init_cbc,
200 .init_fs_state = gp10b_ltc_init_fs_state, 200 .init_fs_state = gp10b_ltc_init_fs_state,
201 .init_comptags = gp10b_ltc_init_comptags, 201 .init_comptags = gp10b_ltc_init_comptags,
202 .cbc_ctrl = gm20b_ltc_cbc_ctrl, 202 .cbc_ctrl = gp10b_ltc_cbc_ctrl,
203 .isr = gp10b_ltc_isr, 203 .isr = gp10b_ltc_isr,
204 .cbc_fix_config = gm20b_ltc_cbc_fix_config, 204 .cbc_fix_config = gm20b_ltc_cbc_fix_config,
205 .flush = gm20b_flush_ltc, 205 .flush = gm20b_flush_ltc,
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index d191d778..7735d1ae 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -22,6 +22,8 @@
22 * DEALINGS IN THE SOFTWARE. 22 * DEALINGS IN THE SOFTWARE.
23 */ 23 */
24 24
25#include <trace/events/gk20a.h>
26
25#include <dt-bindings/memory/tegra-swgroup.h> 27#include <dt-bindings/memory/tegra-swgroup.h>
26 28
27#include <nvgpu/ltc.h> 29#include <nvgpu/ltc.h>
@@ -132,6 +134,99 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
132 return 0; 134 return 0;
133} 135}
134 136
137int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
138 u32 min, u32 max)
139{
140 struct gr_gk20a *gr = &g->gr;
141 struct nvgpu_timeout timeout;
142 int err = 0;
143 u32 ltc, slice, ctrl1, val, hw_op = 0;
144 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
145 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
146 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
147 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
148 const u32 max_lines = 16384;
149
150 nvgpu_log_fn(g, " ");
151
152 trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);
153
154 if (gr->compbit_store.mem.size == 0)
155 return 0;
156
157 while (1) {
158 const u32 iter_max = min(min + max_lines - 1, max);
159 bool full_cache_op = true;
160
161 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
162
163 nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max);
164
165 if (op == gk20a_cbc_op_clear) {
166 gk20a_writel(
167 g, ltc_ltcs_ltss_cbc_ctrl2_r(),
168 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
169 min));
170 gk20a_writel(
171 g, ltc_ltcs_ltss_cbc_ctrl3_r(),
172 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
173 iter_max));
174 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
175 full_cache_op = false;
176 } else if (op == gk20a_cbc_op_clean) {
177 /* this is full-cache op */
178 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
179 } else if (op == gk20a_cbc_op_invalidate) {
180 /* this is full-cache op */
181 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
182 } else {
183 nvgpu_err(g, "Unknown op: %u", (unsigned)op);
184 err = -EINVAL;
185 goto out;
186 }
187 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
188 gk20a_readl(g,
189 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
190
191 for (ltc = 0; ltc < g->ltc_count; ltc++) {
192 for (slice = 0; slice < slices_per_ltc; slice++) {
193
194 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
195 ltc * ltc_stride + slice * lts_stride;
196
197 nvgpu_timeout_init(g, &timeout, 2000,
198 NVGPU_TIMER_RETRY_TIMER);
199 do {
200 val = gk20a_readl(g, ctrl1);
201 if (!(val & hw_op))
202 break;
203 nvgpu_udelay(5);
204 } while (!nvgpu_timeout_expired(&timeout));
205
206 if (nvgpu_timeout_peek_expired(&timeout)) {
207 nvgpu_err(g, "comp tag clear timeout");
208 err = -EBUSY;
209 goto out;
210 }
211 }
212 }
213
214 /* are we done? */
215 if (full_cache_op || iter_max == max)
216 break;
217
218 /* note: iter_max is inclusive upper bound */
219 min = iter_max + 1;
220
221 /* give a chance for higher-priority threads to progress */
222 nvgpu_mutex_release(&g->mm.l2_op_lock);
223 }
224out:
225 trace_gk20a_ltc_cbc_ctrl_done(g->name);
226 nvgpu_mutex_release(&g->mm.l2_op_lock);
227 return err;
228}
229
135void gp10b_ltc_isr(struct gk20a *g) 230void gp10b_ltc_isr(struct gk20a *g)
136{ 231{
137 u32 mc_intr, ltc_intr; 232 u32 mc_intr, ltc_intr;
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
index 825204cb..c1a2bf64 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -29,5 +29,7 @@ void gp10b_ltc_isr(struct gk20a *g);
29int gp10b_determine_L2_size_bytes(struct gk20a *g); 29int gp10b_determine_L2_size_bytes(struct gk20a *g);
30int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); 30int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr);
31void gp10b_ltc_init_fs_state(struct gk20a *g); 31void gp10b_ltc_init_fs_state(struct gk20a *g);
32int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
33 u32 min, u32 max);
32void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled); 34void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled);
33#endif 35#endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 6103b923..f8302621 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -267,7 +267,7 @@ static const struct gpu_ops gv100_ops = {
267 .init_cbc = NULL, 267 .init_cbc = NULL,
268 .init_fs_state = gv11b_ltc_init_fs_state, 268 .init_fs_state = gv11b_ltc_init_fs_state,
269 .init_comptags = gp10b_ltc_init_comptags, 269 .init_comptags = gp10b_ltc_init_comptags,
270 .cbc_ctrl = gm20b_ltc_cbc_ctrl, 270 .cbc_ctrl = gp10b_ltc_cbc_ctrl,
271 .isr = gv11b_ltc_isr, 271 .isr = gv11b_ltc_isr,
272 .cbc_fix_config = NULL, 272 .cbc_fix_config = NULL,
273 .flush = gm20b_flush_ltc, 273 .flush = gm20b_flush_ltc,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 93f819e4..e4e9323b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -234,7 +234,7 @@ static const struct gpu_ops gv11b_ops = {
234 .init_cbc = NULL, 234 .init_cbc = NULL,
235 .init_fs_state = gv11b_ltc_init_fs_state, 235 .init_fs_state = gv11b_ltc_init_fs_state,
236 .init_comptags = gp10b_ltc_init_comptags, 236 .init_comptags = gp10b_ltc_init_comptags,
237 .cbc_ctrl = gm20b_ltc_cbc_ctrl, 237 .cbc_ctrl = gp10b_ltc_cbc_ctrl,
238 .isr = gv11b_ltc_isr, 238 .isr = gv11b_ltc_isr,
239 .cbc_fix_config = gv11b_ltc_cbc_fix_config, 239 .cbc_fix_config = gv11b_ltc_cbc_fix_config,
240 .flush = gm20b_flush_ltc, 240 .flush = gm20b_flush_ltc,