summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
diff options
context:
space:
mode:
authorTerje Bergstrom <tbergstrom@nvidia.com>2018-01-19 18:16:44 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-01-24 17:42:16 -0500
commitfb0a23ea168fd0947d9bc1064f91f15ef8a0c057 (patch)
tree001dc993501eec6e83bf83b19752001a9c470a54 /drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
parentdcff39ba8ca18a8f2f8fa860118c2757a5370413 (diff)
gpu: nvgpu: Implement gp10b variant of cbc_ctrl
Pascal has support for more comptags than Maxwell, but we were using gm20b definitions for cbc_ctrl on all chips. Specifically, the field clear_upper_bound is one bit wider in Pascal. Implement a gp10b version of cbc_ctrl and take it into use in Pascal and Volta.

Bug 200381317

Change-Id: I7d3cb9e92498e08f8704f156e2afb34404ce587e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1642574
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/ltc_gp10b.c')
-rw-r--r--drivers/gpu/nvgpu/gp10b/ltc_gp10b.c95
1 file changed, 95 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
index d191d778..7735d1ae 100644
--- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c
@@ -22,6 +22,8 @@
22 * DEALINGS IN THE SOFTWARE. 22 * DEALINGS IN THE SOFTWARE.
23 */ 23 */
24 24
25#include <trace/events/gk20a.h>
26
25#include <dt-bindings/memory/tegra-swgroup.h> 27#include <dt-bindings/memory/tegra-swgroup.h>
26 28
27#include <nvgpu/ltc.h> 29#include <nvgpu/ltc.h>
@@ -132,6 +134,99 @@ int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
132 return 0; 134 return 0;
133} 135}
134 136
137int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
138 u32 min, u32 max)
139{
140 struct gr_gk20a *gr = &g->gr;
141 struct nvgpu_timeout timeout;
142 int err = 0;
143 u32 ltc, slice, ctrl1, val, hw_op = 0;
144 u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
145 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
146 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
147 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
148 const u32 max_lines = 16384;
149
150 nvgpu_log_fn(g, " ");
151
152 trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max);
153
154 if (gr->compbit_store.mem.size == 0)
155 return 0;
156
157 while (1) {
158 const u32 iter_max = min(min + max_lines - 1, max);
159 bool full_cache_op = true;
160
161 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
162
163 nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max);
164
165 if (op == gk20a_cbc_op_clear) {
166 gk20a_writel(
167 g, ltc_ltcs_ltss_cbc_ctrl2_r(),
168 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
169 min));
170 gk20a_writel(
171 g, ltc_ltcs_ltss_cbc_ctrl3_r(),
172 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
173 iter_max));
174 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
175 full_cache_op = false;
176 } else if (op == gk20a_cbc_op_clean) {
177 /* this is full-cache op */
178 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
179 } else if (op == gk20a_cbc_op_invalidate) {
180 /* this is full-cache op */
181 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
182 } else {
183 nvgpu_err(g, "Unknown op: %u", (unsigned)op);
184 err = -EINVAL;
185 goto out;
186 }
187 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
188 gk20a_readl(g,
189 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
190
191 for (ltc = 0; ltc < g->ltc_count; ltc++) {
192 for (slice = 0; slice < slices_per_ltc; slice++) {
193
194 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
195 ltc * ltc_stride + slice * lts_stride;
196
197 nvgpu_timeout_init(g, &timeout, 2000,
198 NVGPU_TIMER_RETRY_TIMER);
199 do {
200 val = gk20a_readl(g, ctrl1);
201 if (!(val & hw_op))
202 break;
203 nvgpu_udelay(5);
204 } while (!nvgpu_timeout_expired(&timeout));
205
206 if (nvgpu_timeout_peek_expired(&timeout)) {
207 nvgpu_err(g, "comp tag clear timeout");
208 err = -EBUSY;
209 goto out;
210 }
211 }
212 }
213
214 /* are we done? */
215 if (full_cache_op || iter_max == max)
216 break;
217
218 /* note: iter_max is inclusive upper bound */
219 min = iter_max + 1;
220
221 /* give a chance for higher-priority threads to progress */
222 nvgpu_mutex_release(&g->mm.l2_op_lock);
223 }
224out:
225 trace_gk20a_ltc_cbc_ctrl_done(g->name);
226 nvgpu_mutex_release(&g->mm.l2_op_lock);
227 return err;
228}
229
135void gp10b_ltc_isr(struct gk20a *g) 230void gp10b_ltc_isr(struct gk20a *g)
136{ 231{
137 u32 mc_intr, ltc_intr; 232 u32 mc_intr, ltc_intr;