author     Peter Boonstoppel <pboonstoppel@nvidia.com>          2017-05-02 15:09:40 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-05-17 13:24:20 -0400
commit     39a9e251da0fb4da8512593d3ce4f6eba47d5e0c (patch)
tree       4b0cc5a4c196ba815aff2856034ffbf115cc2fa6 /drivers/gpu/nvgpu/gk20a
parent     65de2a2d65a2d7f748580cbc646438a7b4e99d13 (diff)
gpu: nvgpu: Add czf_bypass sysfs node for gp10b
This change adds a new sysfs node that allows configuring CZF_BYPASS,
to support platforms with low context-switching latency requirements.
/sys/devices/17000000.gp10b/czf_bypass
Values:
0 - always
1 - lateZ (default)
2 - single pass
3 - never
The specified value will apply only to newly allocated contexts.
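
For illustration, a minimal userspace sketch of programming the node from C.
The sysfs path and the meaning of each value are taken from the list above;
the assumption that the node accepts a plain decimal write, and the mode
chosen here, are examples only and not part of this change.

/* Example only: select a CZF_BYPASS mode on gp10b via the new sysfs node.
 * 0 = always, 1 = lateZ (default), 2 = single pass, 3 = never.
 * Only contexts allocated after the write pick up the new setting.
 */
#include <stdio.h>

int main(void)
{
	const char *node = "/sys/devices/17000000.gp10b/czf_bypass";
	FILE *f = fopen(node, "w");

	if (!f) {
		perror(node);
		return 1;
	}
	fprintf(f, "%d\n", 2);	/* e.g. request "single pass" */
	return fclose(f) ? 1 : 0;
}

The same effect can be had from a shell with
echo 2 > /sys/devices/17000000.gp10b/czf_bypass; in either case, contexts
that were already allocated keep their previous setting.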
Bug 1914014
Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb385b53f698b04f218db535e8162e8c94)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |  2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 70
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  5
3 files changed, 49 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c36049b9..b5d0572e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -366,6 +366,8 @@ struct gpu_ops {
 		int (*resume_from_pause)(struct gk20a *g);
 		int (*clear_sm_errors)(struct gk20a *g);
 		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
+		int (*set_czf_bypass)(struct gk20a *g,
+				struct channel_gk20a *ch);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 04d494fc..25636bbd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		c->first_init = true;
 	}
 
+	if (g->ops.gr.set_czf_bypass)
+		g->ops.gr.set_czf_bypass(g, c);
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 	return ret;
 }
 
-int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
-			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			  bool ch_is_curr_ctx)
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
 	struct nvgpu_mem *current_mem = NULL;
-	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
 	u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
 	u32 *offsets = NULL;
 	u32 *offset_addrs = NULL;
 	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
-	int err, pass;
+	int err = 0, pass;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		   num_ctx_wr_ops, num_ctx_rd_ops);
 
-	/* disable channel switching.
-	 * at that point the hardware state can be inspected to
-	 * determine if the context we're interested in is current.
-	 */
-	err = gr_gk20a_disable_ctxsw(g);
-	if (err) {
-		nvgpu_err(g, "unable to stop gr ctxsw");
-		/* this should probably be ctx-fatal... */
-		goto cleanup;
-	}
-
-	restart_gr_ctxsw = true;
-
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-
-	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
-
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	if (pm_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
 
-	if (restart_gr_ctxsw) {
-		int tmp_err = gr_gk20a_enable_ctxsw(g);
-		if (tmp_err) {
-			nvgpu_err(g, "unable to restart ctxsw!\n");
-			err = tmp_err;
-		}
+	return err;
+}
+
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+	struct gk20a *g = ch->g;
+	int err, tmp_err;
+	bool ch_is_curr_ctx;
+
+	/* disable channel switching.
+	 * at that point the hardware state can be inspected to
+	 * determine if the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		return err;
+	}
+
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ch_is_curr_ctx);
+
+	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
+				      num_ctx_rd_ops, ch_is_curr_ctx);
+
+	tmp_err = gr_gk20a_enable_ctxsw(g);
+	if (tmp_err) {
+		nvgpu_err(g, "unable to restart ctxsw!\n");
+		err = tmp_err;
 	}
 
 	return err;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 06ce96e7..ee528c31 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -320,6 +320,7 @@ struct gr_gk20a {
 	u32 alpha_cb_default_size;
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
+	u32 czf_bypass;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
 
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op;
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
 			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			    struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			    u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			    bool ch_is_curr_ctx);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 				    u32 addr,
 				    u32 max_offsets,