path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
author    Peter Boonstoppel <pboonstoppel@nvidia.com>    2017-05-02 15:09:40 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2017-05-17 13:24:20 -0400
commit    39a9e251da0fb4da8512593d3ce4f6eba47d5e0c (patch)
tree      4b0cc5a4c196ba815aff2856034ffbf115cc2fa6 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent    65de2a2d65a2d7f748580cbc646438a7b4e99d13 (diff)
gpu: nvgpu: Add czf_bypass sysfs node for gp10b
This change adds a new sysfs node that allows configuring CZF_BYPASS, to support platforms with low context-switching latency requirements:

/sys/devices/17000000.gp10b/czf_bypass

Values:
  0 - always
  1 - lateZ (default)
  2 - single pass
  3 - never

The specified value applies only to newly allocated contexts.

Bug 1914014

Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb385b53f698b04f218db535e8162e8c94)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
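For reference, a minimal sketch of how a node like this is typically exposed through the Linux device-attribute API, with the store path simply caching the value for future context allocations. The get_gk20a() accessor and the czf_bypass field on struct gr_gk20a are assumptions based on surrounding nvgpu code, not something shown in this diff:

	/* Sketch only: sysfs plumbing for czf_bypass; assumed helpers marked. */
	static ssize_t czf_bypass_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
	{
		struct gk20a *g = get_gk20a(dev);	/* assumed nvgpu accessor */
		unsigned long val;

		if (kstrtoul(buf, 10, &val) < 0)
			return -EINVAL;
		if (val > 3)				/* 0..3 per the table above */
			return -EINVAL;

		g->gr.czf_bypass = val;	/* picked up by newly allocated contexts */
		return count;
	}

	static ssize_t czf_bypass_read(struct device *dev,
				       struct device_attribute *attr, char *buf)
	{
		struct gk20a *g = get_gk20a(dev);

		return snprintf(buf, PAGE_SIZE, "%d\n", g->gr.czf_bypass);
	}

	static DEVICE_ATTR(czf_bypass, S_IRUGO | S_IWUSR,
			   czf_bypass_read, czf_bypass_store);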
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--    drivers/gpu/nvgpu/gk20a/gr_gk20a.c    70
1 file changed, 42 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 04d494fc..25636bbd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		c->first_init = true;
 	}
 
+	if (g->ops.gr.set_czf_bypass)
+		g->ops.gr.set_czf_bypass(g, c);
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
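The new call site goes through a NULL-checked HAL hook, so chips whose HAL does not populate set_czf_bypass are unaffected; only gp10b wires it up. A one-line sketch of that wiring, assuming the usual nvgpu HAL-init pattern (the actual assignment lives in the gp10b gr HAL code elsewhere in this series, not in this file):

	gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;	/* gp10b only */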
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 	return ret;
 }
 
-int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
-			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			  bool ch_is_curr_ctx)
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
 	struct nvgpu_mem *current_mem = NULL;
-	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
 	u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
 	u32 *offsets = NULL;
 	u32 *offset_addrs = NULL;
 	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
-	int err, pass;
+	int err = 0, pass;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		   num_ctx_wr_ops, num_ctx_rd_ops);
 
-	/* disable channel switching.
-	 * at that point the hardware state can be inspected to
-	 * determine if the context we're interested in is current.
-	 */
-	err = gr_gk20a_disable_ctxsw(g);
-	if (err) {
-		nvgpu_err(g, "unable to stop gr ctxsw");
-		/* this should probably be ctx-fatal... */
-		goto cleanup;
-	}
-
-	restart_gr_ctxsw = true;
-
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-
-	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
-
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	if (pm_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
 
-	if (restart_gr_ctxsw) {
-		int tmp_err = gr_gk20a_enable_ctxsw(g);
-		if (tmp_err) {
-			nvgpu_err(g, "unable to restart ctxsw!\n");
-			err = tmp_err;
-		}
+	return err;
+}
+
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+	struct gk20a *g = ch->g;
+	int err, tmp_err;
+	bool ch_is_curr_ctx;
+
+	/* disable channel switching.
+	 * at that point the hardware state can be inspected to
+	 * determine if the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		return err;
+	}
+
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ch_is_curr_ctx);
+
+	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
+				      num_ctx_rd_ops, ch_is_curr_ctx);
+
+	tmp_err = gr_gk20a_enable_ctxsw(g);
+	if (tmp_err) {
+		nvgpu_err(g, "unable to restart ctxsw!\n");
+		err = tmp_err;
 	}
 
 	return err;
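The refactor keeps the ctxsw disable/enable bracketing in the thin gr_gk20a_exec_ctx_ops wrapper, while the register-op machinery moves into __gr_gk20a_exec_ctx_ops. Code paths that have already stopped context switching, and already know whether the channel's context is resident, can then drive register ops without re-entering gr_gk20a_disable_ctxsw. A sketch of such a caller, loosely modeled on how a gp10b set_czf_bypass hook might patch the CZF_BYPASS field in a channel's gr context; the REG_OP_* constants and czf_bypass_reg_* accessors below are illustrative assumptions, not the actual nvgpu reg-op constants or gp10b register headers:

	/* Illustrative only: write the CZF_BYPASS field into the gr context
	 * image of a channel via the exported exec_ctx_ops wrapper. */
	static void example_set_czf_bypass(struct gk20a *g,
					   struct channel_gk20a *ch)
	{
		struct nvgpu_dbg_gpu_reg_op op = {
			.op = REG_OP_WRITE_32,		/* one 32-bit write */
			.type = REG_OP_TYPE_GR_CTX,	/* target the ctx image */
			.offset = czf_bypass_reg_r(),	/* assumed accessor */
			.value_lo = czf_bypass_reg_f(g->gr.czf_bypass),
			.and_n_mask_lo = czf_bypass_reg_m(), /* this field only */
		};

		/* One write op, zero read ops; the wrapper brackets ctxsw. */
		gr_gk20a_exec_ctx_ops(ch, &op, 1, 1, 0);
	}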