author     Peter Boonstoppel <pboonstoppel@nvidia.com>          2017-05-02 15:09:40 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-05-17 13:24:20 -0400
commit     39a9e251da0fb4da8512593d3ce4f6eba47d5e0c (patch)
tree       4b0cc5a4c196ba815aff2856034ffbf115cc2fa6 /drivers/gpu/nvgpu/gk20a
parent     65de2a2d65a2d7f748580cbc646438a7b4e99d13 (diff)
gpu: nvgpu: Add czf_bypass sysfs node for gp10b
This change adds a new sysfs node that allows configuring CZF_BYPASS,
to support platforms with low context-switching latency requirements.
/sys/devices/17000000.gp10b/czf_bypass
Values:
0 - always
1 - lateZ (default)
2 - single pass
3 - never
The specified value will apply only to newly allocated contexts.
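
For illustration, a minimal userspace sketch of programming the node from C.
The sysfs path and the meaning of each value are taken from the list above;
the assumption that the node accepts a plain decimal write, and the mode
chosen here, are examples only and not part of this change.

/* Example only: select a CZF_BYPASS mode on gp10b via the new sysfs node.
 * 0 = always, 1 = lateZ (default), 2 = single pass, 3 = never.
 * Only contexts allocated after the write pick up the new setting.
 */
#include <stdio.h>

int main(void)
{
	const char *node = "/sys/devices/17000000.gp10b/czf_bypass";
	FILE *f = fopen(node, "w");

	if (!f) {
		perror(node);
		return 1;
	}
	fprintf(f, "%d\n", 2);	/* e.g. request "single pass" */
	return fclose(f) ? 1 : 0;
}

The same effect can be had from a shell with
echo 2 > /sys/devices/17000000.gp10b/czf_bypass; in either case, contexts
that were already allocated keep their previous setting.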
Bug 1914014
Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb385b53f698b04f218db535e8162e8c94)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |  2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 70
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h |  5
3 files changed, 49 insertions, 28 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c36049b9..b5d0572e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -366,6 +366,8 @@ struct gpu_ops {
 		int (*resume_from_pause)(struct gk20a *g);
 		int (*clear_sm_errors)(struct gk20a *g);
 		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
+		int (*set_czf_bypass)(struct gk20a *g,
+				struct channel_gk20a *ch);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 04d494fc..25636bbd 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		c->first_init = true;
 	}
 
+	if (g->ops.gr.set_czf_bypass)
+		g->ops.gr.set_czf_bypass(g, c);
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 	return ret;
 }
 
-int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
-			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			  bool ch_is_curr_ctx)
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
 	struct nvgpu_mem *current_mem = NULL;
-	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
 	u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
 	u32 *offsets = NULL;
 	u32 *offset_addrs = NULL;
 	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
-	int err, pass;
+	int err = 0, pass;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		   num_ctx_wr_ops, num_ctx_rd_ops);
 
-	/* disable channel switching.
-	 * at that point the hardware state can be inspected to
-	 * determine if the context we're interested in is current.
-	 */
-	err = gr_gk20a_disable_ctxsw(g);
-	if (err) {
-		nvgpu_err(g, "unable to stop gr ctxsw");
-		/* this should probably be ctx-fatal... */
-		goto cleanup;
-	}
-
-	restart_gr_ctxsw = true;
-
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-
-	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
-
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	if (pm_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
 
-	if (restart_gr_ctxsw) {
-		int tmp_err = gr_gk20a_enable_ctxsw(g);
-		if (tmp_err) {
-			nvgpu_err(g, "unable to restart ctxsw!\n");
-			err = tmp_err;
-		}
+	return err;
+}
+
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+	struct gk20a *g = ch->g;
+	int err, tmp_err;
+	bool ch_is_curr_ctx;
+
+	/* disable channel switching.
+	 * at that point the hardware state can be inspected to
+	 * determine if the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		return err;
+	}
+
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ch_is_curr_ctx);
+
+	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
+				      num_ctx_rd_ops, ch_is_curr_ctx);
+
+	tmp_err = gr_gk20a_enable_ctxsw(g);
+	if (tmp_err) {
+		nvgpu_err(g, "unable to restart ctxsw!\n");
+		err = tmp_err;
 	}
 
 	return err;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 06ce96e7..ee528c31 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -320,6 +320,7 @@ struct gr_gk20a {
 	u32 alpha_cb_default_size;
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
+	u32 czf_bypass;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
 
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op;
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
 			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			    struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			    u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			    bool ch_is_curr_ctx);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 				    u32 addr,
 				    u32 max_offsets,