From 907adfd785e3a3582b2649c48bf4c32f41745e3f Mon Sep 17 00:00:00 2001 From: Peter Boonstoppel Date: Tue, 10 Jan 2017 10:22:54 -0800 Subject: gpu: nvgpu: Add NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX This ioctl can be used on gp10b to set a flag in the context header indicating this context should be run at elevated clock frequency. FECS ctxsw ucode will read this flag as part of the context switch and will request higher GPU clock frequencies from BPMP for the duration of the context execution. Bug 1819874 Change-Id: I84bf580923d95585095716d49cea24e58c9440ed Signed-off-by: Peter Boonstoppel Reviewed-on: http://git-master/r/1292746 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 18 +++++++++++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 4 +++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 3 +++ drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 + drivers/gpu/nvgpu/gp106/gr_gp106.c | 3 ++- drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 45 +++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 2 ++ include/uapi/linux/nvgpu.h | 11 +++++++- 8 files changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index b963ad96..3dda1cbf 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -3874,6 +3874,24 @@ long gk20a_channel_ioctl(struct file *filp, err = -EINVAL; } break; + case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX: + if (ch->g->ops.gr.set_boosted_ctx) { + bool boost = + ((struct nvgpu_boosted_ctx_args *)buf)->boost; + + err = gk20a_busy(dev); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = ch->g->ops.gr.set_boosted_ctx(ch, boost); + gk20a_idle(dev); + } else { + err = -EINVAL; + } + break; default: dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 
8d6d36fc..7f2383d9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -319,6 +319,10 @@ struct gpu_ops { struct vm_gk20a *vm, u32 class, u32 graphics_preempt_mode, u32 compute_preempt_mode); + int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); + void (*update_boosted_ctx)(struct gk20a *g, + struct mem_desc *mem, + struct gr_ctx_desc *gr_ctx); int (*fuse_override)(struct gk20a *g); void (*init_sm_id_table)(struct gk20a *g); int (*load_smid_config)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 0e1c88a4..170bfc7f 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -2109,6 +2109,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, if (g->ops.gr.update_ctxsw_preemption_mode) g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); + if (g->ops.gr.update_boosted_ctx) + g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx); + virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 19ab2ec0..5a987a82 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -391,6 +391,7 @@ struct gr_ctx_desc { u32 graphics_preempt_mode; u32 compute_preempt_mode; + bool boosted_ctx; #ifdef CONFIG_ARCH_TEGRA_18x_SOC struct gr_ctx_desc_t18x t18x; #endif diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index 896c3fc6..dae23374 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -243,5 +243,6 @@ void gp106_init_gr(struct gpu_ops *gops) gops->gr.init_preemption_state = NULL; gops->gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode; gops->gr.create_gr_sysfs = NULL; - + gops->gr.set_boosted_ctx = NULL; + gops->gr.update_boosted_ctx = NULL; } diff --git 
a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a7aa4003..e680e753 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -2060,6 +2060,73 @@ clean_up:
 	return err;
 }
 
+/*
+ * Set or clear the boosted-clock flag of the channel's graphics context.
+ *
+ * The context image is mapped, the channel is disabled through
+ * gk20a_disable_channel_tsg() and preempted, the flag is written via the
+ * update_boosted_ctx HAL op, and the channel is re-enabled.
+ *
+ * Returns 0 on success, -EINVAL if the channel has no graphics context,
+ * -ENOMEM if the context image cannot be mapped, -ENOSYS if no
+ * update_boosted_ctx op is installed, or the error returned by the
+ * disable/preempt step.
+ */
+static int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
+				    bool boost)
+{
+	struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
+	struct gk20a *g = ch->g;
+	struct mem_desc *mem;
+	int err = 0;
+
+	/* gr_ctx is dereferenced below; reject channels that have none. */
+	if (!gr_ctx)
+		return -EINVAL;
+
+	mem = &gr_ctx->mem;
+
+	if (gk20a_mem_begin(g, mem))
+		return -ENOMEM;
+
+	err = gk20a_disable_channel_tsg(g, ch);
+	if (err)
+		goto unmap_ctx;
+
+	err = gk20a_fifo_preempt(g, ch);
+	if (err)
+		goto enable_ch;
+
+	if (g->ops.gr.update_boosted_ctx) {
+		/* Cache the flag only when it is actually written out. */
+		gr_ctx->boosted_ctx = boost;
+		g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
+	} else {
+		err = -ENOSYS;
+	}
+
+enable_ch:
+	gk20a_enable_channel_tsg(g, ch);
+unmap_ctx:
+	gk20a_mem_end(g, mem);
+
+	return err;
+}
+
+/*
+ * Write the boost flag cached in @gr_ctx into the pmu_options word of the
+ * context image @mem.
+ */
+static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct mem_desc *mem,
+		struct gr_ctx_desc *gr_ctx)
+{
+	u32 v;
+
+	v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
+		gr_ctx->boosted_ctx);
+	gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
+}
+
 static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 					u32 graphics_preempt_mode,
 					u32 compute_preempt_mode)
@@ -2271,4 +2338,6 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
 	gops->gr.fuse_override = gp10b_gr_fuse_override;
 	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
+	gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
+	gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 6cafc49f..36bbee00 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -1134,4 +1134,6 @@ void vgpu_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode;
gops->gr.clear_sm_error_state = vgpu_gr_clear_sm_error_state; gops->gr.dump_gr_regs = NULL; + gops->gr.set_boosted_ctx = NULL; + gops->gr.update_boosted_ctx = NULL; } diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 5a9a1d25..1853f667 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -1537,6 +1537,13 @@ struct nvgpu_preemption_mode_args { __u32 compute_preempt_mode; /* in */ }; +struct nvgpu_boosted_ctx_args { /* Argument block for NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX. */ +#define NVGPU_BOOSTED_CTX_MODE_NORMAL (0U) +#define NVGPU_BOOSTED_CTX_MODE_BOOSTED_EXECUTION (1U) + __u32 boost; /* in: NVGPU_BOOSTED_CTX_MODE_*; the ioctl handler in this patch treats any non-zero value as boosted */ + __u32 padding; /* not read by the ioctl handler in this patch */ +}; + #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \ _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args) #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \ @@ -1585,9 +1592,11 @@ struct nvgpu_preemption_mode_args { _IOW(NVGPU_IOCTL_MAGIC, 122, struct nvgpu_preemption_mode_args) #define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX \ _IOW(NVGPU_IOCTL_MAGIC, 123, struct nvgpu_alloc_gpfifo_ex_args) +#define NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX \ + _IOW(NVGPU_IOCTL_MAGIC, 124, struct nvgpu_boosted_ctx_args) #define NVGPU_IOCTL_CHANNEL_LAST \ - _IOC_NR(NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX) + _IOC_NR(NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX) #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args) /* -- cgit v1.2.2