summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Boonstoppel <pboonstoppel@nvidia.com>2017-01-10 13:22:54 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-02-14 17:54:46 -0500
commit907adfd785e3a3582b2649c48bf4c32f41745e3f (patch)
tree5ffa4118738135d4b33e9b6d1de035b87ef8b351
parent4fb3161ba3bb1c8481b1ca426a44aa4ee3ad744e (diff)
gpu: nvgpu: Add NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX
This ioctl can be used on gp10b to set a flag in the context header indicating this context should be run at elevated clock frequency. FECS ctxsw ucode will read this flag as part of the context switch and will request higher GPU clock frequencies from BPMP for the duration of the context execution. Bug 1819874 Change-Id: I84bf580923d95585095716d49cea24e58c9440ed Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com> Reviewed-on: http://git-master/r/1292746 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c18
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gp106/gr_gp106.c3
-rw-r--r--drivers/gpu/nvgpu/gp10b/gr_gp10b.c45
-rw-r--r--drivers/gpu/nvgpu/vgpu/gr_vgpu.c2
-rw-r--r--include/uapi/linux/nvgpu.h11
8 files changed, 85 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index b963ad96..3dda1cbf 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -3874,6 +3874,24 @@ long gk20a_channel_ioctl(struct file *filp,
3874 err = -EINVAL; 3874 err = -EINVAL;
3875 } 3875 }
3876 break; 3876 break;
3877 case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX:
3878 if (ch->g->ops.gr.set_boosted_ctx) {
3879 bool boost =
3880 ((struct nvgpu_boosted_ctx_args *)buf)->boost;
3881
3882 err = gk20a_busy(dev);
3883 if (err) {
3884 dev_err(dev,
3885 "%s: failed to host gk20a for ioctl cmd: 0x%x",
3886 __func__, cmd);
3887 break;
3888 }
3889 err = ch->g->ops.gr.set_boosted_ctx(ch, boost);
3890 gk20a_idle(dev);
3891 } else {
3892 err = -EINVAL;
3893 }
3894 break;
3877 default: 3895 default:
3878 dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); 3896 dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
3879 err = -ENOTTY; 3897 err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8d6d36fc..7f2383d9 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -319,6 +319,10 @@ struct gpu_ops {
319 struct vm_gk20a *vm, u32 class, 319 struct vm_gk20a *vm, u32 class,
320 u32 graphics_preempt_mode, 320 u32 graphics_preempt_mode,
321 u32 compute_preempt_mode); 321 u32 compute_preempt_mode);
322 int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
323 void (*update_boosted_ctx)(struct gk20a *g,
324 struct mem_desc *mem,
325 struct gr_ctx_desc *gr_ctx);
322 int (*fuse_override)(struct gk20a *g); 326 int (*fuse_override)(struct gk20a *g);
323 void (*init_sm_id_table)(struct gk20a *g); 327 void (*init_sm_id_table)(struct gk20a *g);
324 int (*load_smid_config)(struct gk20a *g); 328 int (*load_smid_config)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 0e1c88a4..170bfc7f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2109,6 +2109,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
2109 if (g->ops.gr.update_ctxsw_preemption_mode) 2109 if (g->ops.gr.update_ctxsw_preemption_mode)
2110 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); 2110 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem);
2111 2111
2112 if (g->ops.gr.update_boosted_ctx)
2113 g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx);
2114
2112 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 2115 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
2113 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 2116 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
2114 2117
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 19ab2ec0..5a987a82 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -391,6 +391,7 @@ struct gr_ctx_desc {
391 391
392 u32 graphics_preempt_mode; 392 u32 graphics_preempt_mode;
393 u32 compute_preempt_mode; 393 u32 compute_preempt_mode;
394 bool boosted_ctx;
394#ifdef CONFIG_ARCH_TEGRA_18x_SOC 395#ifdef CONFIG_ARCH_TEGRA_18x_SOC
395 struct gr_ctx_desc_t18x t18x; 396 struct gr_ctx_desc_t18x t18x;
396#endif 397#endif
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index 896c3fc6..dae23374 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -243,5 +243,6 @@ void gp106_init_gr(struct gpu_ops *gops)
243 gops->gr.init_preemption_state = NULL; 243 gops->gr.init_preemption_state = NULL;
244 gops->gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode; 244 gops->gr.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode;
245 gops->gr.create_gr_sysfs = NULL; 245 gops->gr.create_gr_sysfs = NULL;
246 246 gops->gr.set_boosted_ctx = NULL;
247 gops->gr.update_boosted_ctx = NULL;
247} 248}
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index a7aa4003..e680e753 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -2060,6 +2060,49 @@ clean_up:
2060 return err; 2060 return err;
2061} 2061}
2062 2062
2063static int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
2064 bool boost)
2065{
2066 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx;
2067 struct gk20a *g = ch->g;
2068 struct mem_desc *mem = &gr_ctx->mem;
2069 int err = 0;
2070
2071 gr_ctx->boosted_ctx = boost;
2072
2073 if (gk20a_mem_begin(g, mem))
2074 return -ENOMEM;
2075
2076 err = gk20a_disable_channel_tsg(g, ch);
2077 if (err)
2078 goto unmap_ctx;
2079
2080 err = gk20a_fifo_preempt(g, ch);
2081 if (err)
2082 goto enable_ch;
2083
2084 if (g->ops.gr.update_boosted_ctx)
2085 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
2086 else
2087 err = -ENOSYS;
2088
2089enable_ch:
2090 gk20a_enable_channel_tsg(g, ch);
2091unmap_ctx:
2092 gk20a_mem_end(g, mem);
2093
2094 return err;
2095}
2096
2097static void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct mem_desc *mem,
2098 struct gr_ctx_desc *gr_ctx) {
2099 u32 v;
2100
2101 v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
2102 gr_ctx->boosted_ctx);
2103 gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
2104}
2105
2063static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, 2106static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2064 u32 graphics_preempt_mode, 2107 u32 graphics_preempt_mode,
2065 u32 compute_preempt_mode) 2108 u32 compute_preempt_mode)
@@ -2271,4 +2314,6 @@ void gp10b_init_gr(struct gpu_ops *gops)
2271 gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags; 2314 gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
2272 gops->gr.fuse_override = gp10b_gr_fuse_override; 2315 gops->gr.fuse_override = gp10b_gr_fuse_override;
2273 gops->gr.load_smid_config = gr_gp10b_load_smid_config; 2316 gops->gr.load_smid_config = gr_gp10b_load_smid_config;
2317 gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
2318 gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
2274} 2319}
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 6cafc49f..36bbee00 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -1134,4 +1134,6 @@ void vgpu_init_gr_ops(struct gpu_ops *gops)
1134 gops->gr.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode; 1134 gops->gr.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode;
1135 gops->gr.clear_sm_error_state = vgpu_gr_clear_sm_error_state; 1135 gops->gr.clear_sm_error_state = vgpu_gr_clear_sm_error_state;
1136 gops->gr.dump_gr_regs = NULL; 1136 gops->gr.dump_gr_regs = NULL;
1137 gops->gr.set_boosted_ctx = NULL;
1138 gops->gr.update_boosted_ctx = NULL;
1137} 1139}
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 5a9a1d25..1853f667 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1537,6 +1537,13 @@ struct nvgpu_preemption_mode_args {
1537 __u32 compute_preempt_mode; /* in */ 1537 __u32 compute_preempt_mode; /* in */
1538}; 1538};
1539 1539
1540struct nvgpu_boosted_ctx_args {
1541#define NVGPU_BOOSTED_CTX_MODE_NORMAL (0U)
1542#define NVGPU_BOOSTED_CTX_MODE_BOOSTED_EXECUTION (1U)
1543 __u32 boost;
1544 __u32 padding;
1545};
1546
1540#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \ 1547#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
1541 _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args) 1548 _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
1542#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \ 1549#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -1585,9 +1592,11 @@ struct nvgpu_preemption_mode_args {
1585 _IOW(NVGPU_IOCTL_MAGIC, 122, struct nvgpu_preemption_mode_args) 1592 _IOW(NVGPU_IOCTL_MAGIC, 122, struct nvgpu_preemption_mode_args)
1586#define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX \ 1593#define NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX \
1587 _IOW(NVGPU_IOCTL_MAGIC, 123, struct nvgpu_alloc_gpfifo_ex_args) 1594 _IOW(NVGPU_IOCTL_MAGIC, 123, struct nvgpu_alloc_gpfifo_ex_args)
1595#define NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX \
1596 _IOW(NVGPU_IOCTL_MAGIC, 124, struct nvgpu_boosted_ctx_args)
1588 1597
1589#define NVGPU_IOCTL_CHANNEL_LAST \ 1598#define NVGPU_IOCTL_CHANNEL_LAST \
1590 _IOC_NR(NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX) 1599 _IOC_NR(NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX)
1591#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args) 1600#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
1592 1601
1593/* 1602/*