From ae809fddbe90bcec0d48e1213fa36cc5ba76550d Mon Sep 17 00:00:00 2001 From: Vaikundanathan S Date: Tue, 28 Aug 2018 11:58:25 +0530 Subject: gpu:nvgpu: Add GV10x perf event In case of VFE update, schedule work to set P0 clocks. Added function nvgpu_clk_set_fll_clk_gv10x to update P0 clocks on perf event. Fixed MISRA issues caused by this excluding external functions and MACROs Bug 2331655 Change-Id: Id96c473092ee7f0b651413aefdd4b6f2f59e0b12 Signed-off-by: Vaikundanathan S Reviewed-on: https://git-master.nvidia.com/r/1808014 Reviewed-on: https://git-master.nvidia.com/r/1813881 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 3 +- drivers/gpu/nvgpu/clk/clk.c | 37 ++++++++++ drivers/gpu/nvgpu/clk/clk.h | 1 + drivers/gpu/nvgpu/common/pmu/pmu_fw.c | 4 -- drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 3 + drivers/gpu/nvgpu/gv100/perf_gv100.c | 120 ++++++++++++++++++++++++++++++++ drivers/gpu/nvgpu/gv100/perf_gv100.h | 36 ++++++++++ drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 +- drivers/gpu/nvgpu/perf/perf.c | 15 ---- drivers/gpu/nvgpu/perf/perf.h | 8 ++- drivers/gpu/nvgpu/pstate/pstate.c | 2 +- 13 files changed, 211 insertions(+), 23 deletions(-) create mode 100644 drivers/gpu/nvgpu/gv100/perf_gv100.c create mode 100644 drivers/gpu/nvgpu/gv100/perf_gv100.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 926ce056..3d7671d0 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -333,6 +333,7 @@ nvgpu-y += \ gv100/nvlink_gv100.o \ gv100/hal_gv100.o \ gv100/pmu_gv100.o \ + gv100/perf_gv100.o \ pstate/pstate.o \ clk/clk_vin.o \ clk/clk_fll.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index a26f827d..5c15ebd0 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -215,4 +215,5 @@ srcs := os/posix/nvgpu.c \ gv100/flcn_gv100.c \ 
gv100/nvlink_gv100.c \
 	gv100/hal_gv100.c \
-	gv100/pmu_gv100.c
+	gv100/pmu_gv100.c \
+	gv100/perf_gv100.c
diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c
index f0624595..8532a08b 100644
--- a/drivers/gpu/nvgpu/clk/clk.c
+++ b/drivers/gpu/nvgpu/clk/clk.c
@@ -866,6 +866,58 @@ u32 nvgpu_clk_set_boot_fll_clk_gv10x(struct gk20a *g)
 	return status;
 }
 
+/*
+ * Program GPCCLK through the FLL using freshly cached VF data.
+ *
+ * Steps: refresh the VF point cache, query clk_domain_get_f_or_v() for
+ * GPCCLK starting from BOOT_GPCCLK_MHZ on CTRL_VOLT_DOMAIN_LOGIC, request
+ * the resulting voltage, then program the FLL clock.
+ *
+ * Returns 0 on success. Cache and lookup failures are returned right away.
+ * A volt_set_voltage() failure is only logged (the clock is still
+ * programmed, as before), so the final value is that of
+ * clk_program_fllclks().
+ */
+int nvgpu_clk_set_fll_clk_gv10x(struct gk20a *g)
+{
+	int status;
+	struct change_fll_clk bootfllclk;
+	u16 gpcclk_clkmhz = BOOT_GPCCLK_MHZ;
+	u32 gpcclk_voltuv = 0U;
+	u32 voltuv = 0U;
+
+	status = clk_vf_point_cache(g);
+	if (status != 0) {
+		nvgpu_err(g, "caching failed");
+		return status;
+	}
+
+	status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPCCLK,
+		&gpcclk_clkmhz, &gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
+	if (status != 0) {
+		/* do not fail silently: say which step broke */
+		nvgpu_err(g, "gpcclk f/v lookup failed %d", status);
+		return status;
+	}
+
+	voltuv = gpcclk_voltuv;
+
+	status = volt_set_voltage(g, voltuv, 0U);
+	if (status != 0) {
+		/* voltuv is a u32: print it with %u, not %d */
+		nvgpu_err(g, "attempt to set max voltage failed %u", voltuv);
+	}
+
+	bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPCCLK;
+	bootfllclk.clkmhz = gpcclk_clkmhz;
+	bootfllclk.voltuv = voltuv;
+	status = clk_program_fllclks(g, &bootfllclk);
+	if (status != 0) {
+		nvgpu_err(g, "attempt to set max gpcclk failed");
+	}
+	return status;
+}
+
 u32 clk_domain_get_f_or_v(
 	struct gk20a *g,
 	u32 clkapidomain,
diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h
index 5a6aeae3..3f4bdf73 100644
--- a/drivers/gpu/nvgpu/clk/clk.h
+++ b/drivers/gpu/nvgpu/clk/clk.h
@@ -138,6 +138,7 @@ u32 nvgpu_clk_vf_change_inject_data_fill_gp10x(struct gk20a *g,
 	struct nv_pmu_clk_rpc *rpccall,
 	struct set_fll_clk *setfllclk);
 u32 nvgpu_clk_set_boot_fll_clk_gv10x(struct gk20a *g);
+int nvgpu_clk_set_fll_clk_gv10x(struct gk20a *g);
 int clk_pmu_freq_effective_avg_load(struct gk20a *g, bool bload);
 u32 clk_freq_effective_avg(struct gk20a *g, u32 clkDomainMask);
 #endif /* NVGPU_CLK_H */
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c
index a94453fb..c2d6a921 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c @@ -1325,8 +1325,6 @@ static int nvgpu_init_pmu_fw_ver_ops(struct nvgpu_pmu *pmu) clk_avfs_get_vin_cal_fuse_v20; g->ops.pmu_ver.clk.clk_vf_change_inject_data_fill = nvgpu_clk_vf_change_inject_data_fill_gv10x; - g->ops.pmu_ver.clk.perf_pmu_vfe_load = - perf_pmu_vfe_load_gv10x; g->ops.pmu_ver.clk.clk_set_boot_clk = nvgpu_clk_set_boot_fll_clk_gv10x; } else { @@ -1500,8 +1498,6 @@ static int nvgpu_init_pmu_fw_ver_ops(struct nvgpu_pmu *pmu) clk_avfs_get_vin_cal_fuse_v10; g->ops.pmu_ver.clk.clk_vf_change_inject_data_fill = nvgpu_clk_vf_change_inject_data_fill_gp10x; - g->ops.pmu_ver.clk.perf_pmu_vfe_load = - perf_pmu_vfe_load; break; case APP_VERSION_GM20B: g->ops.pmu_ver.pg_cmd_eng_buf_load_size = diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index d3804f0e..063fbbe3 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -676,6 +676,7 @@ static const struct gpu_ops gp106_ops = { .mclk_init = gp106_mclk_init, .mclk_change = gp106_mclk_change, .mclk_deinit = gp106_mclk_deinit, + .perf_pmu_vfe_load = perf_pmu_vfe_load, }, .clk_arb = { .get_arbiter_clk_domains = gp106_get_arbiter_clk_domains, @@ -844,6 +845,7 @@ int gp106_init_hal(struct gk20a *g) gops->clk.mclk_change = gp106_ops.clk.mclk_change; gops->clk.mclk_deinit = gp106_ops.clk.mclk_deinit; gops->clk.clk_domain_get_f_points = gp106_ops.clk.clk_domain_get_f_points; + gops->clk.perf_pmu_vfe_load = gp106_ops.clk.perf_pmu_vfe_load; gops->clk_arb = gp106_ops.clk_arb; gops->regops = gp106_ops.regops; diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index ee6dd436..0c64ce58 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -107,6 +107,7 @@ #include "gv100/pmu_gv100.h" #include "gv100/nvlink_gv100.h" #include "gv100/regops_gv100.h" +#include 
"gv100/perf_gv100.h"
 
 #include
 #include
@@ -770,6 +771,7 @@ static const struct gpu_ops gv100_ops = {
 		.get_rate_cntr = gp106_get_rate_cntr,
 		.measure_freq = gp106_clk_measure_freq,
 		.suspend_clk_support = gp106_suspend_clk_support,
+		.perf_pmu_vfe_load = gv100_perf_pmu_vfe_load,
 	},
 	.clk_arb = {
 		.get_arbiter_clk_domains = gp106_get_arbiter_clk_domains,
@@ -981,6 +983,7 @@ int gv100_init_hal(struct gk20a *g)
 	gops->clk.get_crystal_clk_hz = gv100_ops.clk.get_crystal_clk_hz;
 	gops->clk.measure_freq = gv100_ops.clk.measure_freq;
 	gops->clk.suspend_clk_support = gv100_ops.clk.suspend_clk_support;
+	gops->clk.perf_pmu_vfe_load = gv100_ops.clk.perf_pmu_vfe_load;
 
 	/* Lone functions */
 	gops->chip_init_gpu_characteristics =
diff --git a/drivers/gpu/nvgpu/gv100/perf_gv100.c b/drivers/gpu/nvgpu/gv100/perf_gv100.c
new file mode 100644
index 00000000..f3ba78ba
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv100/perf_gv100.c
@@ -0,0 +1,168 @@
+/*
+ * GV100 PERF
+ *
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+
+#include "gk20a/gk20a.h"
+#include "gv100/perf_gv100.h"
+
+/*
+ * Worker thread that services VFE callback events from the PMU.
+ *
+ * Sleeps on vfe_init->wq until gv100_pmu_handle_perf_event() sets
+ * state_change, then issues the PERF VFE_INVALIDATE RPC and re-programs the
+ * clocks via nvgpu_clk_set_fll_clk_gv10x(). Leaves the loop when the thread
+ * is asked to stop, which also makes the final return reachable.
+ */
+static int pmu_set_boot_clk_runcb_fn(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct nvgpu_pmu *pmu = &g->pmu;
+	struct nv_pmu_rpc_struct_perf_load rpc;
+	struct perf_pmupstate *perf_pmu = &g->perf_pmu;
+	struct nvgpu_vfe_invalidate *vfe_init = &perf_pmu->vfe_init;
+	int status = 0;
+
+	nvgpu_log_fn(g, "thread start");
+
+	while (true) {
+		NVGPU_COND_WAIT_INTERRUPTIBLE(&vfe_init->wq,
+			((vfe_init->state_change == true) ||
+			 nvgpu_thread_should_stop(&vfe_init->state_task)), 0);
+
+		if (nvgpu_thread_should_stop(&vfe_init->state_task)) {
+			break;
+		}
+
+		vfe_init->state_change = false;
+
+		memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perf_load));
+		PMU_RPC_EXECUTE_CPB(status, pmu, PERF, VFE_INVALIDATE, &rpc, 0);
+		if (status != 0) {
+			nvgpu_err(g, "Failed to execute RPC status=0x%x",
+				status);
+		}
+
+		/* the result used to be dropped; report failures */
+		status = nvgpu_clk_set_fll_clk_gv10x(g);
+		if (status != 0) {
+			nvgpu_err(g, "Failed to set FLL clk status=%d",
+				status);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * PMU perf message handler: on NV_PMU_PERF_MSG_ID_VFE_CALLBACK, flag the
+ * state change and wake the worker thread. Any other message type hits
+ * WARN_ON. Always returns 0.
+ */
+static int gv100_pmu_handle_perf_event(struct gk20a *g, void *pmumsg)
+{
+	struct nv_pmu_perf_msg *msg = (struct nv_pmu_perf_msg *)pmumsg;
+	struct perf_pmupstate *perf_pmu = &g->perf_pmu;
+
+	nvgpu_log_fn(g, " ");
+	switch (msg->msg_type) {
+	case NV_PMU_PERF_MSG_ID_VFE_CALLBACK:
+		perf_pmu->vfe_init.state_change = true;
+		nvgpu_cond_signal(&perf_pmu->vfe_init.wq);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Set up the wait condition and start the VFE worker thread.
+ *
+ * Does nothing if the worker is already running, so a repeated call neither
+ * re-initialises the condition the worker sleeps on nor starts a second
+ * thread. Returns 0 on success, otherwise the nvgpu_thread_create() error.
+ */
+u32 perf_pmu_init_vfe_perf_event(struct gk20a *g)
+{
+	struct perf_pmupstate *perf_pmu = &g->perf_pmu;
+	char thread_name[64];
+	int err;
+
+	nvgpu_log_fn(g, " ");
+
+	if (nvgpu_thread_is_running(&perf_pmu->vfe_init.state_task)) {
+		return 0U;
+	}
+
+	nvgpu_cond_init(&perf_pmu->vfe_init.wq);
+
+	snprintf(thread_name, sizeof(thread_name),
+		"nvgpu_vfe_invalidate_init_%s", g->name);
+
+	err = nvgpu_thread_create(&perf_pmu->vfe_init.state_task, g,
+			pmu_set_boot_clk_runcb_fn, thread_name);
+	if (err != 0) {
+		nvgpu_err(g, "failed to start nvgpu_vfe_invalidate_init thread");
+	}
+
+	/* nvgpu_thread_create() returns int; the public return type is u32 */
+	return (u32)err;
+}
+
+/*
+ * Invalidate VFE state on the PMU and arm handling of later VFE updates.
+ *
+ * Executes the PERF VFE_INVALIDATE RPC, starts the worker thread and, only
+ * if the worker is available, registers gv100_pmu_handle_perf_event() as
+ * the perf event handler. Returns the RPC status, or the worker start-up
+ * error when the RPC itself succeeded.
+ */
+u32 gv100_perf_pmu_vfe_load(struct gk20a *g)
+{
+	struct nvgpu_pmu *pmu = &g->pmu;
+	struct nv_pmu_rpc_struct_perf_load rpc;
+	u32 status = 0;
+	u32 err;
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perf_load));
+	PMU_RPC_EXECUTE_CPB(status, pmu, PERF, VFE_INVALIDATE, &rpc, 0);
+	if (status != 0U) {
+		nvgpu_err(g, "Failed to execute RPC status=0x%x",
+			status);
+	}
+
+	err = perf_pmu_init_vfe_perf_event(g);
+	if (err != 0U) {
+		/* no worker to service callbacks: do not register the handler */
+		return (status != 0U) ? status : err;
+	}
+
+	/* register call back for future VFE updates */
+	g->ops.perf.handle_pmu_perf_event = gv100_pmu_handle_perf_event;
+
+	return status;
+}
diff --git a/drivers/gpu/nvgpu/gv100/perf_gv100.h b/drivers/gpu/nvgpu/gv100/perf_gv100.h
new file mode 100644
index 00000000..e128c06a
--- /dev/null
+++ b/drivers/gpu/nvgpu/gv100/perf_gv100.h
@@ -0,0 +1,36 @@
+/*
+ * GV100 PERF
+ *
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __PERF_GV100_H_ +#define __PERF_GV100_H_ + +#include + +struct gk20a; + +u32 perf_pmu_init_vfe_perf_event(struct gk20a *g); +u32 gv100_perf_pmu_vfe_load(struct gk20a *g); + + +#endif /*__PERF_GV100_H_*/ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 5f875707..e2a0cbf7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -878,7 +878,6 @@ struct gpu_ops { u32 (*clk_vf_change_inject_data_fill)(struct gk20a *g, struct nv_pmu_clk_rpc *rpccall, struct set_fll_clk *setfllclk); - u32 (*perf_pmu_vfe_load)(struct gk20a *g); u32 (*clk_set_boot_clk)(struct gk20a *g); }clk; } pmu_ver; @@ -1113,6 +1112,7 @@ struct gpu_ops { bool support_clk_freq_controller; bool support_pmgr_domain; bool support_lpwr_pg; + u32 (*perf_pmu_vfe_load)(struct gk20a *g); } clk; struct { int (*arbiter_clk_init)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/perf/perf.c b/drivers/gpu/nvgpu/perf/perf.c index daed6f54..582b2577 100644 --- a/drivers/gpu/nvgpu/perf/perf.c +++ b/drivers/gpu/nvgpu/perf/perf.c @@ -67,21 +67,6 @@ static int pmu_handle_perf_event(struct gk20a *g, void *pmu_msg) return 0; } -u32 perf_pmu_vfe_load_gv10x(struct gk20a *g) -{ - struct nvgpu_pmu *pmu = &g->pmu; - struct nv_pmu_rpc_struct_perf_load rpc; - u32 status = 0; - - memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perf_load)); - PMU_RPC_EXECUTE_CPB(status, pmu, PERF, VFE_INVALIDATE, &rpc, 0); - if (status) { - nvgpu_err(g, "Failed to execute RPC status=0x%x", - status); - } - return status; -} - u32 perf_pmu_vfe_load(struct gk20a *g) { struct pmu_cmd cmd; diff --git a/drivers/gpu/nvgpu/perf/perf.h b/drivers/gpu/nvgpu/perf/perf.h index 
180efb2c..17a9b0e1 100644
--- a/drivers/gpu/nvgpu/perf/perf.h
+++ b/drivers/gpu/nvgpu/perf/perf.h
@@ -65,15 +65,28 @@
 
 struct gk20a;
 
+/*
+ * State shared between the PMU perf message handler and the VFE worker
+ * thread (see gv100/perf_gv100.c).
+ */
+struct nvgpu_vfe_invalidate {
+	/* set by the handler on a VFE callback, cleared by the worker */
+	bool state_change;
+	/* condition the worker waits on for state_change */
+	struct nvgpu_cond wq;
+	/* the worker thread itself */
+	struct nvgpu_thread state_task;
+};
+
 struct perf_pmupstate {
 	struct vfe_vars vfe_varobjs;
 	struct vfe_equs vfe_equobjs;
 	struct pstates pstatesobjs;
 	struct obj_volt volt;
 	struct obj_lwpr lpwr;
+	struct nvgpu_vfe_invalidate vfe_init;
 };
 
 u32 perf_pmu_vfe_load(struct gk20a *g);
-u32 perf_pmu_vfe_load_gv10x(struct gk20a *g);
 
 #endif
diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c
index 80f3edb6..0e7404cb 100644
--- a/drivers/gpu/nvgpu/pstate/pstate.c
+++ b/drivers/gpu/nvgpu/pstate/pstate.c
@@ -222,7 +222,7 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g)
 		return err;
 	}
 
-	err = g->ops.pmu_ver.clk.perf_pmu_vfe_load(g);
+	err = g->ops.clk.perf_pmu_vfe_load(g);
 	if (err) {
 		return err;
 	}
-- 
cgit v1.2.2