From 78e3d22da3c2513d425c8c2560468ce854a982dd Mon Sep 17 00:00:00 2001 From: Aparna Das Date: Tue, 11 Sep 2018 17:11:44 -0700 Subject: gpu: nvgpu: vgpu: support clk-arb 1. Implement the following vgpu functions to support clk-arb: - vgpu_clk_get_range() to return min and max freqs from supported frequencies - implement vgpu_clk_get_round_rate() which sets rounded rate to input rate. Rounding is handled in RM Server - modify vgpu_clk_get_freqs() to retrieve freq table in IVM memory instead of copying the value in array as part of cmd message. 2. Add support for clk-arb related HALs for vgpu. 3. support_clk_freq_controller is assigned true for vgpu provided guest VM has the privilege to set clock frequency. Bug 200422845 Bug 2363882 Jira EVLR-3254 Change-Id: I91fc392db381c5db1d52b19d45ec0481fdc27554 Signed-off-by: Aparna Das Reviewed-on: https://git-master.nvidia.com/r/1812379 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h | 2 +- drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h | 4 + drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c | 14 ++ drivers/gpu/nvgpu/vgpu/clk_vgpu.c | 174 ++++++++++++++++++++-- drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 16 ++ drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 16 ++ drivers/gpu/nvgpu/vgpu/vgpu.c | 8 + 7 files changed, 218 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h index f7a58c87..5ee50b18 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h @@ -519,6 +519,7 @@ struct tegra_vgpu_constants_params { u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT]; u32 hwpm_ctx_size; u8 force_preempt_mode; + u8 can_set_clkrate; u32 default_timeslice_us; u32 preempt_ctx_size; u32 channel_base; @@ -578,7 +579,6 @@ struct tegra_vgpu_perfbuf_mgt_params { struct tegra_vgpu_get_gpu_freq_table_params { u32 num_freqs; - u32 freqs[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; /* in kHz */ }; struct tegra_vgpu_vsms_mapping_params { diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h index 762bc229..15ab879e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h @@ -26,6 +26,7 @@ #include #include #include +#include #include struct device; @@ -45,6 +46,9 @@ struct vgpu_priv_data { struct tegra_vgpu_constants_params constants; struct vgpu_ecc_stat *ecc_stats; int ecc_stats_count; + u32 num_freqs; + unsigned long *freqs; + struct nvgpu_mutex vgpu_clk_get_freq_lock; }; struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c index e01178ed..aa2fa998 100644 --- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "vgpu_linux.h" #include "vgpu/fecs_trace_vgpu.h" @@ -72,12 +73,19 @@ static void vgpu_remove_support(struct gk20a *g) static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); nvgpu_mutex_init(&g->power_lock); nvgpu_mutex_init(&g->ctxsw_disable_lock); + nvgpu_mutex_init(&g->clk_arb_enable_lock); + + nvgpu_mutex_init(&priv->vgpu_clk_get_freq_lock); + l->regs_saved = l->regs; l->bar1_saved = l->bar1; + nvgpu_atomic_set(&g->clk_arb_global_nr, 0); + g->aggressive_sync_destroy = platform->aggressive_sync_destroy; g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh; __nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints); @@ -206,6 +214,12 @@ int vgpu_pm_finalize_poweron(struct device *dev) goto done; } + err = nvgpu_clk_arb_init_arbiter(g); + if (err) { + nvgpu_err(g, "failed to init clk arb"); + goto done; + } + err = g->ops.chip_init_gpu_characteristics(g); if (err) { nvgpu_err(g, "failed to init gk20a gpu characteristics"); diff --git a/drivers/gpu/nvgpu/vgpu/clk_vgpu.c b/drivers/gpu/nvgpu/vgpu/clk_vgpu.c index efcb4fb0..6aea7c8f 100644 --- a/drivers/gpu/nvgpu/vgpu/clk_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/clk_vgpu.c @@ -21,14 +21,12 @@ */ #include +#include #include "gk20a/gk20a.h" #include "clk_vgpu.h" #include "ctrl/ctrlclk.h" -static unsigned long -vgpu_freq_table[TEGRA_VGPU_GPU_FREQ_TABLE_SIZE]; - static unsigned long vgpu_clk_get_rate(struct gk20a *g, u32 api_domain) { struct tegra_vgpu_cmd_msg msg = {}; @@ -95,9 +93,117 @@ static int vgpu_clk_set_rate(struct gk20a *g, static unsigned long vgpu_clk_get_maxrate(struct gk20a *g, u32 api_domain) { - struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + unsigned long *freqs; + int num_freqs = 0; + int err; + unsigned long ret = 0; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + err = vgpu_clk_get_freqs(g, &freqs, &num_freqs); + if (err == 0) { + /* return freq in Hz */ + ret = freqs[num_freqs - 1]; + } + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return ret; +} + +static int vgpu_clk_get_round_rate(struct gk20a *g, u32 api_domain, + unsigned long rate_target, unsigned long *rounded_rate) +{ + int err = -EINVAL; + + nvgpu_log_fn(g, " "); - return priv->constants.max_freq; + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + *rounded_rate = rate_target; + err = 0; + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static int vgpu_clk_get_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz) +{ + unsigned long *freqs; + int num_freqs = 0; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + err = vgpu_clk_get_freqs(g, &freqs, &num_freqs); + if (err == 0) { + /* return freq in MHz */ + *min_mhz = (u16)(freqs[0] / 1000000); + *max_mhz = (u16)(freqs[num_freqs - 1] / 1000000); + } + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; +} + +static int vgpu_clk_get_f_points(struct gk20a *g, + u32 api_domain, u32 *num_points, u16 *freqs_mhz) +{ + unsigned long *freqs; + int num_freqs = 0; + u32 i; + int err = -EINVAL; + + nvgpu_log_fn(g, " "); + + switch (api_domain) { + case CTRL_CLK_DOMAIN_GPCCLK: + err = vgpu_clk_get_freqs(g, &freqs, &num_freqs); + if (err) { + return err; + } + + if (num_points == NULL) { + return -EINVAL; + } + + if (*num_points != 0U) { + if (freqs == NULL || (*num_points > (u32)num_freqs)) { + return -EINVAL; + } + } + + if (*num_points == 0) { + *num_points = num_freqs; + } else { + for (i = 0; i < *num_points; i++) { + /* return freq in MHz */ + freqs_mhz[i] = (u16)(freqs[i] / 1000000); + } + } + break; + default: + nvgpu_err(g, "unknown clock: %u", api_domain); + break; + } + + return err; } void vgpu_init_clk_support(struct gk20a *g) @@ -105,37 +211,75 @@ void vgpu_init_clk_support(struct gk20a *g) g->ops.clk.get_rate = vgpu_clk_get_rate; g->ops.clk.set_rate = vgpu_clk_set_rate; g->ops.clk.get_maxrate = vgpu_clk_get_maxrate; + g->ops.clk.clk_get_round_rate = vgpu_clk_get_round_rate; + g->ops.clk.get_clk_range = vgpu_clk_get_range; + g->ops.clk.clk_domain_get_f_points = vgpu_clk_get_f_points; } -int vgpu_clk_get_freqs(struct gk20a *g, unsigned long **freqs, int *num_freqs) +int vgpu_clk_get_freqs(struct gk20a *g, unsigned long **freqs_out, + int *num_freqs) { struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_get_gpu_freq_table_params *p = &msg.params.get_gpu_freq_table; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + u32 *freqs; + int err = 0; + void *handle = NULL; + size_t oob_size; unsigned int i; - int err; nvgpu_log_fn(g, " "); + nvgpu_mutex_acquire(&priv->vgpu_clk_get_freq_lock); + + if (priv->freqs != NULL) { + goto done; + } + msg.cmd = TEGRA_VGPU_CMD_GET_GPU_FREQ_TABLE; msg.handle = vgpu_get_handle(g); - p->num_freqs = TEGRA_VGPU_GPU_FREQ_TABLE_SIZE; err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); err = err ? err : msg.ret; if (err) { nvgpu_err(g, "%s failed - %d", __func__, err); - return err; + goto done; } - /* return frequency in Hz */ - for (i = 0; i < p->num_freqs; i++) - vgpu_freq_table[i] = p->freqs[i] * 1000; + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, (void **)&freqs, &oob_size); + if (!handle) { + nvgpu_err(g, "failed to get ivm handle"); + err = -EINVAL; + goto done; + } - *freqs = vgpu_freq_table; - *num_freqs = p->num_freqs; + priv->freqs = nvgpu_kzalloc(g, sizeof(*priv->freqs) * (p->num_freqs)); + if (!priv->freqs) { + nvgpu_err(g, "failed to allocate memory"); + vgpu_ivc_oob_put_ptr(handle); + err = -ENOMEM; + goto done; + } + priv->num_freqs = p->num_freqs; - return 0; + for (i = 0; i < priv->num_freqs; i++) { + /* store frequency in Hz */ + priv->freqs[i] = (unsigned long)(freqs[i] * 1000); + } + + vgpu_ivc_oob_put_ptr(handle); + +done: + if (err == 0) { + *num_freqs = priv->num_freqs; + *freqs_out = priv->freqs; + } + + nvgpu_mutex_release(&priv->vgpu_clk_get_freq_lock); + + return err; } int vgpu_clk_cap_rate(struct gk20a *g, unsigned long rate) diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 78aef699..2b4b3463 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -59,6 +59,7 @@ #include "gp10b/gr_ctx_gp10b.h" #include "gp10b/fifo_gp10b.h" #include "gp10b/regops_gp10b.h" +#include "gp10b/clk_arb_gp10b.h" #include "gm20b/gr_gm20b.h" #include "gm20b/fifo_gm20b.h" @@ -450,6 +451,15 @@ static const struct gpu_ops vgpu_gp10b_ops = { .reset_engine = NULL, .is_engine_in_reset = NULL, }, + .clk_arb = { + .get_arbiter_clk_domains = gp10b_get_arbiter_clk_domains, + .get_arbiter_f_points = gp10b_get_arbiter_f_points, + .get_arbiter_clk_range = gp10b_get_arbiter_clk_range, + .get_arbiter_clk_default = gp10b_get_arbiter_clk_default, + .arbiter_clk_init = gp10b_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gp10b_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gp10b_clk_arb_cleanup, + }, .regops = { .exec_regops = vgpu_exec_regops, .get_global_whitelist_ranges = @@ -558,6 +568,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { int vgpu_gp10b_init_hal(struct gk20a *g) { struct gpu_ops *gops = &g->ops; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); gops->ltc = vgpu_gp10b_ops.ltc; gops->ce2 = vgpu_gp10b_ops.ce2; @@ -573,6 +584,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g) gops->pramin = vgpu_gp10b_ops.pramin; gops->therm = vgpu_gp10b_ops.therm; gops->pmu = vgpu_gp10b_ops.pmu; + gops->clk_arb = vgpu_gp10b_ops.clk_arb; gops->regops = vgpu_gp10b_ops.regops; gops->mc = vgpu_gp10b_ops.mc; gops->debug = vgpu_gp10b_ops.debug; @@ -642,6 +654,10 @@ int vgpu_gp10b_init_hal(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); g->pmu_lsf_pmu_wpr_init_done = 0; + if (priv->constants.can_set_clkrate) { + gops->clk.support_clk_freq_controller = true; + } + g->name = "gp10b"; return 0; diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 1fc1b0f2..c0e1b1bb 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -68,6 +68,7 @@ #include #include "gp10b/gr_gp10b.h" #include +#include "gp10b/clk_arb_gp10b.h" #include #include @@ -522,6 +523,15 @@ static const struct gpu_ops vgpu_gv11b_ops = { .pmu_pg_set_sub_feature_mask = NULL, .is_pmu_supported = NULL, }, + .clk_arb = { + .get_arbiter_clk_domains = gp10b_get_arbiter_clk_domains, + .get_arbiter_f_points = gp10b_get_arbiter_f_points, + .get_arbiter_clk_range = gp10b_get_arbiter_clk_range, + .get_arbiter_clk_default = gp10b_get_arbiter_clk_default, + .arbiter_clk_init = gp10b_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gp10b_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gp10b_clk_arb_cleanup, + }, .regops = { .exec_regops = vgpu_exec_regops, .get_global_whitelist_ranges = @@ -627,6 +637,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { int vgpu_gv11b_init_hal(struct gk20a *g) { struct gpu_ops *gops = &g->ops; + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); gops->ltc = vgpu_gv11b_ops.ltc; gops->ce2 = vgpu_gv11b_ops.ce2; @@ -641,6 +652,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) #endif gops->therm = vgpu_gv11b_ops.therm; gops->pmu = vgpu_gv11b_ops.pmu; + gops->clk_arb = vgpu_gv11b_ops.clk_arb; gops->regops = vgpu_gv11b_ops.regops; gops->mc = vgpu_gv11b_ops.mc; gops->debug = vgpu_gv11b_ops.debug; @@ -661,6 +673,10 @@ int vgpu_gv11b_init_hal(struct gk20a *g) gops->get_litter_value = vgpu_gv11b_ops.get_litter_value; gops->semaphore_wakeup = gk20a_channel_semaphore_wakeup; + if (priv->constants.can_set_clkrate) { + gops->clk.support_clk_freq_controller = true; + } + g->name = "gv11b"; return 0; diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 9ad0468e..7d7df9b3 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "fecs_trace_vgpu.h" @@ -235,6 +236,13 @@ void vgpu_remove_support_common(struct gk20a *g) &msg, sizeof(msg)); WARN_ON(err); nvgpu_thread_stop(&priv->intr_handler); + + nvgpu_clk_arb_cleanup_arbiter(g); + + nvgpu_mutex_destroy(&g->clk_arb_enable_lock); + nvgpu_mutex_destroy(&priv->vgpu_clk_get_freq_lock); + + nvgpu_kfree(g, priv->freqs); } void vgpu_detect_chip(struct gk20a *g) -- cgit v1.2.2