From 01e6fac4d61fdd7fff5433942ec93fc2ea1e4df1 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Wed, 28 Jun 2023 18:24:25 -0400 Subject: Include nvgpu headers These are needed to build on NVIDIA's Jetson boards for the time being. Only a couple structs are required, so it should be fairly easy to remove this dependency at some point in the future. --- include/os/linux/platform_gp10b_tegra.c | 510 ++++++++++++++++++++++++++++++++ 1 file changed, 510 insertions(+) create mode 100644 include/os/linux/platform_gp10b_tegra.c (limited to 'include/os/linux/platform_gp10b_tegra.c') diff --git a/include/os/linux/platform_gp10b_tegra.c b/include/os/linux/platform_gp10b_tegra.c new file mode 100644 index 0000000..9bf8d63 --- /dev/null +++ b/include/os/linux/platform_gp10b_tegra.c @@ -0,0 +1,510 @@ +/* + * GP10B Tegra Platform Interface + * + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include "os_linux.h" + +#include "clk.h" + +#include "platform_gk20a.h" +#include "platform_gk20a_tegra.h" +#include "platform_gp10b.h" +#include "platform_gp10b_tegra.h" +#include "scale.h" + +/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ +#define GP10B_FREQ_SELECT_STEP 8 +/* Allow limited set of frequencies to be available */ +#define GP10B_NUM_SUPPORTED_FREQS 15 +/* Max number of freq supported in h/w */ +#define GP10B_MAX_SUPPORTED_FREQS 120 +static unsigned long +gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP]; + +static bool freq_table_init_complete; +static int num_supported_freq; + +#define TEGRA_GP10B_BW_PER_FREQ 64 +#define TEGRA_DDR4_BW_PER_FREQ 16 + +#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ) + +#define GPCCLK_INIT_RATE 1000000000 + +static struct { + char *name; + unsigned long default_rate; +} tegra_gp10b_clocks[] = { + {"gpu", GPCCLK_INIT_RATE}, + {"gpu_sys", 204000000} }; + +/* + * gp10b_tegra_get_clocks() + * + * This function finds clocks in tegra platform and populates + * the clock information to gp10b platform data. + */ + +int gp10b_tegra_get_clocks(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + unsigned int i; + + platform->num_clks = 0; + for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) { + long rate = tegra_gp10b_clocks[i].default_rate; + struct clk *c; + + c = clk_get(dev, tegra_gp10b_clocks[i].name); + if (IS_ERR(c)) { + nvgpu_err(platform->g, "cannot get clock %s", + tegra_gp10b_clocks[i].name); + } else { + clk_set_rate(c, rate); + platform->clk[i] = c; + } + } + platform->num_clks = i; + + if (platform->clk[0]) { + i = tegra_bpmp_dvfs_get_clk_id(dev->of_node, + tegra_gp10b_clocks[0].name); + if (i > 0) + platform->maxmin_clk_id = i; + } + + return 0; +} + +void gp10b_tegra_scale_init(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct tegra_bwmgr_client *bwmgr_handle; + + if (!profile) + return; + + if ((struct tegra_bwmgr_client *)profile->private_data) + return; + + bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU); + if (!bwmgr_handle) + return; + + profile->private_data = (void *)bwmgr_handle; +} + +static void gp10b_tegra_scale_exit(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + + if (profile && profile->private_data) + tegra_bwmgr_unregister( + (struct tegra_bwmgr_client *)profile->private_data); +} + +static int gp10b_tegra_probe(struct device *dev) +{ + struct gk20a_platform *platform = dev_get_drvdata(dev); + bool joint_xpu_rail = false; + struct gk20a *g = platform->g; +#ifdef CONFIG_TEGRA_GK20A_NVHOST + int ret; + + ret = nvgpu_get_nvhost_dev(platform->g); + if (ret) + return ret; +#endif + + ret = gk20a_tegra_init_secure_alloc(platform); + if (ret) + return ret; + + platform->disable_bigpage = !device_is_iommuable(dev); + + platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close + = false; + platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close + = false; + + platform->g->gr.ctx_vars.force_preemption_gfxp = false; + platform->g->gr.ctx_vars.force_preemption_cilp = false; + +#ifdef CONFIG_OF + joint_xpu_rail = of_property_read_bool(of_chosen, + "nvidia,tegra-joint_xpu_rail"); +#endif + + if (joint_xpu_rail) { + nvgpu_log_info(g, "XPU rails are joint\n"); + platform->can_railgate_init = false; + __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, false); + } + + gp10b_tegra_get_clocks(dev); + nvgpu_linux_init_clk_support(platform->g); + + nvgpu_mutex_init(&platform->clk_get_freq_lock); + + platform->g->ops.clk.support_clk_freq_controller = true; + + return 0; +} + +static int gp10b_tegra_late_probe(struct device *dev) +{ + return 0; +} + +static int gp10b_tegra_remove(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + + /* deinitialise tegra specific scaling quirks */ + gp10b_tegra_scale_exit(dev); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST + nvgpu_free_nvhost_dev(get_gk20a(dev)); +#endif + + nvgpu_mutex_destroy(&platform->clk_get_freq_lock); + + return 0; +} + +static bool gp10b_tegra_is_railgated(struct device *dev) +{ + bool ret = false; + + if (tegra_bpmp_running()) + ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU); + + return ret; +} + +static int gp10b_tegra_railgate(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + + /* remove emc frequency floor */ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + 0, TEGRA_BWMGR_SET_EMC_FLOOR); + + if (tegra_bpmp_running() && + tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) { + int i; + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_disable_unprepare(platform->clk[i]); + } + tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU); + } + return 0; +} + +static int gp10b_tegra_unrailgate(struct device *dev) +{ + int ret = 0; + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + + if (tegra_bpmp_running()) { + int i; + ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU); + for (i = 0; i < platform->num_clks; i++) { + if (platform->clk[i]) + clk_prepare_enable(platform->clk[i]); + } + } + + /* to start with set emc frequency floor to max rate*/ + if (profile) + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + tegra_bwmgr_get_max_emc_rate(), + TEGRA_BWMGR_SET_EMC_FLOOR); + return ret; +} + +static int gp10b_tegra_suspend(struct device *dev) +{ + return 0; +} + +int gp10b_tegra_reset_assert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + int ret = 0; + + if (!platform->reset_control) + return -EINVAL; + + ret = reset_control_assert(platform->reset_control); + + return ret; +} + +int gp10b_tegra_reset_deassert(struct device *dev) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + int ret = 0; + + if (!platform->reset_control) + return -EINVAL; + + ret = reset_control_deassert(platform->reset_control); + + return ret; +} + +void gp10b_tegra_prescale(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + u32 avg = 0; + + nvgpu_log_fn(g, " "); + + nvgpu_pmu_load_norm(g, &avg); + + nvgpu_log_fn(g, "done"); +} + +void gp10b_tegra_postscale(struct device *pdev, + unsigned long freq) +{ + struct gk20a_platform *platform = gk20a_get_platform(pdev); + struct gk20a_scale_profile *profile = platform->g->scale_profile; + struct gk20a *g = get_gk20a(pdev); + unsigned long emc_rate; + + nvgpu_log_fn(g, " "); + if (profile && profile->private_data && + !platform->is_railgated(pdev)) { + unsigned long emc_scale; + + if (freq <= gp10b_freq_table[0]) + emc_scale = 0; + else + emc_scale = g->emc3d_ratio; + + emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000; + + if (emc_rate > tegra_bwmgr_get_max_emc_rate()) + emc_rate = tegra_bwmgr_get_max_emc_rate(); + + tegra_bwmgr_set_emc( + (struct tegra_bwmgr_client *)profile->private_data, + emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR); + } + nvgpu_log_fn(g, "done"); +} + +long gp10b_round_clk_rate(struct device *dev, unsigned long rate) +{ + struct gk20a *g = get_gk20a(dev); + struct gk20a_scale_profile *profile = g->scale_profile; + unsigned long *freq_table = profile->devfreq_profile.freq_table; + int max_states = profile->devfreq_profile.max_state; + int i; + + for (i = 0; i < max_states; ++i) + if (freq_table[i] >= rate) + return freq_table[i]; + + return freq_table[max_states - 1]; +} + +int gp10b_clk_get_freqs(struct device *dev, + unsigned long **freqs, int *num_freqs) +{ + struct gk20a_platform *platform = gk20a_get_platform(dev); + struct gk20a *g = platform->g; + unsigned long max_rate; + unsigned long new_rate = 0, prev_rate = 0; + int i, freq_counter = 0; + int sel_freq_cnt; + unsigned long loc_freq_table[GP10B_MAX_SUPPORTED_FREQS]; + + nvgpu_mutex_acquire(&platform->clk_get_freq_lock); + + if (freq_table_init_complete) { + + *freqs = gp10b_freq_table; + *num_freqs = num_supported_freq; + + nvgpu_mutex_release(&platform->clk_get_freq_lock); + + return 0; + } + + max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1)); + + /* + * Walk the h/w frequency table and update the local table + */ + for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { + prev_rate = new_rate; + new_rate = clk_round_rate(platform->clk[0], + prev_rate + 1); + loc_freq_table[i] = new_rate; + if (new_rate == max_rate) + break; + } + freq_counter = i + 1; + WARN_ON(freq_counter == GP10B_MAX_SUPPORTED_FREQS); + + /* + * If the number of achievable frequencies is less than or + * equal to GP10B_NUM_SUPPORTED_FREQS, select all frequencies + * else, select one out of every 8 frequencies + */ + if (freq_counter <= GP10B_NUM_SUPPORTED_FREQS) { + for (sel_freq_cnt = 0; sel_freq_cnt < freq_counter; ++sel_freq_cnt) + gp10b_freq_table[sel_freq_cnt] = + loc_freq_table[sel_freq_cnt]; + } else { + /* + * Walk the h/w frequency table and only select + * GP10B_FREQ_SELECT_STEP'th frequencies and + * add MAX freq to last + */ + sel_freq_cnt = 0; + for (i = 0; i < GP10B_MAX_SUPPORTED_FREQS; ++i) { + new_rate = loc_freq_table[i]; + + if (i % GP10B_FREQ_SELECT_STEP == 0 || + new_rate == max_rate) { + gp10b_freq_table[sel_freq_cnt++] = + new_rate; + + if (new_rate == max_rate) + break; + } + } + WARN_ON(sel_freq_cnt == GP10B_MAX_SUPPORTED_FREQS); + } + + /* Fill freq table */ + *freqs = gp10b_freq_table; + *num_freqs = sel_freq_cnt; + num_supported_freq = sel_freq_cnt; + + freq_table_init_complete = true; + + nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n", + gp10b_freq_table[0], max_rate, *num_freqs); + + nvgpu_mutex_release(&platform->clk_get_freq_lock); + + return 0; +} + +struct gk20a_platform gp10b_tegra_platform = { + .has_syncpoints = true, + + /* power management configuration */ + .railgate_delay_init = 500, + + /* ldiv slowdown factor */ + .ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16, + + /* power management configuration */ + .can_railgate_init = true, + .enable_elpg = true, + .can_elpg_init = true, + .enable_blcg = true, + .enable_slcg = true, + .enable_elcg = true, + .can_slcg = true, + .can_blcg = true, + .can_elcg = true, + .enable_aelpg = true, + .enable_perfmon = true, + + /* ptimer src frequency in hz*/ + .ptimer_src_freq = 31250000, + + .ch_wdt_timeout_ms = 5000, + + .probe = gp10b_tegra_probe, + .late_probe = gp10b_tegra_late_probe, + .remove = gp10b_tegra_remove, + + /* power management callbacks */ + .suspend = gp10b_tegra_suspend, + .railgate = gp10b_tegra_railgate, + .unrailgate = gp10b_tegra_unrailgate, + .is_railgated = gp10b_tegra_is_railgated, + + .busy = gk20a_tegra_busy, + .idle = gk20a_tegra_idle, + + .dump_platform_dependencies = gk20a_tegra_debug_dump, + +#ifdef CONFIG_NVGPU_SUPPORT_CDE + .has_cde = true, +#endif + + .clk_round_rate = gp10b_round_clk_rate, + .get_clk_freqs = gp10b_clk_get_freqs, + + /* frequency scaling configuration */ + .initscale = gp10b_tegra_scale_init, + .prescale = gp10b_tegra_prescale, + .postscale = gp10b_tegra_postscale, + .devfreq_governor = "nvhost_podgov", + + .qos_notify = gk20a_scale_qos_notify, + + .reset_assert = gp10b_tegra_reset_assert, + .reset_deassert = gp10b_tegra_reset_deassert, + + .force_reset_in_do_idle = false, + + .soc_name = "tegra18x", + + .unified_memory = true, + .dma_mask = DMA_BIT_MASK(36), + + .ltc_streamid = TEGRA_SID_GPUB, + + .secure_buffer_size = 401408, +}; -- cgit v1.2.2