From 05805ec65b2cd6413c9d0d711d3798fd457fed6e Mon Sep 17 00:00:00 2001
From: Thomas Fleury
Date: Tue, 13 Sep 2016 14:25:28 -0700
Subject: gpu: nvgpu: ioctls for clock controls

Add ioctls for clock range and VF points query.
Add ioctls to set target mhz, and get actual mhz.

Jira DNVGPU-125

Change-Id: I7639789bb15eabd8c98adc468201dba3a6e19ade
Signed-off-by: Thomas Fleury
Reviewed-on: http://git-master/r/1223473
Reviewed-by: mobile promotions
Tested-by: mobile promotions
(cherry picked from commit 5e635ae34221c99a739321bcfc1418db56c1051d)
Reviewed-on: http://git-master/r/1243107
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom
---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c | 332 ++++++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/gk20a.c      |   8 +
 drivers/gpu/nvgpu/gk20a/gk20a.h      |  10 ++
 include/uapi/linux/nvgpu.h           | 173 ++++++++++++++++++
 4 files changed, 520 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 56bc2c7a..7d344e9a 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -29,25 +30,55 @@
 #include "hw_gr_gk20a.h"
 #include "hw_fb_gk20a.h"
 #include "hw_timer_gk20a.h"
+#include "clk/clk_arb.h"
+
+
+struct gk20a_ctrl_priv {
+	struct device *dev;
+	struct nvgpu_clk_session *clk_session;
+};
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g;
+	struct gk20a_ctrl_priv *priv;
+	int err;
 
 	gk20a_dbg_fn("");
 
 	g = container_of(inode->i_cdev, struct gk20a, ctrl.cdev);
 
-	filp->private_data = g->dev;
+	priv = kzalloc(sizeof(struct gk20a_ctrl_priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-	return 0;
+	filp->private_data = priv;
+	priv->dev = g->dev;
+
+	if (!g->gr.sw_ready) {
+		err = gk20a_busy(g->dev);
+		if (err)
+			return err;
+
+		gk20a_idle(g->dev);
+	}
+
+	return nvgpu_clk_arb_init_session(g, &priv->clk_session);
 }
 
 int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
 {
+	struct gk20a_ctrl_priv *priv = filp->private_data;
+	struct gk20a *g = gk20a_from_dev(priv->dev);
+	struct nvgpu_clk_session *clk_session = priv->clk_session;
+
 	gk20a_dbg_fn("");
 
+	if (clk_session)
+		nvgpu_clk_arb_cleanup_session(g, clk_session);
+	kfree(priv);
+
 	return 0;
 }
 
@@ -789,9 +820,284 @@ static int nvgpu_gpu_get_memory_state(struct gk20a *g,
 	return err;
 }
 
+static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
+		struct gk20a_ctrl_priv *priv,
+		struct nvgpu_gpu_clk_vf_points_args *args)
+{
+	struct nvgpu_gpu_clk_vf_point clk_point;
+	struct nvgpu_gpu_clk_vf_point __user *entry;
+	struct nvgpu_clk_session *session = priv->clk_session;
+	u32 clk_domains = 0;
+	int err;
+	u16 last_mhz;
+	u16 *fpoints;
+	u32 i;
+	u32 max_points = 0;
+	u32 num_points = 0;
+	u16 min_mhz;
+	u16 max_mhz;
+
+	gk20a_dbg_fn("");
+
+	if (!session || args->flags)
+		return -EINVAL;
+
+	clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
+	args->num_entries = 0;
+
+	if ((args->clk_domain & clk_domains) == 0)
+		return -EINVAL;
+
+	err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
+			args->clk_domain, &max_points, NULL);
+	if (err)
+		return err;
+
+	if (!args->max_entries) {
+		args->max_entries = max_points;
+		return 0;
+	}
+
+	if (args->max_entries < max_points)
+		return -EINVAL;
+
+	err = nvgpu_clk_arb_get_arbiter_clk_range(g, args->clk_domain,
+			&min_mhz, &max_mhz);
+	if (err)
+		return err;
+
+	fpoints = kcalloc(max_points, sizeof(u16), GFP_KERNEL);
+	if (!fpoints)
+		return -ENOMEM;
+
+	err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
+			args->clk_domain, &max_points, fpoints);
+	if (err)
+		goto fail;
+
+	entry = (struct nvgpu_gpu_clk_vf_point __user *)
+			(uintptr_t)args->clk_vf_point_entries;
+
+	last_mhz = 0;
+	num_points = 0;
+	for (i = 0; (i < max_points) && !err; i++) {
+
+		/* filter out duplicate frequencies */
+		if (fpoints[i] == last_mhz)
+			continue;
+
+		/* filter out out-of-range frequencies */
+		if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz))
+			continue;
+
+		last_mhz = fpoints[i];
+		clk_point.freq_mhz = fpoints[i];
+
+		err = copy_to_user((void __user *)entry, &clk_point,
+				sizeof(clk_point));
+
+		num_points++;
+		entry++;
+	}
+
+	args->num_entries = num_points;
+
+fail:
+	kfree(fpoints);
+	return err;
+}
+
+static int nvgpu_gpu_clk_get_range(struct gk20a *g,
+		struct gk20a_ctrl_priv *priv,
+		struct nvgpu_gpu_clk_range_args *args)
+{
+	struct nvgpu_gpu_clk_range clk_range;
+	struct nvgpu_gpu_clk_range __user *entry;
+	struct nvgpu_clk_session *session = priv->clk_session;
+
+	u32 clk_domains = 0;
+	u32 num_domains;
+	int bit;
+	u16 min_mhz, max_mhz;
+	int err;
+
+	gk20a_dbg_fn("");
+
+	if (!session || args->flags)
+		return -EINVAL;
+
+	args->num_entries = 0;
+
+	clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
+	num_domains = hweight_long(clk_domains);
+
+	if (!args->max_entries) {
+		args->max_entries = num_domains;
+		return 0;
+	}
+
+	if (args->max_entries < num_domains)
+		return -EINVAL;
+
+	entry = (struct nvgpu_gpu_clk_range __user *)
+			(uintptr_t)args->clk_range_entries;
+
+	memset(&clk_range, 0, sizeof(clk_range));
+
+	while (clk_domains) {
+		bit = ffs(clk_domains) - 1;
+
+		clk_range.clk_domain = BIT(bit);
+
+		err = nvgpu_clk_arb_get_arbiter_clk_range(g,
+				clk_range.clk_domain, &min_mhz, &max_mhz);
+		if (err)
+			return err;
+
+		clk_range.min_mhz = min_mhz;
+		clk_range.max_mhz = max_mhz;
+
+		err = copy_to_user(entry, &clk_range, sizeof(clk_range));
+		if (err)
+			return -EFAULT;
+
+		entry++;
+		clk_domains &= ~BIT(bit);
+	}
+
+	args->num_entries = num_domains;
+
+	return 0;
+}
+
+
+static int nvgpu_gpu_clk_set_info(struct gk20a *g,
+		struct gk20a_ctrl_priv *priv,
+		struct nvgpu_gpu_clk_set_info_args *args)
+{
+	struct nvgpu_gpu_clk_info clk_info;
+	struct nvgpu_gpu_clk_info __user *entry;
+	struct nvgpu_clk_session *session = priv->clk_session;
+	u32 clk_domains = 0;
+	u32 i;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	if (!session || args->flags)
+		return -EINVAL;
+
+	clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
+	if (!clk_domains)
+		return -EINVAL;
+
+	fd = nvgpu_clk_arb_install_session_fd(g, session);
+	if (fd < 0)
+		return fd;
+
+	entry = (struct nvgpu_gpu_clk_info __user *)
+			(uintptr_t)args->clk_info_entries;
+
+	for (i = 0; i < args->num_entries; i++, entry++) {
+
+		if (copy_from_user(&clk_info, entry, sizeof(clk_info)))
+			return -EFAULT;
+
+		if ((clk_info.clk_domain & clk_domains) != clk_info.clk_domain)
+			return -EINVAL;
+
+		if (hweight_long(clk_info.clk_domain) != 1)
+			return -EINVAL;
+	}
+
+	entry = (struct nvgpu_gpu_clk_info __user *)
+			(uintptr_t)args->clk_info_entries;
+
+	for (i = 0; i < args->num_entries; i++, entry++) {
+
+		if (copy_from_user(&clk_info, (void __user *)entry,
+				sizeof(clk_info)))
+			return -EFAULT;
+
+		nvgpu_clk_arb_set_session_target_mhz(session,
+				clk_info.clk_domain, clk_info.target_mhz);
+	}
+
+	nvgpu_clk_arb_apply_session_constraints(g, session);
+
+	args->req_nr = nvgpu_clk_arb_get_session_req_nr(g, session);
+	args->fd = fd;
+
+	return 0;
+}
+
+
+static int nvgpu_gpu_clk_get_info(struct gk20a *g,
+		struct gk20a_ctrl_priv *priv,
+		struct nvgpu_gpu_clk_get_info_args *args)
+{
+	struct nvgpu_gpu_clk_info clk_info;
+	struct nvgpu_gpu_clk_info __user *entry;
+	struct nvgpu_clk_session *session = priv->clk_session;
+	u32 clk_domains = 0;
+	u32 num_domains;
+	u16 actual_mhz;
+	u16 target_mhz;
+	int err;
+	u32 i;
+
+	gk20a_dbg_fn("");
+
+	if (!session || args->flags)
+		return -EINVAL;
+
+	clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
+	if (!clk_domains)
+		return -EINVAL;
+
+	args->last_req_nr = nvgpu_clk_arb_get_arbiter_req_nr(g);
+
+	num_domains = hweight_long(clk_domains);
+	if (!args->num_entries) {
+		args->num_entries = num_domains;
+		return 0;
+	}
+
+	entry = (struct nvgpu_gpu_clk_info __user *)
+			(uintptr_t)args->clk_info_entries;
+
+	for (i = 0; i < args->num_entries; i++, entry++) {
+
+		if (copy_from_user(&clk_info, (void __user *)entry,
+				sizeof(clk_info)))
+			return -EFAULT;
+
+		err = nvgpu_clk_arb_get_arbiter_actual_mhz(g,
+				clk_info.clk_domain, &actual_mhz);
+		if (err)
+			return err;
+
+		err = nvgpu_clk_arb_get_session_target_mhz(session,
+				clk_info.clk_domain, &target_mhz);
+		if (err)
+			return err;
+
+		clk_info.actual_mhz = actual_mhz;
+		clk_info.target_mhz = target_mhz;
+
+		err = copy_to_user((void __user *)entry, &clk_info,
+				sizeof(clk_info));
+		if (err)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct device *dev = filp->private_data;
+	struct gk20a_ctrl_priv *priv = filp->private_data;
+	struct device *dev = priv->dev;
 	struct gk20a *g = get_gk20a(dev);
 	struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
 	struct nvgpu_gpu_zcull_get_info_args *get_info_args;
@@ -1050,6 +1356,26 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_get_memory_state_args *)buf);
 		break;
 
+	case NVGPU_GPU_IOCTL_CLK_GET_RANGE:
+		err = nvgpu_gpu_clk_get_range(g, priv,
+			(struct nvgpu_gpu_clk_range_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS:
+		err = nvgpu_gpu_clk_get_vf_points(g, priv,
+			(struct nvgpu_gpu_clk_vf_points_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_CLK_SET_INFO:
+		err = nvgpu_gpu_clk_set_info(g, priv,
+			(struct nvgpu_gpu_clk_set_info_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_CLK_GET_INFO:
+		err = nvgpu_gpu_clk_get_info(g, priv,
+			(struct nvgpu_gpu_clk_get_info_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 99d2fe63..03bbf72f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -974,6 +974,12 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	}
 #endif
 
+	err = nvgpu_clk_arb_init_arbiter(g);
+	if (err) {
+		gk20a_err(dev, "failed to init clk arb");
+		goto done;
+	}
+
 	if (g->ops.pmu.is_pmu_supported(g)) {
 		err = gk20a_init_pmu_support(g);
 		if (err) {
@@ -1644,6 +1650,8 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 	if (platform->has_ce)
 		gk20a_ce_destroy(g);
 
+	nvgpu_clk_arb_cleanup_arbiter(g);
+
 	gk20a_user_deinit(dev, &nvgpu_class);
 
 	debugfs_remove_recursive(platform->debugfs);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 024be4db..564026a4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -56,6 +56,7 @@ struct acr_desc;
 #include "gm206/bios_gm206.h"
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 #include "clk/clk.h"
+#include "clk/clk_arb.h"
 #include "perf/perf.h"
 #include "pmgr/pmgr.h"
 #include "therm/thrm.h"
@@ -632,6 +633,13 @@ struct gpu_ops {
 		int (*suspend_clk_support)(struct gk20a *g);
 		u32 (*get_crystal_clk_hz)(struct gk20a *g);
 	} clk;
+	struct {
+		u32 (*get_arbiter_clk_domains)(struct gk20a *g);
+		int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain,
+				u16 *min_mhz, u16 *max_mhz);
+		int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain,
+				u16 *default_mhz);
+	} clk_arb;
 	bool privsecurity;
 	bool securegpccs;
 	bool pmupstate;
@@ -956,6 +964,8 @@ struct gk20a {
 	struct nvgpu_bios bios;
 	struct debugfs_blob_wrapper bios_blob;
 
+	struct nvgpu_clk_arb *clk_arb;
+
 	struct gk20a_ce_app ce_app;
 
 	/* PCI device identifier */
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index b4c7e829..d4582036 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -510,6 +510,171 @@ struct nvgpu_gpu_alloc_vidmem_args {
 	};
 };
 
+#define NVGPU_GPU_CLK_DOMAIN_MCLK	(0x00000010)
+#define NVGPU_GPU_CLK_DOMAIN_GPC2CLK	(0x00010000)
+
+struct nvgpu_gpu_clk_range {
+
+	/* Flags (not currently used) */
+	__u32 flags;
+
+	/* NVGPU_GPU_CLK_DOMAIN_* */
+	__u32 clk_domain;
+	__u32 min_mhz;
+	__u32 max_mhz;
+};
+
+struct nvgpu_gpu_clk_range_args {
+
+	/* Flags (not currently used) */
+	__u32 flags;
+
+	/* in/out: max number of entries in clk_range_entries buffer. If zero,
+	   NVGPU_GPU_IOCTL_CLK_GET_RANGE will return 0 and max_entries will be
+	   set to the max number of clock domains. If there are more clock
+	   domains than max_entries, then the ioctl will return -EINVAL.
+	*/
+	__u16 max_entries;
+
+	/* out: number of nvgpu_gpu_clk_range entries contained in
+	   clk_range_entries */
+	__u16 num_entries;
+
+	/* in: Pointer to clock range entries in the caller's address space.
+	   size must be >= max_entries * sizeof(struct nvgpu_gpu_clk_range)
+	*/
+	__u64 clk_range_entries;
+};
+
+struct nvgpu_gpu_clk_vf_point {
+
+	/* Flags (not currently used) */
+	__u32 flags;
+	__u32 freq_mhz;
+};
+
+struct nvgpu_gpu_clk_vf_points_args {
+
+	/* in: Flags (not currently used) */
+	__u32 flags;
+
+	/* in: NVGPU_GPU_CLK_DOMAIN_* */
+	__u32 clk_domain;
+
+	/* in/out: max number of nvgpu_gpu_clk_vf_point entries in
+	   clk_vf_point_entries. If max_entries is zero,
+	   NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS will return 0 and max_entries will
+	   be set to the max number of VF entries for this clock domain. If
+	   there are more VF entries than max_entries, then the ioctl will
+	   return -EINVAL.
+	*/
+	__u16 max_entries;
+
+	/* out: Number of nvgpu_gpu_clk_vf_point entries returned in
+	   clk_vf_point_entries. Number of entries might vary depending on
+	   thermal conditions.
+	*/
+	__u16 num_entries;
+
+	__u32 reserved;
+
+	/* in: Pointer to clock VF point entries in the caller's address space.
+	   size must be >= max_entries * sizeof(struct nvgpu_gpu_clk_vf_point).
+	*/
+	__u64 clk_vf_point_entries;
+};
+
+struct nvgpu_gpu_clk_info {
+
+	/* Flags (not currently used) */
+	__u32 flags;
+
+	/* NVGPU_GPU_CLK_DOMAIN_* */
+	__u32 clk_domain;
+
+	/* target clock frequency for the domain in MHz. Should be
+	   specified with a non-zero value in NVGPU_GPU_IOCTL_CLK_SET_INFO.
+	*/
+	__u32 target_mhz;
+
+	/* actual clock frequency for the domain in MHz. This value
+	   may deviate from the desired target frequency due to PLL
+	   constraints. Not used in NVGPU_GPU_IOCTL_CLK_SET_INFO.
+	*/
+	__u32 actual_mhz;
+};
+
+struct nvgpu_gpu_clk_get_info_args {
+
+	/* in: Flags (not currently used). */
+	__u32 flags;
+
+	__u16 pad0;
+
+	/* in/out: Number of clock info entries contained in clk_info_entries.
+	   If zero, NVGPU_GPU_IOCTL_CLK_GET_INFO will return 0 and num_entries
+	   will be set to the number of clock domains. Also, last_req_nr will
+	   be updated, which allows checking if a given request has completed.
+	*/
+	__u16 num_entries;
+
+	/* in: Pointer to nvgpu_gpu_clk_info entries in the caller's address
+	   space. Buffer size must be at least:
+		num_entries * sizeof(struct nvgpu_gpu_clk_info)
+	   For each entry, the clk_domain to be queried should be set. Note
+	   that clk_info_entries passed to an NVGPU_GPU_IOCTL_CLK_SET_INFO
+	   can be re-used on completion for a NVGPU_GPU_IOCTL_CLK_GET_INFO.
+	   This allows checking actual_mhz.
+	*/
+	__u64 clk_info_entries;
+
+	__u32 pad1;
+
+	/* out: sequence number of last processed request. sequence numbers
+	   are per-user.
+	*/
+	__u32 last_req_nr;
+};
+
+struct nvgpu_gpu_clk_set_info_args {
+
+	/* in: Flags (not currently used). */
+	__u32 flags;
+
+	__u16 pad0;
+
+	/* Number of clock info entries contained in clk_info_entries.
+	   Must be > 0.
+	*/
+	__u16 num_entries;
+
+	/* Pointer to clock info entries in the caller's address space. Buffer
+	   size must be at least
+		num_entries * sizeof(struct nvgpu_gpu_clk_info)
+	*/
+	__u64 clk_info_entries;
+
+	/* out: File descriptor for completions and event notifications.
+	   If the application does not close this fd after completion, then
+	   the same fd will be returned for subsequent requests (recommended).
+	*/
+	int fd;
+
+	/* out: sequence number for this request. In order to determine that
+	   a request has completed, an application should check this sequence
+	   number against last_req_nr from NVGPU_GPU_IOCTL_CLK_GET_INFO, using
+	   nvgpu_clk_req_complete(req_nr, last_req_nr).
+	*/
+	__u32 req_nr;
+};
+
+static inline int nvgpu_clk_req_complete(__u32 req_nr, __u32 last_req_nr)
+{
+	return ((long)(last_req_nr - req_nr) >= 0);
+}
+
 struct nvgpu_gpu_get_memory_state_args {
 	/*
 	 * Current free space for this device; may change even when any
@@ -596,6 +761,14 @@ struct nvgpu_gpu_get_fbp_l2_masks_args {
 #define NVGPU_GPU_IOCTL_ALLOC_VIDMEM	\
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \
 			struct nvgpu_gpu_alloc_vidmem_args)
+#define NVGPU_GPU_IOCTL_CLK_GET_RANGE \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 28, struct nvgpu_gpu_clk_range_args)
+#define NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 29, struct nvgpu_gpu_clk_vf_points_args)
+#define NVGPU_GPU_IOCTL_CLK_GET_INFO \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args)
+#define NVGPU_GPU_IOCTL_CLK_SET_INFO \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args)
 #define NVGPU_GPU_IOCTL_GET_MEMORY_STATE	\
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
 			struct nvgpu_gpu_get_memory_state_args)
--
cgit v1.2.2
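For context, a minimal userspace sketch (not part of the patch) of how the new SET_INFO/GET_INFO pair is meant to be driven, per the uapi comments above: submit a target frequency, then poll until the returned req_nr has been processed and read back actual_mhz. The device node path (/dev/nvhost-ctrl-gpu), the installed header location (<linux/nvgpu.h>), and the 1000 MHz target are assumptions for illustration only.

/*
 * Example only -- not part of the commit. Assumes the uapi header is
 * installed as <linux/nvgpu.h>, that the gk20a ctrl node is
 * /dev/nvhost-ctrl-gpu (platform dependent), and uses an arbitrary
 * 1000 MHz GPC2CLK target.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_gpu_clk_info info;
	struct nvgpu_gpu_clk_set_info_args set_args;
	struct nvgpu_gpu_clk_get_info_args get_args;
	int ctrl_fd;

	ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);
	if (ctrl_fd < 0)
		return 1;

	/* One entry: request 1000 MHz on the GPC2CLK domain. */
	memset(&info, 0, sizeof(info));
	info.clk_domain = NVGPU_GPU_CLK_DOMAIN_GPC2CLK;
	info.target_mhz = 1000;

	memset(&set_args, 0, sizeof(set_args));
	set_args.num_entries = 1;
	set_args.clk_info_entries = (__u64)(uintptr_t)&info;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_SET_INFO, &set_args) < 0) {
		close(ctrl_fd);
		return 1;
	}

	/* Poll until the arbiter has processed our request number. */
	do {
		usleep(1000);

		memset(&get_args, 0, sizeof(get_args));
		get_args.num_entries = 1;
		get_args.clk_info_entries = (__u64)(uintptr_t)&info;

		if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_INFO, &get_args) < 0)
			break;
	} while (!nvgpu_clk_req_complete(set_args.req_nr, get_args.last_req_nr));

	printf("GPC2CLK: target %u MHz, actual %u MHz\n",
	       info.target_mhz, info.actual_mhz);

	close(set_args.fd);	/* completion fd returned by SET_INFO */
	close(ctrl_fd);
	return 0;
}

Per the header comment, the fd returned by SET_INFO is intended for completion and event notification, so a real client could wait on that fd instead of busy-polling GET_INFO as the sketch does.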