From 519948a9c664020fd0b37118749faad2dfd73d97 Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Tue, 4 Sep 2018 10:55:33 +0530 Subject: gpu: nvgpu: add igpu support for clk_arbiter. This patch constructs clk_arbiter-specific code for gp10b as well as gv11b and does the necessary plumbing in the clk_arbiter code. The changes made are as follows: 1) Constructed clk_arb_gp10b.* files which add support for clk_arb-related HALs including the nvgpu_clk_arb_init and nvgpu_clk_arb_cb. This supports neither debugfs nor the VFUpdateEvent yet, and consequently has no support for arb->notifications. 2) Added gpcclk-specific variables corresponding to every gpc2clk in a given clk_arb-related struct. 3) Linux-specific support_clk_freq_controller is assigned true in platform_gp10b.c and platform_gv11b.c files. 4) Incremented the clk_arb_worker.put atomic variable during worker_deinit so as to allow the worker thread to be stopped. 5) Added the flag clk_arb_events_supported as part of struct nvgpu_clk_arb. This flag is used to selectively account for the extra refcounting present in OS-specific code, i.e. nvgpu_clk_arb_commit_request_fd. For igpus, the extra refcount is dropped during nvgpu_clk_arb_release_completion_dev. 
Bug 2061372

Change-Id: Id00acb106db2b46e55aa0324034a16a73723c078
Signed-off-by: Debarshi Dutta
Reviewed-on: https://git-master.nvidia.com/r/1774281
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c | 455 ++++++++++++++++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h |  39 +++
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c     |  11 +
 3 files changed, 505 insertions(+)
 create mode 100644 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h

(limited to 'drivers/gpu/nvgpu/gp10b')

diff --git a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c
new file mode 100644
index 00000000..4dcc3ca5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "gk20a/gk20a.h"
+#include <nvgpu/clk_arb.h>
+
+#include "clk_arb_gp10b.h"
+
+/*
+ * Report the clock domains handled by the gp10b arbiter: GPC2CLK only.
+ */
+u32 gp10b_get_arbiter_clk_domains(struct gk20a *g)
+{
+	(void)g;
+	clk_arb_dbg(g, " ");
+	return CTRL_CLK_DOMAIN_GPC2CLK;
+}
+
+/*
+ * Fetch the GPC2CLK frequency points by querying the GPCCLK domain through
+ * the clk HAL and doubling every returned frequency.
+ *
+ * The doubling runs only if the caller passed a non-zero *num_points on
+ * entry; when it was zero, freqs_in_mhz is not modified here.
+ * Returns -EINVAL for any domain other than GPC2CLK, else the HAL's result.
+ */
+int gp10b_get_arbiter_f_points(struct gk20a *g, u32 api_domain,
+				u32 *num_points, u16 *freqs_in_mhz)
+{
+	int ret = 0;
+	u32 i;
+	bool is_freq_list_available = false;
+
+	if (*num_points != 0U) {
+		is_freq_list_available = true;
+	}
+
+	clk_arb_dbg(g, " ");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		ret = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
+			num_points, freqs_in_mhz);
+
+		/* multiply by 2 for GPC2CLK */
+		if (ret == 0 && is_freq_list_available) {
+			for (i = 0U; i < *num_points; i++) {
+				freqs_in_mhz[i] *= 2U;
+			}
+		}
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Return the GPC2CLK range, computed as twice the GPCCLK range reported by
+ * the clk HAL. Returns -EINVAL for any domain other than GPC2CLK.
+ */
+int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
+		u16 *min_mhz, u16 *max_mhz)
+{
+	int ret = 0;
+
+	clk_arb_dbg(g, " ");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		ret = g->ops.clk.get_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK,
+				min_mhz, max_mhz);
+
+		if (ret == 0) {
+			*min_mhz *= 2U;
+			*max_mhz *= 2U;
+		}
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Return the default GPC2CLK frequency, which is the minimum of the GPC2CLK
+ * range. Returns -EINVAL for any domain other than GPC2CLK.
+ */
+int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
+		u16 *default_mhz)
+{
+	int ret = 0;
+	u16 min_mhz, max_mhz;
+
+	clk_arb_dbg(g, " ");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		ret = gp10b_get_arbiter_clk_range(g, api_domain,
+				&min_mhz, &max_mhz);
+
+		if (ret == 0) {
+			*default_mhz = min_mhz;
+		}
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Allocate and initialise the clock arbiter, start the arbiter worker and
+ * wait until the first arbitration run has completed.
+ *
+ * Returns 0 if an arbiter already exists, arb->status after the first run,
+ * or a negative errno when setup fails.
+ */
+int gp10b_init_clk_arbiter(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb = NULL;
+	u16 default_mhz;
+	int err;
+	int index;
+	struct nvgpu_clk_vf_table *table;
+
+	clk_arb_dbg(g, " ");
+
+	if (g->clk_arb != NULL) {
+		return 0;
+	}
+
+	arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
+	if (arb == NULL) {
+		return -ENOMEM;
+	}
+
+	arb->clk_arb_events_supported = false;
+
+	err = nvgpu_mutex_init(&arb->pstate_lock);
+	if (err != 0) {
+		goto mutex_fail;
+	}
+
+	nvgpu_spinlock_init(&arb->sessions_lock);
+	nvgpu_spinlock_init(&arb->users_lock);
+	nvgpu_spinlock_init(&arb->requests_lock);
+
+	arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
+	if (arb->gpc2clk_f_points == NULL) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	for (index = 0; index < 2; index++) {
+		table = &arb->vf_table_pool[index];
+		table->gpc2clk_num_points = MAX_F_POINTS;
+
+		table->gpc2clk_points = (struct nvgpu_clk_vf_point *)
+			nvgpu_kcalloc(g, MAX_F_POINTS,
+				sizeof(struct nvgpu_clk_vf_point));
+		if (table->gpc2clk_points == NULL) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
+	}
+
+	g->clk_arb = arb;
+	arb->g = g;
+
+	err = g->ops.clk_arb.get_arbiter_clk_default(g,
+			CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
+	if (err < 0) {
+		err = -EINVAL;
+		goto init_fail;
+	}
+
+	arb->gpc2clk_default_mhz = default_mhz;
+
+	err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&arb->gpc2clk_min, &arb->gpc2clk_max);
+
+	if (err < 0) {
+		err = -EINVAL;
+		goto init_fail;
+	}
+
+	arb->actual = &arb->actual_pool[0];
+
+	nvgpu_atomic_set(&arb->req_nr, 0);
+
+	nvgpu_atomic64_set(&arb->alarm_mask, 0);
+	err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
+		DEFAULT_EVENT_NUMBER);
+	if (err < 0) {
+		goto init_fail;
+	}
+
+	nvgpu_init_list_node(&arb->users);
+	nvgpu_init_list_node(&arb->sessions);
+	nvgpu_init_list_node(&arb->requests);
+
+	err = nvgpu_cond_init(&arb->request_wq);
+	if (err < 0) {
+		goto init_fail;
+	}
+
+	nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
+	arb->update_arb_work_item.arb = arb;
+	arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
+
+	err = nvgpu_clk_arb_worker_init(g);
+	if (err < 0) {
+		goto init_fail;
+	}
+
+	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
+
+	do {
+		/* Check that first run is completed */
+		nvgpu_smp_mb();
+		NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
+			nvgpu_atomic_read(&arb->req_nr) != 0, 0);
+	} while (nvgpu_atomic_read(&arb->req_nr) == 0);
+
+
+	return arb->status;
+
+init_fail:
+	/* g->clk_arb may already point at arb; never leave it dangling. */
+	g->clk_arb = NULL;
+	nvgpu_kfree(g, arb->gpc2clk_f_points);
+
+	for (index = 0; index < 2; index++) {
+		nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
+	}
+
+	nvgpu_mutex_destroy(&arb->pstate_lock);
+
+mutex_fail:
+	nvgpu_kfree(g, arb);
+
+	return err;
+}
+
+/*
+ * Arbiter worker callback: pick the highest GPC2CLK target among the live
+ * sessions, clamp it to [gpc2clk_min, gpc2clk_max], program the matching
+ * GPCCLK rate and signal completion for every queued request.
+ */
+void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
+{
+	struct nvgpu_clk_session *session;
+	struct nvgpu_clk_dev *dev;
+	struct nvgpu_clk_dev *tmp;
+	struct nvgpu_clk_arb_target *target, *actual;
+	struct gk20a *g = arb->g;
+
+	bool gpc2clk_set;
+
+	int status = 0;
+	unsigned long rounded_rate = 0;
+
+	u16 gpc2clk_target, gpc2clk_session_target;
+
+	clk_arb_dbg(g, " ");
+
+	/* Only one arbiter should be running */
+	gpc2clk_target = 0;
+
+	nvgpu_spinlock_acquire(&arb->sessions_lock);
+	nvgpu_list_for_each_entry(session, &arb->sessions,
+			nvgpu_clk_session, link) {
+		if (session->zombie) {
+			continue;
+		}
+		gpc2clk_set = false;
+		target = (session->target == &session->target_pool[0] ?
+				&session->target_pool[1] :
+				&session->target_pool[0]);
+		nvgpu_spinlock_acquire(&session->session_lock);
+		if (nvgpu_list_empty(&session->targets) == 0) {
+			/* Copy over state */
+			target->gpc2clk = session->target->gpc2clk;
+			/* Query the latest committed request */
+			nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
+					nvgpu_clk_dev, node) {
+				if (!gpc2clk_set &&
+					dev->gpc2clk_target_mhz != (u16)0) {
+					target->gpc2clk =
+						dev->gpc2clk_target_mhz;
+					gpc2clk_set = true;
+				}
+				nvgpu_ref_get(&dev->refcount);
+				nvgpu_list_del(&dev->node);
+				nvgpu_spinlock_acquire(&arb->requests_lock);
+				nvgpu_list_add(&dev->node, &arb->requests);
+				nvgpu_spinlock_release(&arb->requests_lock);
+			}
+			session->target = target;
+		}
+		nvgpu_spinlock_release(&session->session_lock);
+
+		gpc2clk_target =
+			gpc2clk_target > session->target->gpc2clk ?
+			gpc2clk_target : session->target->gpc2clk;
+	}
+	nvgpu_spinlock_release(&arb->sessions_lock);
+
+	gpc2clk_target = (gpc2clk_target > (u16)0) ? gpc2clk_target :
+			arb->gpc2clk_default_mhz;
+
+	if (gpc2clk_target < arb->gpc2clk_min) {
+		gpc2clk_target = arb->gpc2clk_min;
+	}
+
+	if (gpc2clk_target > arb->gpc2clk_max) {
+		gpc2clk_target = arb->gpc2clk_max;
+	}
+
+	gpc2clk_session_target = gpc2clk_target;
+
+	if (arb->actual->gpc2clk == gpc2clk_target) {
+		nvgpu_atomic_inc(&arb->req_nr);
+		nvgpu_cond_signal_interruptible(&arb->request_wq);
+		goto exit_arb;
+	}
+
+	nvgpu_mutex_acquire(&arb->pstate_lock);
+
+	/* get the rounded_rate in terms of Hz for igpu
+	 * pass (gpcclk) freq = (gpc2clk) freq / 2
+	 */
+	status = g->ops.clk.clk_get_round_rate(g,
+		CTRL_CLK_DOMAIN_GPCCLK, (gpc2clk_session_target / 2) * 1000000UL, &rounded_rate);
+
+	clk_arb_dbg(g, "rounded_rate: %lu\n",
+		rounded_rate);
+
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		nvgpu_atomic_inc(&arb->req_nr);
+		nvgpu_cond_signal_interruptible(&arb->request_wq);
+		goto exit_arb;
+	}
+
+	/* the igpu set_rate accepts freq in Hz */
+	status = g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
+
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		nvgpu_atomic_inc(&arb->req_nr);
+		nvgpu_cond_signal_interruptible(&arb->request_wq);
+		goto exit_arb;
+	}
+
+	actual = ((NV_ACCESS_ONCE(arb->actual)) == &arb->actual_pool[0] ?
+			&arb->actual_pool[1] : &arb->actual_pool[0]);
+
+	/* do not reorder this pointer */
+	nvgpu_smp_rmb();
+	actual->gpc2clk = gpc2clk_target;
+	arb->status = 0;
+
+	/* Make changes visible to other threads */
+	nvgpu_smp_wmb();
+	arb->actual = actual;
+
+	/* status must be visible before atomic inc */
+	nvgpu_smp_wmb();
+	nvgpu_atomic_inc(&arb->req_nr);
+
+	/* Unlock pstate change for PG */
+	nvgpu_mutex_release(&arb->pstate_lock);
+
+	nvgpu_cond_signal_interruptible(&arb->request_wq);
+
+exit_arb:
+	if (status < 0) {
+		nvgpu_err(g, "Error in arbiter update");
+	}
+
+	/* notify completion for all requests */
+	nvgpu_spinlock_acquire(&arb->requests_lock);
+	nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
+			nvgpu_clk_dev, node) {
+		nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM);
+		nvgpu_clk_arb_event_post_event(dev);
+		/* unlink first: dropping the reference may free dev */
+		nvgpu_list_del(&dev->node);
+		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+	}
+	nvgpu_spinlock_release(&arb->requests_lock);
+
+	clk_arb_dbg(g, "done");
+}
+
+/*
+ * Free the frequency-point arrays and VF tables, destroy the pstate lock,
+ * free the arbiter and clear g->clk_arb.
+ */
+void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb)
+{
+	struct gk20a *g = arb->g;
+	int index;
+
+	nvgpu_kfree(g, arb->gpc2clk_f_points);
+	nvgpu_kfree(g, arb->mclk_f_points);
+
+	for (index = 0; index < 2; index++) {
+		nvgpu_kfree(g,
+			arb->vf_table_pool[index].gpc2clk_points);
+		nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
+	}
+
+	nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
+	nvgpu_kfree(g, g->clk_arb);
+
+	g->clk_arb = NULL;
+}
\ No newline at end of file
diff --git a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h
new file mode 100644
index 00000000..6b9966c5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef CLK_ARB_GP10B_H +#define CLK_ARB_GP10B_H + +struct nvgpu_clk_session; +struct nvgpu_clk_arb; + +u32 gp10b_get_arbiter_clk_domains(struct gk20a *g); +int gp10b_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); +int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz); +int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz); +int gp10b_init_clk_arbiter(struct gk20a *g); +void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb); +void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb); + +#endif /* CLK_ARB_GP10B_H */ diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 769cab74..1f9e84d3 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -68,6 +68,7 @@ #include "gp10b/fifo_gp10b.h" #include "gp10b/regops_gp10b.h" #include "gp10b/ecc_gp10b.h" +#include "gp10b/clk_arb_gp10b.h" #include "gm20b/gr_gm20b.h" #include "gm20b/fifo_gm20b.h" @@ -611,6 +612,15 @@ static const struct gpu_ops gp10b_ops = { .get_irqdest = gk20a_pmu_get_irqdest, .is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en, }, + .clk_arb = { + .get_arbiter_clk_domains = gp10b_get_arbiter_clk_domains, + .get_arbiter_f_points = gp10b_get_arbiter_f_points, + .get_arbiter_clk_range = gp10b_get_arbiter_clk_range, + .get_arbiter_clk_default = gp10b_get_arbiter_clk_default, + .arbiter_clk_init = gp10b_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gp10b_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gp10b_clk_arb_cleanup, + }, .regops = { .exec_regops = exec_regops_gk20a, .get_global_whitelist_ranges = @@ -735,6 +745,7 @@ int gp10b_init_hal(struct gk20a *g) gops->pramin = gp10b_ops.pramin; gops->therm = gp10b_ops.therm; gops->pmu = gp10b_ops.pmu; + gops->clk_arb = gp10b_ops.clk_arb; gops->regops = gp10b_ops.regops; gops->mc = gp10b_ops.mc; gops->debug = gp10b_ops.debug; -- cgit v1.2.2