From 519948a9c664020fd0b37118749faad2dfd73d97 Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Tue, 4 Sep 2018 10:55:33 +0530 Subject: gpu: nvgpu: add igpu support for clk_arbiter. This patch adds clk_arbiter-specific code for gp10b as well as gv11b and does the necessary plumbing in the clk_arbiter code. The changes made are as follows. 1) Added the clk_arb_gp10b.* files, which add support for the clk_arb-related HALs, including nvgpu_clk_arb_init and nvgpu_clk_arb_cb. There is no support for debugfs or for the VFUpdateEvent yet, and consequently no support for arb->notifications. 2) Added gpcclk-specific variables corresponding to every gpc2clk in a given clk_arb-related struct. 3) The Linux-specific support_clk_freq_controller flag is set to true in the platform_gp10b_tegra.c and platform_gv11b_tegra.c files. 4) Incremented the clk_arb_worker.put atomic variable during worker_deinit so that the worker thread can be stopped. 5) Added the flag clk_arb_events_supported to struct nvgpu_clk_arb. This flag is used to selectively account for the extra refcount taken in OS-specific code, i.e. nvgpu_clk_arb_commit_request_fd. For iGPUs, the extra refcount is dropped in nvgpu_clk_arb_release_completion_dev.
Bug 2061372 Change-Id: Id00acb106db2b46e55aa0324034a16a73723c078 Signed-off-by: Debarshi Dutta Reviewed-on: https://git-master.nvidia.com/r/1774281 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/boardobj/boardobj.h | 1 + drivers/gpu/nvgpu/clk/clk_arb.c | 38 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 3 +- drivers/gpu/nvgpu/gp106/clk_arb_gp106.c | 9 + drivers/gpu/nvgpu/gp106/clk_arb_gp106.h | 2 + drivers/gpu/nvgpu/gp106/hal_gp106.c | 1 + drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c | 417 ++++++++++++++++++++++ drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h | 39 ++ drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 11 + drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 11 + drivers/gpu/nvgpu/include/nvgpu/clk_arb.h | 2 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 + drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c | 11 + drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c | 2 + drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 2 + 17 files changed, 544 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c create mode 100644 drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index d59c3f74..926ce056 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -287,6 +287,7 @@ nvgpu-$(CONFIG_GK20A_CYCLE_STATS) += \ nvgpu-y += \ gp10b/gr_gp10b.o \ + gp10b/clk_arb_gp10b.o \ gp10b/gr_ctx_gp10b.o \ gp10b/ce_gp10b.o \ gp10b/fifo_gp10b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index fce8ea71..a26f827d 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -176,6 +176,7 @@ srcs := os/posix/nvgpu.c \ gp10b/pmu_gp10b.c \ gp10b/hal_gp10b.c \ gp10b/regops_gp10b.c \ + gp10b/clk_arb_gp10b.c \ gp10b/fecs_trace_gp10b.c \ gp10b/gp10b.c \ gp10b/ecc_gp10b.c \ diff --git a/drivers/gpu/nvgpu/boardobj/boardobj.h 
b/drivers/gpu/nvgpu/boardobj/boardobj.h index 4df14aaa..b2ab990c 100644 --- a/drivers/gpu/nvgpu/boardobj/boardobj.h +++ b/drivers/gpu/nvgpu/boardobj/boardobj.h @@ -23,6 +23,7 @@ #ifndef _BOARDOBJ_H_ #define _BOARDOBJ_H_ +#include #include #include "ctrl/ctrlboardobj.h" diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 81220c6e..452d9de3 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -56,10 +56,12 @@ int nvgpu_clk_notification_queue_alloc(struct gk20a *g, void nvgpu_clk_notification_queue_free(struct gk20a *g, struct nvgpu_clk_notification_queue *queue) { - nvgpu_kfree(g, queue->notifications); - queue->size = 0; - nvgpu_atomic_set(&queue->head, 0); - nvgpu_atomic_set(&queue->tail, 0); + if (queue->size > 0) { + nvgpu_kfree(g, queue->notifications); + queue->size = 0; + nvgpu_atomic_set(&queue->head, 0); + nvgpu_atomic_set(&queue->tail, 0); + } } static void nvgpu_clk_arb_queue_notification(struct gk20a *g, @@ -651,6 +653,10 @@ static int nvgpu_clk_arb_poll_worker(void *arg) &worker->wq, nvgpu_clk_arb_worker_pending(g, get), 0); + if (nvgpu_thread_should_stop(&worker->poll_task)) { + break; + } + if (ret == 0) nvgpu_clk_arb_worker_process(g, &get); } @@ -782,6 +788,8 @@ void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) static void nvgpu_clk_arb_worker_deinit(struct gk20a *g) { + nvgpu_atomic_inc(&g->clk_arb_worker.put); + nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock); nvgpu_thread_stop(&g->clk_arb_worker.poll_task); nvgpu_mutex_release(&g->clk_arb_worker.start_lock); @@ -847,6 +855,8 @@ void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount) struct nvgpu_clk_dev, refcount); struct nvgpu_clk_session *session = dev->session; + nvgpu_clk_notification_queue_free(session->g, &dev->queue); + nvgpu_kfree(session->g, dev); } @@ -964,16 +974,16 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, switch (api_domain) { case NVGPU_CLK_DOMAIN_GPCCLK: - err = 
g->ops.clk.clk_domain_get_f_points(g, - CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints); + err = g->ops.clk_arb.get_arbiter_f_points(g, + CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints); if (err || !fpoints) return err; for (i = 0; i < *max_points; i++) fpoints[i] /= 2; return 0; case NVGPU_CLK_DOMAIN_MCLK: - return g->ops.clk.clk_domain_get_f_points(g, - CTRL_CLK_DOMAIN_MCLK, max_points, fpoints); + return g->ops.clk_arb.get_arbiter_f_points(g, + CTRL_CLK_DOMAIN_MCLK, max_points, fpoints); default: return -EINVAL; } @@ -985,6 +995,10 @@ int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, int err = 0; struct nvgpu_clk_arb_target *target = session->target; + if (!nvgpu_clk_arb_is_valid_domain(session->g, api_domain)) { + return -EINVAL; + } + switch (api_domain) { case NVGPU_CLK_DOMAIN_MCLK: *freq_mhz = target->mclk; @@ -1008,6 +1022,10 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, int err = 0; struct nvgpu_clk_arb_target *actual = arb->actual; + if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) { + return -EINVAL; + } + switch (api_domain) { case NVGPU_CLK_DOMAIN_MCLK: *freq_mhz = actual->mclk; @@ -1027,6 +1045,10 @@ int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, u32 api_domain, u16 *freq_mhz) { + if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) { + return -EINVAL; + } + switch (api_domain) { case NVGPU_CLK_DOMAIN_MCLK: *freq_mhz = g->ops.clk.measure_freq(g, CTRL_CLK_DOMAIN_MCLK) / diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 74fc991d..1caa1dcf 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -486,7 +486,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true); __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true); - if (g->ops.clk_arb.get_arbiter_clk_domains) { + if (g->ops.clk_arb.get_arbiter_clk_domains != NULL && + 
g->ops.clk.support_clk_freq_controller) { __nvgpu_set_enabled(g, NVGPU_SUPPORT_CLOCK_CONTROLS, true); } diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c index 001f2bfc..87fc6a1f 100644 --- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c @@ -31,6 +31,13 @@ u32 gp106_get_arbiter_clk_domains(struct gk20a *g) return (CTRL_CLK_DOMAIN_MCLK|CTRL_CLK_DOMAIN_GPC2CLK); } +int gp106_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz) +{ + return g->ops.clk.clk_domain_get_f_points(g, + api_domain, num_points, freqs_in_mhz); +} + int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, u16 *min_mhz, u16 *max_mhz) { @@ -129,6 +136,8 @@ int gp106_init_clk_arbiter(struct gk20a *g) if (!arb) return -ENOMEM; + arb->clk_arb_events_supported = true; + err = nvgpu_mutex_init(&arb->pstate_lock); if (err) goto mutex_fail; diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h index e2b2834c..4c1257e1 100644 --- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h +++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h @@ -26,6 +26,8 @@ struct nvgpu_clk_session; struct nvgpu_clk_arb; u32 gp106_get_arbiter_clk_domains(struct gk20a *g); +int gp106_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, u16 *min_mhz, u16 *max_mhz); int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index ef66be56..d3804f0e 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -679,6 +679,7 @@ static const struct gpu_ops gp106_ops = { }, .clk_arb = { .get_arbiter_clk_domains = gp106_get_arbiter_clk_domains, + .get_arbiter_f_points = gp106_get_arbiter_f_points, .get_arbiter_clk_range = gp106_get_arbiter_clk_range, 
.get_arbiter_clk_default = gp106_get_arbiter_clk_default, .get_current_pstate = nvgpu_clk_arb_get_current_pstate, diff --git a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c new file mode 100644 index 00000000..4dcc3ca5 --- /dev/null +++ b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.c @@ -0,0 +1,417 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "gk20a/gk20a.h"
+/*
+ * NOTE(review): the include target was lost when this patch was extracted;
+ * restored as <nvgpu/clk_arb.h>, which declares struct nvgpu_clk_arb - confirm.
+ */
+#include <nvgpu/clk_arb.h>
+
+#include "clk_arb_gp10b.h"
+
+/*
+ * Report the clock domains the arbiter manages on this chip.
+ *
+ * Returns CTRL_CLK_DOMAIN_GPC2CLK only; no MCLK domain is reported.
+ */
+u32 gp10b_get_arbiter_clk_domains(struct gk20a *g)
+{
+	(void)g;
+	clk_arb_dbg(g, " ");
+	return CTRL_CLK_DOMAIN_GPC2CLK;
+}
+
+/*
+ * Fetch the frequency points of an arbiter clock domain.
+ *
+ * Only CTRL_CLK_DOMAIN_GPC2CLK is accepted. The points are queried from the
+ * clk HAL for GPCCLK and then doubled to express them as GPC2CLK values.
+ *
+ * @g:            GPU instance.
+ * @api_domain:   CTRL_CLK_DOMAIN_* selector.
+ * @num_points:   in/out point count, passed through to the clk HAL. When it
+ *                is zero on entry no doubling is done (presumably a
+ *                count-only query - confirm against the clk HAL).
+ * @freqs_in_mhz: output array; may be NULL, in which case nothing is scaled.
+ *
+ * Returns the clk HAL status, or -EINVAL for any other domain.
+ */
+int gp10b_get_arbiter_f_points(struct gk20a *g,u32 api_domain,
+	u32 *num_points, u16 *freqs_in_mhz)
+{
+	int ret = 0;
+	u32 i;
+	bool is_freq_list_available = false;
+
+	/*
+	 * The generic caller tolerates a NULL list, so never scale through a
+	 * NULL pointer even if a non-zero count was passed in.
+	 */
+	if (*num_points != 0U && freqs_in_mhz != NULL) {
+		is_freq_list_available = true;
+	}
+
+	clk_arb_dbg(g, " ");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		ret = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
+			num_points, freqs_in_mhz);
+
+		/* multiply by 2 for GPC2CLK */
+		if (ret == 0 && is_freq_list_available) {
+			for (i = 0U; i < *num_points; i++) {
+				freqs_in_mhz[i] *= 2U;
+			}
+		}
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch the [min, max] range of an arbiter clock domain in MHz.
+ *
+ * Only CTRL_CLK_DOMAIN_GPC2CLK is accepted. The GPCCLK range reported by the
+ * clk HAL is doubled on success; the outputs are left as the HAL wrote them
+ * on failure.
+ *
+ * Returns the clk HAL status, or -EINVAL for any other domain.
+ */
+int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
+	u16 *min_mhz, u16 *max_mhz)
+{
+	int ret = 0;
+
+	clk_arb_dbg(g, " ");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		ret = g->ops.clk.get_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK,
+			min_mhz, max_mhz);
+
+		if (ret == 0) {
+			*min_mhz *= 2U;
+			*max_mhz *= 2U;
+		}
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch the default frequency of an arbiter clock domain in MHz.
+ *
+ * For CTRL_CLK_DOMAIN_GPC2CLK the default is the minimum of the range
+ * returned by gp10b_get_arbiter_clk_range(). *default_mhz is written only
+ * on success.
+ *
+ * Returns 0 on success, the range query's error, or -EINVAL for any other
+ * domain.
+ */
+int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
+	u16 *default_mhz)
+{
+	int ret = 0;
+	u16 min_mhz, max_mhz;
+
+	clk_arb_dbg(g, " ");
+
+	switch (api_domain) {
+	case CTRL_CLK_DOMAIN_GPC2CLK:
+		ret = gp10b_get_arbiter_clk_range(g, api_domain,
+			&min_mhz, &max_mhz);
+
+		if (ret == 0) {
+			*default_mhz = min_mhz;
+		}
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Allocate and start the clock arbiter for this GPU.
+ *
+ * Does nothing and returns 0 when g->clk_arb is already set. Otherwise it
+ * allocates the arbiter, its frequency-point array and both VF tables,
+ * reads the GPC2CLK default and range through the clk_arb HAL, starts the
+ * arbiter worker, queues a first arbitration run and waits until req_nr
+ * becomes non-zero.
+ *
+ * Returns arb->status after the first run, or a negative errno if setup
+ * fails; on a setup failure everything allocated here is released and
+ * g->clk_arb is left NULL.
+ */
+int gp10b_init_clk_arbiter(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb = NULL;
+	u16 default_mhz;
+	int err;
+	int index;
+	struct nvgpu_clk_vf_table *table;
+
+	clk_arb_dbg(g, " ");
+
+	if (g->clk_arb != NULL) {
+		return 0;
+	}
+
+	arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
+	if (arb == NULL) {
+		return -ENOMEM;
+	}
+
+	/* no clk_arb event support on this chip (see the commit message) */
+	arb->clk_arb_events_supported = false;
+
+	err = nvgpu_mutex_init(&arb->pstate_lock);
+	if (err != 0) {
+		goto mutex_fail;
+	}
+
+	nvgpu_spinlock_init(&arb->sessions_lock);
+	nvgpu_spinlock_init(&arb->users_lock);
+	nvgpu_spinlock_init(&arb->requests_lock);
+
+	arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
+	if (arb->gpc2clk_f_points == NULL) {
+		err = -ENOMEM;
+		goto init_fail;
+	}
+
+	for (index = 0; index < 2; index++) {
+		table = &arb->vf_table_pool[index];
+		table->gpc2clk_num_points = MAX_F_POINTS;
+
+		table->gpc2clk_points = (struct nvgpu_clk_vf_point *)
+			nvgpu_kcalloc(g, MAX_F_POINTS,
+				sizeof(struct nvgpu_clk_vf_point));
+		if (table->gpc2clk_points == NULL) {
+			err = -ENOMEM;
+			goto init_fail;
+		}
+	}
+
+	g->clk_arb = arb;
+	arb->g = g;
+
+	err = g->ops.clk_arb.get_arbiter_clk_default(g,
+			CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz);
+	if (err < 0) {
+		err = -EINVAL;
+		goto init_fail;
+	}
+
+	arb->gpc2clk_default_mhz = default_mhz;
+
+	err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&arb->gpc2clk_min, &arb->gpc2clk_max);
+
+	if (err < 0) {
+		err = -EINVAL;
+		goto init_fail;
+	}
+
+	arb->actual = &arb->actual_pool[0];
+
+	nvgpu_atomic_set(&arb->req_nr, 0);
+
+	nvgpu_atomic64_set(&arb->alarm_mask, 0);
+	err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
+		DEFAULT_EVENT_NUMBER);
+	if (err < 0) {
+		goto init_fail;
+	}
+
+	nvgpu_init_list_node(&arb->users);
+	nvgpu_init_list_node(&arb->sessions);
+	nvgpu_init_list_node(&arb->requests);
+
+	err = nvgpu_cond_init(&arb->request_wq);
+	if (err < 0) {
+		goto init_fail;
+	}
+
+	nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
+	arb->update_arb_work_item.arb = arb;
+	arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
+
+	err = nvgpu_clk_arb_worker_init(g);
+	if (err < 0) {
+		goto init_fail;
+	}
+
+	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
+
+	do {
+		/* Check that first run is completed */
+		nvgpu_smp_mb();
+		NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
+			nvgpu_atomic_read(&arb->req_nr) != 0, 0);
+	} while (nvgpu_atomic_read(&arb->req_nr) == 0);
+
+
+	return arb->status;
+
+init_fail:
+	/*
+	 * g->clk_arb may already point at arb; clear it before arb is freed
+	 * so that a later init or cleanup never sees a dangling arbiter.
+	 */
+	g->clk_arb = NULL;
+
+	/* no-op if the queue was never allocated (size is still zero) */
+	nvgpu_clk_notification_queue_free(g, &arb->notification_queue);
+
+	nvgpu_kfree(g, arb->gpc2clk_f_points);
+
+	for (index = 0; index < 2; index++) {
+		nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
+	}
+
+	nvgpu_mutex_destroy(&arb->pstate_lock);
+
+mutex_fail:
+	nvgpu_kfree(g, arb);
+
+	return err;
+}
+
+/*
+ * One arbitration pass.
+ *
+ * Walks every non-zombie session, folds its pending requests into the
+ * session's spare target slot, and moves those requests onto arb->requests.
+ * The highest session target (or the default when none is set) is clamped
+ * to [gpc2clk_min, gpc2clk_max]; if it differs from the current actual
+ * value it is rounded and programmed through the clk HAL as a GPCCLK rate.
+ * In every case req_nr is incremented and request_wq signalled, and all
+ * collected requests are completed and released.
+ */
+void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
+{
+	struct nvgpu_clk_session *session;
+	struct nvgpu_clk_dev *dev;
+	struct nvgpu_clk_dev *tmp;
+	struct nvgpu_clk_arb_target *target, *actual;
+	struct gk20a *g = arb->g;
+
+	bool gpc2clk_set;
+
+	int status = 0;
+	unsigned long rounded_rate = 0;
+
+	u16 gpc2clk_target, gpc2clk_session_target;
+
+	clk_arb_dbg(g, " ");
+
+	/* Only one arbiter should be running */
+	gpc2clk_target = 0;
+
+	nvgpu_spinlock_acquire(&arb->sessions_lock);
+	nvgpu_list_for_each_entry(session, &arb->sessions,
+			nvgpu_clk_session, link) {
+		if (session->zombie) {
+			continue;
+		}
+		gpc2clk_set = false;
+		/* pick the target slot the session is not currently using */
+		target = (session->target == &session->target_pool[0] ?
+				&session->target_pool[1] :
+				&session->target_pool[0]);
+		nvgpu_spinlock_acquire(&session->session_lock);
+		if (nvgpu_list_empty(&session->targets) == 0) {
+			/* Copy over state */
+			target->gpc2clk = session->target->gpc2clk;
+			/* Query the latest committed request */
+			nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
+					nvgpu_clk_dev, node) {
+				if (!gpc2clk_set &&
+					dev->gpc2clk_target_mhz != (u16)0) {
+					target->gpc2clk =
+						dev->gpc2clk_target_mhz;
+					gpc2clk_set = true;
+				}
+				/* hold dev while it sits on arb->requests */
+				nvgpu_ref_get(&dev->refcount);
+				nvgpu_list_del(&dev->node);
+				nvgpu_spinlock_acquire(&arb->requests_lock);
+				nvgpu_list_add(&dev->node, &arb->requests);
+				nvgpu_spinlock_release(&arb->requests_lock);
+			}
+			session->target = target;
+		}
+		nvgpu_spinlock_release(&session->session_lock);
+
+		gpc2clk_target =
+			gpc2clk_target > session->target->gpc2clk ?
+			gpc2clk_target : session->target->gpc2clk;
+	}
+	nvgpu_spinlock_release(&arb->sessions_lock);
+
+	gpc2clk_target = (gpc2clk_target > (u16)0) ? gpc2clk_target :
+			arb->gpc2clk_default_mhz;
+
+	if (gpc2clk_target < arb->gpc2clk_min) {
+		gpc2clk_target = arb->gpc2clk_min;
+	}
+
+	if (gpc2clk_target > arb->gpc2clk_max) {
+		gpc2clk_target = arb->gpc2clk_max;
+	}
+
+	gpc2clk_session_target = gpc2clk_target;
+
+	/* nothing to program: just report completion of this run */
+	if (arb->actual->gpc2clk == gpc2clk_target) {
+		nvgpu_atomic_inc(&arb->req_nr);
+		nvgpu_cond_signal_interruptible(&arb->request_wq);
+		goto exit_arb;
+	}
+
+	nvgpu_mutex_acquire(&arb->pstate_lock);
+
+	/* get the rounded_rate in terms of Hz for igpu
+	 * pass (gpcclk) freq = (gpc2clk) freq / 2
+	 */
+	status = g->ops.clk.clk_get_round_rate(g,
+		CTRL_CLK_DOMAIN_GPCCLK, (gpc2clk_session_target/2) * 1000000UL, &rounded_rate);
+
+	clk_arb_dbg(g, "rounded_rate: %lu\n",
+		rounded_rate);
+
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		nvgpu_atomic_inc(&arb->req_nr);
+		nvgpu_cond_signal_interruptible(&arb->request_wq);
+		goto exit_arb;
+	}
+
+	/* the igpu set_rate accepts freq in Hz */
+	status = g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
+
+	if (status < 0) {
+		arb->status = status;
+		nvgpu_mutex_release(&arb->pstate_lock);
+
+		/* make status visible */
+		nvgpu_smp_mb();
+		nvgpu_atomic_inc(&arb->req_nr);
+		nvgpu_cond_signal_interruptible(&arb->request_wq);
+		goto exit_arb;
+	}
+
+	/* write the new value into the slot that is not currently published */
+	actual = ((NV_ACCESS_ONCE(arb->actual)) == &arb->actual_pool[0] ?
+			&arb->actual_pool[1] : &arb->actual_pool[0]);
+
+	/* do not reorder this pointer */
+	nvgpu_smp_rmb();
+	actual->gpc2clk = gpc2clk_target;
+	arb->status = 0;
+
+	/* Make changes visible to other threads */
+	nvgpu_smp_wmb();
+	arb->actual = actual;
+
+	/* status must be visible before atomic inc */
+	nvgpu_smp_wmb();
+	nvgpu_atomic_inc(&arb->req_nr);
+
+	/* Unlock pstate change for PG */
+	nvgpu_mutex_release(&arb->pstate_lock);
+
+	nvgpu_cond_signal_interruptible(&arb->request_wq);
+
+exit_arb:
+	if (status < 0) {
+		nvgpu_err(g, "Error in arbiter update");
+	}
+
+	/* notify completion for all requests */
+	nvgpu_spinlock_acquire(&arb->requests_lock);
+	nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
+			nvgpu_clk_dev, node) {
+		nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM);
+		nvgpu_clk_arb_event_post_event(dev);
+		/*
+		 * Unlink before dropping the reference: the put may be the
+		 * last one, and nvgpu_clk_arb_free_fd() frees dev.
+		 */
+		nvgpu_list_del(&dev->node);
+		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+	}
+	nvgpu_spinlock_release(&arb->requests_lock);
+
+	clk_arb_dbg(g, "done");
+}
+
+/*
+ * Release the arbiter's frequency-point arrays, both VF tables, its pstate
+ * mutex and the arbiter itself, then clear g->clk_arb.
+ *
+ * NOTE(review): the mutex and the final free go through g->clk_arb rather
+ * than arb; this assumes arb == arb->g->clk_arb - confirm.
+ */
+void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb)
+{
+	struct gk20a *g = arb->g;
+	int index;
+
+	nvgpu_kfree(g, arb->gpc2clk_f_points);
+	nvgpu_kfree(g, arb->mclk_f_points);
+
+	for (index = 0; index < 2; index++) {
+		nvgpu_kfree(g,
+			arb->vf_table_pool[index].gpc2clk_points);
+		nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
+	}
+
+	nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
+	nvgpu_kfree(g, g->clk_arb);
+
+	g->clk_arb = NULL;
+}
\ No newline at end of file
diff --git a/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h
new file mode 100644
index 00000000..6b9966c5
--- /dev/null
+++ b/drivers/gpu/nvgpu/gp10b/clk_arb_gp10b.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef CLK_ARB_GP10B_H +#define CLK_ARB_GP10B_H + +/* clk_arb HAL entry points implemented in clk_arb_gp10b.c. */ +struct nvgpu_clk_session; +struct nvgpu_clk_arb; + +u32 gp10b_get_arbiter_clk_domains(struct gk20a *g); +int gp10b_get_arbiter_f_points(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); +int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, + u16 *min_mhz, u16 *max_mhz); +int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, + u16 *default_mhz); +int gp10b_init_clk_arbiter(struct gk20a *g); +void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb); +void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb); + +#endif /* CLK_ARB_GP10B_H */ diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 769cab74..1f9e84d3 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -68,6 +68,7 @@ #include "gp10b/fifo_gp10b.h" #include "gp10b/regops_gp10b.h" #include "gp10b/ecc_gp10b.h" +#include "gp10b/clk_arb_gp10b.h" #include "gm20b/gr_gm20b.h" #include "gm20b/fifo_gm20b.h" @@ -611,6 +612,15 @@ static const struct gpu_ops gp10b_ops = { .get_irqdest = gk20a_pmu_get_irqdest, .is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en, }, + .clk_arb = { + .get_arbiter_clk_domains = gp10b_get_arbiter_clk_domains, + .get_arbiter_f_points = gp10b_get_arbiter_f_points, + .get_arbiter_clk_range = gp10b_get_arbiter_clk_range, + .get_arbiter_clk_default = gp10b_get_arbiter_clk_default, + .arbiter_clk_init = gp10b_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gp10b_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gp10b_clk_arb_cleanup, + }, .regops = { .exec_regops = exec_regops_gk20a, .get_global_whitelist_ranges = @@ -735,6 +745,7 @@ int gp10b_init_hal(struct gk20a *g) gops->pramin = gp10b_ops.pramin; gops->therm = gp10b_ops.therm; gops->pmu = gp10b_ops.pmu; + gops->clk_arb = gp10b_ops.clk_arb; gops->regops = gp10b_ops.regops; gops->mc = gp10b_ops.mc; gops->debug = gp10b_ops.debug; diff --git
a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 591a7786..9444002b 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -67,6 +67,7 @@ #include "gp10b/mm_gp10b.h" #include "gp10b/pmu_gp10b.h" #include "gp10b/gr_gp10b.h" +#include "gp10b/clk_arb_gp10b.h" #include "gp106/pmu_gp106.h" #include "gp106/acr_gp106.h" @@ -708,6 +709,15 @@ static const struct gpu_ops gv11b_ops = { .handle_ext_irq = gv11b_pmu_handle_ext_irq, .is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en, }, + .clk_arb = { + .get_arbiter_clk_domains = gp10b_get_arbiter_clk_domains, + .get_arbiter_f_points = gp10b_get_arbiter_f_points, + .get_arbiter_clk_range = gp10b_get_arbiter_clk_range, + .get_arbiter_clk_default = gp10b_get_arbiter_clk_default, + .arbiter_clk_init = gp10b_init_clk_arbiter, + .clk_arb_run_arbiter_cb = gp10b_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gp10b_clk_arb_cleanup, + }, .regops = { .exec_regops = exec_regops_gk20a, .get_global_whitelist_ranges = @@ -847,6 +857,7 @@ int gv11b_init_hal(struct gk20a *g) gops->falcon = gv11b_ops.falcon; gops->priv_ring = gv11b_ops.priv_ring; gops->fuse = gv11b_ops.fuse; + gops->clk_arb = gv11b_ops.clk_arb; /* Lone functions */ gops->chip_init_gpu_characteristics = diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h index 46952f1c..81b1df1b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h +++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h @@ -217,6 +217,8 @@ struct nvgpu_clk_arb { u16 *gpc2clk_f_points; u32 gpc2clk_f_numpoints; + bool clk_arb_events_supported; + nvgpu_atomic64_t alarm_mask; struct nvgpu_clk_notification_queue notification_queue; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 5821f742..b8ca5754 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1117,6 +1117,8 @@ struct gpu_ops { struct { int 
(*arbiter_clk_init)(struct gk20a *g); u32 (*get_arbiter_clk_domains)(struct gk20a *g); + int (*get_arbiter_f_points)(struct gk20a *g,u32 api_domain, + u32 *num_points, u16 *freqs_in_mhz); int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain, u16 *min_mhz, u16 *max_mhz); int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain, diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c index 501b5f93..2d9946b1 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c @@ -55,6 +55,13 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, clk_arb_dbg(session->g, " "); + /* This is done to account for the extra refcount taken in + * nvgpu_clk_arb_commit_request_fd without events support in iGPU + */ + if (!session->g->clk_arb->clk_arb_events_supported) { + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + } + nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session); nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); return 0; @@ -425,6 +432,10 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, err = -EINVAL; goto fdput_fd; } + + clk_arb_dbg(g, "requested target = %u\n", + (u32)dev->gpc2clk_target_mhz); + nvgpu_ref_get(&dev->refcount); nvgpu_spinlock_acquire(&session->session_lock); nvgpu_list_add(&dev->node, &session->targets); diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c index 5fdcb05c..a792bfcb 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c @@ -171,6 +171,8 @@ static int gp10b_tegra_probe(struct device *dev) nvgpu_mutex_init(&platform->clk_get_freq_lock); + platform->g->ops.clk.support_clk_freq_controller = true; + return 0; } diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c index b055eb6e..ec93b4c3 100644 --- 
a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c @@ -99,6 +99,8 @@ static int gv11b_tegra_probe(struct device *dev) nvgpu_mutex_init(&platform->clk_get_freq_lock); + platform->g->ops.clk.support_clk_freq_controller = true; + return 0; } -- cgit v1.2.2