From 16ad9f537979c5f3717fc5781b1c2fad22a76f96 Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Tue, 26 Jun 2018 15:41:12 +0530 Subject: gpu: nvgpu: move gp106 specific clk_arbiter code into HAL Currently, clock arbiter code is extensively using dgpu specific implementation. This patch restructures the clk_arbiter code and moves gp106 specific code into HAL. Following changes are made in this patch 1) clk_domain_get_f_points is now invoked via HAL for gp106 i.e. g->ops.clk.clk_domain_get_f_points. 2) moved nvgpu_clk_arb_change_vf_point and other related static functions to clk_arb_gp106.c. 3) Instead of only checking if get_arbiter_clk_domain is empty, a check for support_clk_freq_controller is also added. This is to enable the clk_arbiter based on support from both the OS and the chips. Bug 2061372 Change-Id: I65b0a4e02145a86fbbfb420ed591b1fa3c86f6dc Signed-off-by: Debarshi Dutta Reviewed-on: https://git-master.nvidia.com/r/1774279 Reviewed-by: svc-misra-checker GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/boardobj/boardobj.h | 1 + drivers/gpu/nvgpu/boardobj/boardobjgrp.h | 3 + drivers/gpu/nvgpu/clk/clk.c | 31 -- drivers/gpu/nvgpu/clk/clk.h | 8 +- drivers/gpu/nvgpu/clk/clk_arb.c | 668 +----------------------------- drivers/gpu/nvgpu/gk20a/gk20a.h | 7 + drivers/gpu/nvgpu/gp106/clk_arb_gp106.c | 649 ++++++++++++++++++++++++++++- drivers/gpu/nvgpu/gp106/clk_arb_gp106.h | 8 +- drivers/gpu/nvgpu/gp106/clk_gp106.c | 33 ++ drivers/gpu/nvgpu/gp106/clk_gp106.h | 7 +- drivers/gpu/nvgpu/gp106/hal_gp106.c | 5 + drivers/gpu/nvgpu/include/nvgpu/clk_arb.h | 12 + drivers/gpu/nvgpu/os/posix/clk_arb.c | 46 +- 13 files changed, 790 insertions(+), 688 deletions(-) diff --git a/drivers/gpu/nvgpu/boardobj/boardobj.h b/drivers/gpu/nvgpu/boardobj/boardobj.h index a433fda8..8fc8d1cc 100644 --- a/drivers/gpu/nvgpu/boardobj/boardobj.h +++ 
b/drivers/gpu/nvgpu/boardobj/boardobj.h @@ -28,6 +28,7 @@ #include "ctrl/ctrlboardobj.h" struct boardobj; +struct nvgpu_list_node; /* * check whether the specified BOARDOBJ object implements the queried diff --git a/drivers/gpu/nvgpu/boardobj/boardobjgrp.h b/drivers/gpu/nvgpu/boardobj/boardobjgrp.h index 3c28963c..8fc7136f 100644 --- a/drivers/gpu/nvgpu/boardobj/boardobjgrp.h +++ b/drivers/gpu/nvgpu/boardobj/boardobjgrp.h @@ -25,6 +25,9 @@ struct boardobjgrp; struct gk20a; +struct nvgpu_list_node; +struct pmu_surface; + /* ------------------------ Includes ----------------------------------------*/ #include "ctrl/ctrlboardobj.h" diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c index 49087468..731124f7 100644 --- a/drivers/gpu/nvgpu/clk/clk.c +++ b/drivers/gpu/nvgpu/clk/clk.c @@ -869,34 +869,3 @@ u32 clk_domain_get_f_or_v( } return status; } - -u32 clk_domain_get_f_points( - struct gk20a *g, - u32 clkapidomain, - u32 *pfpointscount, - u16 *pfreqpointsinmhz -) -{ - u32 status = -EINVAL; - struct clk_domain *pdomain; - u8 i; - struct clk_pmupstate *pclk = &g->clk_pmu; - - if (pfpointscount == NULL) - return -EINVAL; - - if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0)) - return -EINVAL; - - BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super), - struct clk_domain *, pdomain, i) { - if (pdomain->api_domain == clkapidomain) { - status = pdomain->clkdomainclkgetfpoints(g, pclk, - pdomain, pfpointscount, - pfreqpointsinmhz, - CLK_PROG_VFE_ENTRY_LOGIC); - return status; - } - } - return status; -} diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h index cd65f6f5..afff6963 100644 --- a/drivers/gpu/nvgpu/clk/clk.h +++ b/drivers/gpu/nvgpu/clk/clk.h @@ -1,7 +1,7 @@ /* * general clock structures & definitions * - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -127,12 +127,6 @@ u32 clk_domain_get_f_or_v( u32 *pvoltuv, u8 railidx ); -u32 clk_domain_get_f_points( - struct gk20a *g, - u32 clkapidomain, - u32 *fpointscount, - u16 *freqpointsinmhz -); int clk_get_fll_clks(struct gk20a *g, struct set_fll_clk *fllclk); int clk_set_fll_clks(struct gk20a *g, struct set_fll_clk *fllclk); int clk_pmu_freq_controller_load(struct gk20a *g, bool bload, u8 bit_idx); diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 357a1416..983a82f9 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -78,7 +78,7 @@ static void nvgpu_clk_arb_queue_notification(struct gk20a *g, } -static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) +void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) { struct nvgpu_clk_arb *arb = g->clk_arb; @@ -103,7 +103,7 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) } -static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) +int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) { struct gk20a *g = arb->g; struct nvgpu_clk_vf_table *table; @@ -117,7 +117,6 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) struct clk_set_info *p5_info, *p0_info; - table = NV_ACCESS_ONCE(arb->current_vf_table); /* make flag visible when all data has resolved in the tables */ nvgpu_smp_rmb(); @@ -142,13 +141,13 @@ static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) table->gpc2clk_num_points = MAX_F_POINTS; table->mclk_num_points = MAX_F_POINTS; - if (clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK, + if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPC2CLK, &table->gpc2clk_num_points, arb->gpc2clk_f_points)) { nvgpu_err(g, "failed to fetch GPC2CLK frequency points"); goto exit_vf_table; } - if 
(clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK, + if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_MCLK, &table->mclk_num_points, arb->mclk_f_points)) { nvgpu_err(g, "failed to fetch MCLK frequency points"); goto exit_vf_table; @@ -427,185 +426,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb) nvgpu_clk_arb_update_vf_table(arb); } -static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, - u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, - u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 *nuvmin_sram) -{ - u16 gpc2clk_target, mclk_target; - u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; - u32 mclk_voltuv, mclk_voltuv_sram; - u32 pstate = VF_POINT_INVALID_PSTATE; - struct nvgpu_clk_vf_table *table; - u32 index, index_mclk; - struct nvgpu_clk_vf_point *mclk_vf = NULL; - - do { - gpc2clk_target = *gpc2clk; - mclk_target = *mclk; - gpc2clk_voltuv = 0; - gpc2clk_voltuv_sram = 0; - mclk_voltuv = 0; - mclk_voltuv_sram = 0; - - table = NV_ACCESS_ONCE(arb->current_vf_table); - /* pointer to table can be updated by callback */ - nvgpu_smp_rmb(); - - if (!table) - continue; - if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) { - nvgpu_err(arb->g, "found empty table"); - goto find_exit; - } - /* First we check MCLK to find out which PSTATE we are - * are requesting, and from there try to find the minimum - * GPC2CLK on the same PSTATE that satisfies the request. 
- * If no GPC2CLK can be found, then we need to up the PSTATE - */ - -recalculate_vf_point: - for (index = 0; index < table->mclk_num_points; index++) { - if (table->mclk_points[index].mem_mhz >= mclk_target) { - mclk_vf = &table->mclk_points[index]; - break; - } - } - if (index == table->mclk_num_points) { - mclk_vf = &table->mclk_points[index-1]; - index = table->mclk_num_points - 1; - } - index_mclk = index; - - /* round up the freq requests */ - for (index = 0; index < table->gpc2clk_num_points; index++) { - pstate = VF_POINT_COMMON_PSTATE( - &table->gpc2clk_points[index], mclk_vf); - - if ((table->gpc2clk_points[index].gpc_mhz >= - gpc2clk_target) && - (pstate != VF_POINT_INVALID_PSTATE)) { - gpc2clk_target = - table->gpc2clk_points[index].gpc_mhz; - *sys2clk = - table->gpc2clk_points[index].sys_mhz; - *xbar2clk = - table->gpc2clk_points[index].xbar_mhz; - - gpc2clk_voltuv = - table->gpc2clk_points[index].uvolt; - gpc2clk_voltuv_sram = - table->gpc2clk_points[index].uvolt_sram; - break; - } - } - - if (index == table->gpc2clk_num_points) { - pstate = VF_POINT_COMMON_PSTATE( - &table->gpc2clk_points[index-1], mclk_vf); - if (pstate != VF_POINT_INVALID_PSTATE) { - gpc2clk_target = - table->gpc2clk_points[index-1].gpc_mhz; - *sys2clk = - table->gpc2clk_points[index-1].sys_mhz; - *xbar2clk = - table->gpc2clk_points[index-1].xbar_mhz; - - gpc2clk_voltuv = - table->gpc2clk_points[index-1].uvolt; - gpc2clk_voltuv_sram = - table->gpc2clk_points[index-1]. - uvolt_sram; - } else if (index_mclk >= table->mclk_num_points - 1) { - /* There is no available combination of MCLK - * and GPC2CLK, we need to fail this - */ - gpc2clk_target = 0; - mclk_target = 0; - pstate = VF_POINT_INVALID_PSTATE; - goto find_exit; - } else { - /* recalculate with higher PSTATE */ - gpc2clk_target = *gpc2clk; - mclk_target = table->mclk_points[index_mclk+1]. 
- mem_mhz; - goto recalculate_vf_point; - } - } - - mclk_target = mclk_vf->mem_mhz; - mclk_voltuv = mclk_vf->uvolt; - mclk_voltuv_sram = mclk_vf->uvolt_sram; - - } while (!table || - (NV_ACCESS_ONCE(arb->current_vf_table) != table)); - -find_exit: - *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; - *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? - gpc2clk_voltuv_sram : mclk_voltuv_sram; - /* noise unaware vmin */ - *nuvmin = mclk_voltuv; - *nuvmin_sram = mclk_voltuv_sram; - *gpc2clk = gpc2clk_target; - *mclk = mclk_target; - return pstate; -} - -static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, - u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, - u32 voltuv_sram) -{ - struct set_fll_clk fllclk; - struct nvgpu_clk_arb *arb = g->clk_arb; - int status; - - fllclk.gpc2clkmhz = gpc2clk_target; - fllclk.sys2clkmhz = sys2clk_target; - fllclk.xbar2clkmhz = xbar2clk_target; - - fllclk.voltuv = voltuv; - - /* if voltage ascends we do: - * (1) FLL change - * (2) Voltage change - * (3) MCLK change - * If it goes down - * (1) MCLK change - * (2) Voltage change - * (3) FLL change - */ - - /* descending */ - if (voltuv < arb->voltuv_actual) { - status = g->ops.clk.mclk_change(g, mclk_target); - if (status < 0) - return status; - - status = volt_set_voltage(g, voltuv, voltuv_sram); - if (status < 0) - return status; - - status = clk_set_fll_clks(g, &fllclk); - if (status < 0) - return status; - } else { - status = clk_set_fll_clks(g, &fllclk); - if (status < 0) - return status; - - status = volt_set_voltage(g, voltuv, voltuv_sram); - if (status < 0) - return status; - - status = g->ops.clk.mclk_change(g, mclk_target); - if (status < 0) - return status; - } - - return 0; -} - -static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, +u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, struct nvgpu_clk_arb_target *target, u32 alarm) { @@ -704,7 +525,7 @@ static u32 nvgpu_clk_arb_notify(struct 
nvgpu_clk_dev *dev, return new_alarms_reported; } -static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) +void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) { struct nvgpu_clk_arb *arb = g->clk_arb; @@ -726,318 +547,20 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) current_mask, new_mask))); } -static void nvgpu_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) -{ - struct nvgpu_clk_session *session; - struct nvgpu_clk_dev *dev; - struct nvgpu_clk_dev *tmp; - struct nvgpu_clk_arb_target *target, *actual; - struct gk20a *g = arb->g; - - u32 pstate = VF_POINT_INVALID_PSTATE; - u32 voltuv, voltuv_sram; - bool mclk_set, gpc2clk_set; - u32 nuvmin, nuvmin_sram; - - u32 alarms_notified = 0; - u32 current_alarm; - int status = 0; - - /* Temporary variables for checking target frequency */ - u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target; - u16 gpc2clk_session_target, mclk_session_target; - -#ifdef CONFIG_DEBUG_FS - u64 t0, t1; - struct nvgpu_clk_arb_debug *debug; - -#endif - - clk_arb_dbg(g, " "); - - /* bail out if gpu is down */ - if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) - goto exit_arb; - -#ifdef CONFIG_DEBUG_FS - g->ops.ptimer.read_ptimer(g, &t0); -#endif - - /* Only one arbiter should be running */ - gpc2clk_target = 0; - mclk_target = 0; - - nvgpu_spinlock_acquire(&arb->sessions_lock); - nvgpu_list_for_each_entry(session, &arb->sessions, - nvgpu_clk_session, link) { - if (!session->zombie) { - mclk_set = false; - gpc2clk_set = false; - target = (session->target == &session->target_pool[0] ? 
- &session->target_pool[1] : - &session->target_pool[0]); - nvgpu_spinlock_acquire(&session->session_lock); - if (!nvgpu_list_empty(&session->targets)) { - /* Copy over state */ - target->mclk = session->target->mclk; - target->gpc2clk = session->target->gpc2clk; - /* Query the latest committed request */ - nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets, - nvgpu_clk_dev, node) { - if (!mclk_set && dev->mclk_target_mhz) { - target->mclk = - dev->mclk_target_mhz; - mclk_set = true; - } - if (!gpc2clk_set && - dev->gpc2clk_target_mhz) { - target->gpc2clk = - dev->gpc2clk_target_mhz; - gpc2clk_set = true; - } - nvgpu_ref_get(&dev->refcount); - nvgpu_list_del(&dev->node); - nvgpu_spinlock_acquire(&arb->requests_lock); - nvgpu_list_add(&dev->node, &arb->requests); - nvgpu_spinlock_release(&arb->requests_lock); - } - session->target = target; - } - nvgpu_spinlock_release(&session->session_lock); - - mclk_target = mclk_target > session->target->mclk ? - mclk_target : session->target->mclk; - - gpc2clk_target = - gpc2clk_target > session->target->gpc2clk ? - gpc2clk_target : session->target->gpc2clk; - } - } - nvgpu_spinlock_release(&arb->sessions_lock); - - gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : - arb->gpc2clk_default_mhz; - - if (gpc2clk_target < arb->gpc2clk_min) - gpc2clk_target = arb->gpc2clk_min; - - if (gpc2clk_target > arb->gpc2clk_max) - gpc2clk_target = arb->gpc2clk_max; - - mclk_target = (mclk_target > 0) ? 
mclk_target : - arb->mclk_default_mhz; - - if (mclk_target < arb->mclk_min) - mclk_target = arb->mclk_min; - - if (mclk_target > arb->mclk_max) - mclk_target = arb->mclk_max; - - sys2clk_target = 0; - xbar2clk_target = 0; - - gpc2clk_session_target = gpc2clk_target; - mclk_session_target = mclk_target; - - /* Query the table for the closest vf point to program */ - pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, - &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv, - &voltuv_sram, &nuvmin, &nuvmin_sram); - - if (pstate == VF_POINT_INVALID_PSTATE) { - arb->status = -EINVAL; - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - - if ((gpc2clk_target < gpc2clk_session_target) || - (mclk_target < mclk_session_target)) - nvgpu_clk_arb_set_global_alarm(g, - EVENT(ALARM_TARGET_VF_NOT_POSSIBLE)); - - if ((arb->actual->gpc2clk == gpc2clk_target) && - (arb->actual->mclk == mclk_target) && - (arb->voltuv_actual == voltuv)) { - goto exit_arb; - } - - /* Program clocks */ - /* A change in both mclk of gpc2clk may require a change in voltage */ - - nvgpu_mutex_acquire(&arb->pstate_lock); - status = nvgpu_lpwr_disable_pg(g, false); - - status = clk_pmu_freq_controller_load(g, false, - CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL); - if (status < 0) { - arb->status = status; - nvgpu_mutex_release(&arb->pstate_lock); - - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); - if (status < 0) { - arb->status = status; - nvgpu_mutex_release(&arb->pstate_lock); - - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - - status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, - sys2clk_target, xbar2clk_target, mclk_target, voltuv, - voltuv_sram); - if (status < 0) { - arb->status = status; - nvgpu_mutex_release(&arb->pstate_lock); - - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - - status = clk_pmu_freq_controller_load(g, true, - 
CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL); - if (status < 0) { - arb->status = status; - nvgpu_mutex_release(&arb->pstate_lock); - - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - - status = nvgpu_lwpr_mclk_change(g, pstate); - if (status < 0) { - arb->status = status; - nvgpu_mutex_release(&arb->pstate_lock); - - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - - actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? - &arb->actual_pool[1] : &arb->actual_pool[0]; - - /* do not reorder this pointer */ - nvgpu_smp_rmb(); - actual->gpc2clk = gpc2clk_target; - actual->mclk = mclk_target; - arb->voltuv_actual = voltuv; - actual->pstate = pstate; - arb->status = status; - - /* Make changes visible to other threads */ - nvgpu_smp_wmb(); - arb->actual = actual; - - status = nvgpu_lpwr_enable_pg(g, false); - if (status < 0) { - arb->status = status; - nvgpu_mutex_release(&arb->pstate_lock); - - /* make status visible */ - nvgpu_smp_mb(); - goto exit_arb; - } - - /* status must be visible before atomic inc */ - nvgpu_smp_wmb(); - nvgpu_atomic_inc(&arb->req_nr); - - /* Unlock pstate change for PG */ - nvgpu_mutex_release(&arb->pstate_lock); - - /* VF Update complete */ - nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); - - nvgpu_cond_signal_interruptible(&arb->request_wq); - -#ifdef CONFIG_DEBUG_FS - g->ops.ptimer.read_ptimer(g, &t1); - - debug = arb->debug == &arb->debug_pool[0] ? - &arb->debug_pool[1] : &arb->debug_pool[0]; - - memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); - debug->switch_num++; - - if (debug->switch_num == 1) { - debug->switch_max = debug->switch_min = - debug->switch_avg = (t1-t0)/1000; - debug->switch_std = 0; - } else { - s64 prev_avg; - s64 curr = (t1-t0)/1000; - - debug->switch_max = curr > debug->switch_max ? - curr : debug->switch_max; - debug->switch_min = debug->switch_min ? - (curr < debug->switch_min ? 
- curr : debug->switch_min) : curr; - prev_avg = debug->switch_avg; - debug->switch_avg = (curr + - (debug->switch_avg * (debug->switch_num-1))) / - debug->switch_num; - debug->switch_std += - (curr - debug->switch_avg) * (curr - prev_avg); - } - /* commit changes before exchanging debug pointer */ - nvgpu_smp_wmb(); - arb->debug = debug; -#endif - -exit_arb: - if (status < 0) { - nvgpu_err(g, "Error in arbiter update"); - nvgpu_clk_arb_set_global_alarm(g, - EVENT(ALARM_CLOCK_ARBITER_FAILED)); - } - - current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); - /* notify completion for all requests */ - nvgpu_spinlock_acquire(&arb->requests_lock); - nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests, - nvgpu_clk_dev, node) { - nvgpu_atomic_set(&dev->poll_mask, NVGPU_POLLIN | NVGPU_POLLRDNORM); - nvgpu_clk_arb_event_post_event(dev); - nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); - nvgpu_list_del(&dev->node); - } - nvgpu_spinlock_release(&arb->requests_lock); - - nvgpu_atomic_set(&arb->notification_queue.head, - nvgpu_atomic_read(&arb->notification_queue.tail)); - /* notify event for all users */ - nvgpu_spinlock_acquire(&arb->users_lock); - nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) { - alarms_notified |= - nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); - } - nvgpu_spinlock_release(&arb->users_lock); - - /* clear alarms */ - nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & - ~EVENT(ALARM_GPU_LOST)); -} - /* * Process one scheduled work item. 
*/ static void nvgpu_clk_arb_worker_process_item( struct nvgpu_clk_arb_work_item *work_item) { - clk_arb_dbg(work_item->arb->g, " "); + struct gk20a *g = work_item->arb->g; + + clk_arb_dbg(g, " "); if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE) nvgpu_clk_arb_run_vf_table_cb(work_item->arb); else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB) - nvgpu_clk_arb_run_arbiter_cb(work_item->arb); + g->ops.clk_arb.clk_arb_run_arbiter_cb(work_item->arb); } /** @@ -1204,7 +727,7 @@ void nvgpu_clk_arb_worker_enqueue(struct gk20a *g, /** * Initialize the clk arb worker's metadata and start the background thread. */ -static int nvgpu_clk_arb_worker_init(struct gk20a *g) +int nvgpu_clk_arb_worker_init(struct gk20a *g) { int err; @@ -1227,149 +750,12 @@ error_check: int nvgpu_clk_arb_init_arbiter(struct gk20a *g) { - struct nvgpu_clk_arb *arb; - u16 default_mhz; - int err; - int index; - struct nvgpu_clk_vf_table *table; - - clk_arb_dbg(g, " "); - - if (!g->ops.clk_arb.get_arbiter_clk_domains) + if (!g->ops.clk.support_clk_freq_controller || + !g->ops.clk_arb.get_arbiter_clk_domains) { return 0; - - arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); - if (!arb) - return -ENOMEM; - - err = nvgpu_mutex_init(&arb->pstate_lock); - if (err) - goto mutex_fail; - nvgpu_spinlock_init(&arb->sessions_lock); - nvgpu_spinlock_init(&arb->users_lock); - nvgpu_spinlock_init(&arb->requests_lock); - - arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); - if (!arb->mclk_f_points) { - err = -ENOMEM; - goto init_fail; - } - - arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); - if (!arb->gpc2clk_f_points) { - err = -ENOMEM; - goto init_fail; - } - - for (index = 0; index < 2; index++) { - table = &arb->vf_table_pool[index]; - table->gpc2clk_num_points = MAX_F_POINTS; - table->mclk_num_points = MAX_F_POINTS; - - table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, - sizeof(struct nvgpu_clk_vf_point)); - if (!table->gpc2clk_points) { - err = 
-ENOMEM; - goto init_fail; - } - - - table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, - sizeof(struct nvgpu_clk_vf_point)); - if (!table->mclk_points) { - err = -ENOMEM; - goto init_fail; - } - } - - g->clk_arb = arb; - arb->g = g; - - err = g->ops.clk_arb.get_arbiter_clk_default(g, - CTRL_CLK_DOMAIN_MCLK, &default_mhz); - if (err < 0) { - err = -EINVAL; - goto init_fail; } - arb->mclk_default_mhz = default_mhz; - - err = g->ops.clk_arb.get_arbiter_clk_default(g, - CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); - if (err < 0) { - err = -EINVAL; - goto init_fail; - } - - arb->gpc2clk_default_mhz = default_mhz; - - arb->actual = &arb->actual_pool[0]; - - nvgpu_atomic_set(&arb->req_nr, 0); - - nvgpu_atomic64_set(&arb->alarm_mask, 0); - err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, - DEFAULT_EVENT_NUMBER); - if (err < 0) - goto init_fail; - - nvgpu_init_list_node(&arb->users); - nvgpu_init_list_node(&arb->sessions); - nvgpu_init_list_node(&arb->requests); - - nvgpu_cond_init(&arb->request_wq); - - nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item); - nvgpu_init_list_node(&arb->update_arb_work_item.worker_item); - arb->update_vf_table_work_item.arb = arb; - arb->update_arb_work_item.arb = arb; - arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE; - arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB; - - err = nvgpu_clk_arb_worker_init(g); - if (err < 0) - goto init_fail; - -#ifdef CONFIG_DEBUG_FS - arb->debug = &arb->debug_pool[0]; - - if (!arb->debugfs_set) { - if (nvgpu_clk_arb_debugfs_init(g)) - arb->debugfs_set = true; - } -#endif - err = clk_vf_point_cache(g); - if (err < 0) - goto init_fail; - - err = nvgpu_clk_arb_update_vf_table(arb); - if (err < 0) - goto init_fail; - do { - /* Check that first run is completed */ - nvgpu_smp_mb(); - NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, - nvgpu_atomic_read(&arb->req_nr), 0); - } while (!nvgpu_atomic_read(&arb->req_nr)); - - - return arb->status; - 
-init_fail: - nvgpu_kfree(g, arb->gpc2clk_f_points); - nvgpu_kfree(g, arb->mclk_f_points); - - for (index = 0; index < 2; index++) { - nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); - nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); - } - - nvgpu_mutex_destroy(&arb->pstate_lock); - -mutex_fail: - nvgpu_kfree(g, arb); - - return err; + return g->ops.clk_arb.arbiter_clk_init(g); } void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g) @@ -1396,22 +782,10 @@ static void nvgpu_clk_arb_worker_deinit(struct gk20a *g) void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) { struct nvgpu_clk_arb *arb = g->clk_arb; - int index; if (arb) { nvgpu_clk_arb_worker_deinit(g); - - nvgpu_kfree(g, arb->gpc2clk_f_points); - nvgpu_kfree(g, arb->mclk_f_points); - - for (index = 0; index < 2; index++) { - nvgpu_kfree(g, - arb->vf_table_pool[index].gpc2clk_points); - nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); - } - nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); - nvgpu_kfree(g, g->clk_arb); - g->clk_arb = NULL; + g->ops.clk_arb.clk_arb_cleanup(g->clk_arb); } } @@ -1423,8 +797,10 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, clk_arb_dbg(g, " "); - if (!g->ops.clk_arb.get_arbiter_clk_domains) + if (!g->ops.clk.support_clk_freq_controller || + !g->ops.clk_arb.get_arbiter_clk_domains) { return 0; + } session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session)); if (!session) @@ -1576,16 +952,16 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, switch (api_domain) { case NVGPU_CLK_DOMAIN_GPCCLK: - err = clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPC2CLK, - max_points, fpoints); + err = g->ops.clk.clk_domain_get_f_points(g, + CTRL_CLK_DOMAIN_GPC2CLK, max_points, fpoints); if (err || !fpoints) return err; for (i = 0; i < *max_points; i++) fpoints[i] /= 2; return 0; case NVGPU_CLK_DOMAIN_MCLK: - return clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_MCLK, - max_points, fpoints); + return g->ops.clk.clk_domain_get_f_points(g, + CTRL_CLK_DOMAIN_MCLK, 
max_points, fpoints); default: return -EINVAL; } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index ac1226fc..bdf3a168 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -41,6 +41,7 @@ struct nvgpu_nvhost_dev; struct nvgpu_cpu_time_correlation_sample; struct nvgpu_mem_sgt; struct nvgpu_warpstate; +struct nvgpu_clk_session; struct nvgpu_clk_arb; #ifdef CONFIG_GK20A_CTXSW_TRACE struct nvgpu_gpu_ctxsw_trace_filter; @@ -1083,6 +1084,9 @@ struct gpu_ops { int (*init_clk_support)(struct gk20a *g); int (*suspend_clk_support)(struct gk20a *g); u32 (*get_crystal_clk_hz)(struct gk20a *g); + int (*clk_domain_get_f_points)(struct gk20a *g, + u32 clkapidomain, u32 *pfpointscount, + u16 *pfreqpointsinmhz); unsigned long (*measure_freq)(struct gk20a *g, u32 api_domain); unsigned long (*get_rate)(struct gk20a *g, u32 api_domain); int (*set_rate)(struct gk20a *g, u32 api_domain, unsigned long rate); @@ -1107,15 +1111,18 @@ struct gpu_ops { bool support_lpwr_pg; } clk; struct { + int (*arbiter_clk_init)(struct gk20a *g); u32 (*get_arbiter_clk_domains)(struct gk20a *g); int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain, u16 *min_mhz, u16 *max_mhz); int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain, u16 *default_mhz); + void (*clk_arb_run_arbiter_cb)(struct nvgpu_clk_arb *arb); /* This function is inherently unsafe to call while * arbiter is running arbiter must be blocked * before calling this function */ int (*get_current_pstate)(struct gk20a *g); + void (*clk_arb_cleanup)(struct nvgpu_clk_arb *arb); } clk_arb; struct { int (*handle_pmu_perf_event)(struct gk20a *g, void *pmu_msg); diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c index ca8015d6..860344d0 100644 --- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. 
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,6 +21,7 @@ */ #include "gk20a/gk20a.h" +#include <nvgpu/clk_arb.h> #include "clk_arb_gp106.h" @@ -109,3 +110,649 @@ int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, return 0; } + +int gp106_init_clk_arbiter(struct gk20a *g) +{ + struct nvgpu_clk_arb *arb; + u16 default_mhz; + int err; + int index; + struct nvgpu_clk_vf_table *table; + + clk_arb_dbg(g, " "); + + arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb)); + if (!arb) + return -ENOMEM; + + err = nvgpu_mutex_init(&arb->pstate_lock); + if (err) + goto mutex_fail; + nvgpu_spinlock_init(&arb->sessions_lock); + nvgpu_spinlock_init(&arb->users_lock); + nvgpu_spinlock_init(&arb->requests_lock); + + arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (!arb->mclk_f_points) { + err = -ENOMEM; + goto init_fail; + } + + arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16)); + if (!arb->gpc2clk_f_points) { + err = -ENOMEM; + goto init_fail; + } + + for (index = 0; index < 2; index++) { + table = &arb->vf_table_pool[index]; + table->gpc2clk_num_points = MAX_F_POINTS; + table->mclk_num_points = MAX_F_POINTS; + + table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (!table->gpc2clk_points) { + err = -ENOMEM; + goto init_fail; + } + + + table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point)); + if (!table->mclk_points) { + err = -ENOMEM; + goto init_fail; + } + } + + g->clk_arb = arb; + arb->g = g; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_MCLK, &default_mhz); + if (err < 0) { + err = -EINVAL; + goto init_fail; + } + + arb->mclk_default_mhz = default_mhz; + + err = g->ops.clk_arb.get_arbiter_clk_default(g, + CTRL_CLK_DOMAIN_GPC2CLK, &default_mhz); + if (err < 0) 
{ + err = -EINVAL; + goto init_fail; + } + + arb->gpc2clk_default_mhz = default_mhz; + + arb->actual = &arb->actual_pool[0]; + + nvgpu_atomic_set(&arb->req_nr, 0); + + nvgpu_atomic64_set(&arb->alarm_mask, 0); + err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, + DEFAULT_EVENT_NUMBER); + if (err < 0) + goto init_fail; + + nvgpu_init_list_node(&arb->users); + nvgpu_init_list_node(&arb->sessions); + nvgpu_init_list_node(&arb->requests); + + nvgpu_cond_init(&arb->request_wq); + + nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item); + nvgpu_init_list_node(&arb->update_arb_work_item.worker_item); + arb->update_vf_table_work_item.arb = arb; + arb->update_arb_work_item.arb = arb; + arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE; + arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB; + + err = nvgpu_clk_arb_worker_init(g); + if (err < 0) + goto init_fail; + +#ifdef CONFIG_DEBUG_FS + arb->debug = &arb->debug_pool[0]; + + if (!arb->debugfs_set) { + if (nvgpu_clk_arb_debugfs_init(g)) + arb->debugfs_set = true; + } +#endif + err = clk_vf_point_cache(g); + if (err < 0) + goto init_fail; + + err = nvgpu_clk_arb_update_vf_table(arb); + if (err < 0) + goto init_fail; + do { + /* Check that first run is completed */ + nvgpu_smp_mb(); + NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq, + nvgpu_atomic_read(&arb->req_nr), 0); + } while (!nvgpu_atomic_read(&arb->req_nr)); + + + return arb->status; + +init_fail: + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&arb->pstate_lock); + +mutex_fail: + nvgpu_kfree(g, arb); + + return err; +} + +static u8 nvgpu_clk_arb_find_vf_point(struct nvgpu_clk_arb *arb, + u16 *gpc2clk, u16 *sys2clk, u16 *xbar2clk, u16 *mclk, + u32 *voltuv, u32 *voltuv_sram, u32 *nuvmin, u32 
*nuvmin_sram) +{ + u16 gpc2clk_target, mclk_target; + u32 gpc2clk_voltuv, gpc2clk_voltuv_sram; + u32 mclk_voltuv, mclk_voltuv_sram; + u32 pstate = VF_POINT_INVALID_PSTATE; + struct nvgpu_clk_vf_table *table; + u32 index, index_mclk; + struct nvgpu_clk_vf_point *mclk_vf = NULL; + + do { + gpc2clk_target = *gpc2clk; + mclk_target = *mclk; + gpc2clk_voltuv = 0; + gpc2clk_voltuv_sram = 0; + mclk_voltuv = 0; + mclk_voltuv_sram = 0; + + table = NV_ACCESS_ONCE(arb->current_vf_table); + /* pointer to table can be updated by callback */ + nvgpu_smp_rmb(); + + if (!table) + continue; + if ((!table->gpc2clk_num_points) || (!table->mclk_num_points)) { + nvgpu_err(arb->g, "found empty table"); + goto find_exit; + } + /* First we check MCLK to find out which PSTATE we are + * are requesting, and from there try to find the minimum + * GPC2CLK on the same PSTATE that satisfies the request. + * If no GPC2CLK can be found, then we need to up the PSTATE + */ + +recalculate_vf_point: + for (index = 0; index < table->mclk_num_points; index++) { + if (table->mclk_points[index].mem_mhz >= mclk_target) { + mclk_vf = &table->mclk_points[index]; + break; + } + } + if (index == table->mclk_num_points) { + mclk_vf = &table->mclk_points[index-1]; + index = table->mclk_num_points - 1; + } + index_mclk = index; + + /* round up the freq requests */ + for (index = 0; index < table->gpc2clk_num_points; index++) { + pstate = VF_POINT_COMMON_PSTATE( + &table->gpc2clk_points[index], mclk_vf); + + if ((table->gpc2clk_points[index].gpc_mhz >= + gpc2clk_target) && + (pstate != VF_POINT_INVALID_PSTATE)) { + gpc2clk_target = + table->gpc2clk_points[index].gpc_mhz; + *sys2clk = + table->gpc2clk_points[index].sys_mhz; + *xbar2clk = + table->gpc2clk_points[index].xbar_mhz; + + gpc2clk_voltuv = + table->gpc2clk_points[index].uvolt; + gpc2clk_voltuv_sram = + table->gpc2clk_points[index].uvolt_sram; + break; + } + } + + if (index == table->gpc2clk_num_points) { + pstate = VF_POINT_COMMON_PSTATE( + 
&table->gpc2clk_points[index-1], mclk_vf); + if (pstate != VF_POINT_INVALID_PSTATE) { + gpc2clk_target = + table->gpc2clk_points[index-1].gpc_mhz; + *sys2clk = + table->gpc2clk_points[index-1].sys_mhz; + *xbar2clk = + table->gpc2clk_points[index-1].xbar_mhz; + + gpc2clk_voltuv = + table->gpc2clk_points[index-1].uvolt; + gpc2clk_voltuv_sram = + table->gpc2clk_points[index-1]. + uvolt_sram; + } else if (index_mclk >= table->mclk_num_points - 1) { + /* There is no available combination of MCLK + * and GPC2CLK, we need to fail this + */ + gpc2clk_target = 0; + mclk_target = 0; + pstate = VF_POINT_INVALID_PSTATE; + goto find_exit; + } else { + /* recalculate with higher PSTATE */ + gpc2clk_target = *gpc2clk; + mclk_target = table->mclk_points[index_mclk+1]. + mem_mhz; + goto recalculate_vf_point; + } + } + + mclk_target = mclk_vf->mem_mhz; + mclk_voltuv = mclk_vf->uvolt; + mclk_voltuv_sram = mclk_vf->uvolt_sram; + + } while (!table || + (NV_ACCESS_ONCE(arb->current_vf_table) != table)); + +find_exit: + *voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv; + *voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ? + gpc2clk_voltuv_sram : mclk_voltuv_sram; + /* noise unaware vmin */ + *nuvmin = mclk_voltuv; + *nuvmin_sram = mclk_voltuv_sram; + *gpc2clk = gpc2clk_target < *gpc2clk ? 
gpc2clk_target : *gpc2clk; + *mclk = mclk_target; + return pstate; +} + +static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target, + u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv, + u32 voltuv_sram) +{ + struct set_fll_clk fllclk; + struct nvgpu_clk_arb *arb = g->clk_arb; + int status; + + fllclk.gpc2clkmhz = gpc2clk_target; + fllclk.sys2clkmhz = sys2clk_target; + fllclk.xbar2clkmhz = xbar2clk_target; + + fllclk.voltuv = voltuv; + + /* if voltage ascends we do: + * (1) FLL change + * (2) Voltage change + * (3) MCLK change + * If it goes down + * (1) MCLK change + * (2) Voltage change + * (3) FLL change + */ + + /* descending */ + if (voltuv < arb->voltuv_actual) { + status = g->ops.clk.mclk_change(g, mclk_target); + if (status < 0) + return status; + + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) + return status; + + status = clk_set_fll_clks(g, &fllclk); + if (status < 0) + return status; + } else { + status = clk_set_fll_clks(g, &fllclk); + if (status < 0) + return status; + + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) + return status; + + status = g->ops.clk.mclk_change(g, mclk_target); + if (status < 0) + return status; + } + + return 0; +} + +void gp106_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) +{ + struct nvgpu_clk_session *session; + struct nvgpu_clk_dev *dev; + struct nvgpu_clk_dev *tmp; + struct nvgpu_clk_arb_target *target, *actual; + struct gk20a *g = arb->g; + + u32 pstate = VF_POINT_INVALID_PSTATE; + u32 voltuv, voltuv_sram; + bool mclk_set, gpc2clk_set; + u32 nuvmin, nuvmin_sram; + + u32 alarms_notified = 0; + u32 current_alarm; + int status = 0; + + /* Temporary variables for checking target frequency */ + u16 gpc2clk_target, sys2clk_target, xbar2clk_target, mclk_target; + u16 gpc2clk_session_target, mclk_session_target; + +#ifdef CONFIG_DEBUG_FS + u64 t0, t1; + struct nvgpu_clk_arb_debug *debug; + +#endif + + clk_arb_dbg(g, " "); + + /* bail out if 
gpu is down */ + if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) + goto exit_arb; + +#ifdef CONFIG_DEBUG_FS + g->ops.ptimer.read_ptimer(g, &t0); +#endif + + /* Only one arbiter should be running */ + gpc2clk_target = 0; + mclk_target = 0; + + nvgpu_spinlock_acquire(&arb->sessions_lock); + nvgpu_list_for_each_entry(session, &arb->sessions, + nvgpu_clk_session, link) { + if (!session->zombie) { + mclk_set = false; + gpc2clk_set = false; + target = (session->target == &session->target_pool[0] ? + &session->target_pool[1] : + &session->target_pool[0]); + nvgpu_spinlock_acquire(&session->session_lock); + if (!nvgpu_list_empty(&session->targets)) { + /* Copy over state */ + target->mclk = session->target->mclk; + target->gpc2clk = session->target->gpc2clk; + /* Query the latest committed request */ + nvgpu_list_for_each_entry_safe(dev, tmp, + &session->targets, nvgpu_clk_dev, node) { + if (!mclk_set && dev->mclk_target_mhz) { + target->mclk = + dev->mclk_target_mhz; + mclk_set = true; + } + if (!gpc2clk_set && + dev->gpc2clk_target_mhz) { + target->gpc2clk = + dev->gpc2clk_target_mhz; + gpc2clk_set = true; + } + nvgpu_ref_get(&dev->refcount); + nvgpu_list_del(&dev->node); + nvgpu_spinlock_acquire( + &arb->requests_lock); + nvgpu_list_add( + &dev->node, &arb->requests); + nvgpu_spinlock_release(&arb->requests_lock); + } + session->target = target; + } + nvgpu_spinlock_release( + &session->session_lock); + + mclk_target = mclk_target > session->target->mclk ? + mclk_target : session->target->mclk; + + gpc2clk_target = + gpc2clk_target > session->target->gpc2clk ? + gpc2clk_target : session->target->gpc2clk; + } + } + nvgpu_spinlock_release(&arb->sessions_lock); + + gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target : + arb->gpc2clk_default_mhz; + + if (gpc2clk_target < arb->gpc2clk_min) + gpc2clk_target = arb->gpc2clk_min; + + if (gpc2clk_target > arb->gpc2clk_max) + gpc2clk_target = arb->gpc2clk_max; + + mclk_target = (mclk_target > 0) ? 
mclk_target : + arb->mclk_default_mhz; + + if (mclk_target < arb->mclk_min) + mclk_target = arb->mclk_min; + + if (mclk_target > arb->mclk_max) + mclk_target = arb->mclk_max; + + sys2clk_target = 0; + xbar2clk_target = 0; + + gpc2clk_session_target = gpc2clk_target; + mclk_session_target = mclk_target; + + /* Query the table for the closest vf point to program */ + pstate = nvgpu_clk_arb_find_vf_point(arb, &gpc2clk_target, + &sys2clk_target, &xbar2clk_target, &mclk_target, &voltuv, + &voltuv_sram, &nuvmin, &nuvmin_sram); + + if (pstate == VF_POINT_INVALID_PSTATE) { + arb->status = -EINVAL; + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + + if ((gpc2clk_target < gpc2clk_session_target) || + (mclk_target < mclk_session_target)) + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_TARGET_VF_NOT_POSSIBLE)); + + if ((arb->actual->gpc2clk == gpc2clk_target) && + (arb->actual->mclk == mclk_target) && + (arb->voltuv_actual == voltuv)) { + goto exit_arb; + } + + /* Program clocks */ + /* A change in mclk or gpc2clk may require a change in voltage */ + + nvgpu_mutex_acquire(&arb->pstate_lock); + status = nvgpu_lpwr_disable_pg(g, false); + + status = clk_pmu_freq_controller_load(g, false, + CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + status = volt_set_noiseaware_vmin(g, nuvmin, nuvmin_sram); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + + status = nvgpu_clk_arb_change_vf_point(g, gpc2clk_target, + sys2clk_target, xbar2clk_target, mclk_target, voltuv, + voltuv_sram); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + + status = clk_pmu_freq_controller_load(g, true, + 
CTRL_CLK_CLK_FREQ_CONTROLLER_ID_ALL); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + + status = nvgpu_lwpr_mclk_change(g, pstate); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + + actual = NV_ACCESS_ONCE(arb->actual) == &arb->actual_pool[0] ? + &arb->actual_pool[1] : &arb->actual_pool[0]; + + /* do not reorder this pointer */ + nvgpu_smp_rmb(); + actual->gpc2clk = gpc2clk_target; + actual->mclk = mclk_target; + arb->voltuv_actual = voltuv; + actual->pstate = pstate; + arb->status = status; + + /* Make changes visible to other threads */ + nvgpu_smp_wmb(); + arb->actual = actual; + + status = nvgpu_lpwr_enable_pg(g, false); + if (status < 0) { + arb->status = status; + nvgpu_mutex_release(&arb->pstate_lock); + + /* make status visible */ + nvgpu_smp_mb(); + goto exit_arb; + } + + /* status must be visible before atomic inc */ + nvgpu_smp_wmb(); + nvgpu_atomic_inc(&arb->req_nr); + + /* Unlock pstate change for PG */ + nvgpu_mutex_release(&arb->pstate_lock); + + /* VF Update complete */ + nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE)); + + nvgpu_cond_signal_interruptible(&arb->request_wq); + +#ifdef CONFIG_DEBUG_FS + g->ops.ptimer.read_ptimer(g, &t1); + + debug = arb->debug == &arb->debug_pool[0] ? + &arb->debug_pool[1] : &arb->debug_pool[0]; + + memcpy(debug, arb->debug, sizeof(arb->debug_pool[0])); + debug->switch_num++; + + if (debug->switch_num == 1) { + debug->switch_max = debug->switch_min = + debug->switch_avg = (t1-t0)/1000; + debug->switch_std = 0; + } else { + s64 prev_avg; + s64 curr = (t1-t0)/1000; + + debug->switch_max = curr > debug->switch_max ? + curr : debug->switch_max; + debug->switch_min = debug->switch_min ? + (curr < debug->switch_min ? 
+ curr : debug->switch_min) : curr; + prev_avg = debug->switch_avg; + debug->switch_avg = (curr + + (debug->switch_avg * (debug->switch_num-1))) / + debug->switch_num; + debug->switch_std += + (curr - debug->switch_avg) * (curr - prev_avg); + } + /* commit changes before exchanging debug pointer */ + nvgpu_smp_wmb(); + arb->debug = debug; +#endif + +exit_arb: + if (status < 0) { + nvgpu_err(g, "Error in arbiter update"); + nvgpu_clk_arb_set_global_alarm(g, + EVENT(ALARM_CLOCK_ARBITER_FAILED)); + } + + current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); + /* notify completion for all requests */ + nvgpu_spinlock_acquire(&arb->requests_lock); + nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests, + nvgpu_clk_dev, node) { + nvgpu_atomic_set(&dev->poll_mask, + NVGPU_POLLIN | NVGPU_POLLRDNORM); + nvgpu_clk_arb_event_post_event(dev); + nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd); + nvgpu_list_del(&dev->node); + } + nvgpu_spinlock_release(&arb->requests_lock); + + nvgpu_atomic_set(&arb->notification_queue.head, + nvgpu_atomic_read(&arb->notification_queue.tail)); + /* notify event for all users */ + nvgpu_spinlock_acquire(&arb->users_lock); + nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) { + alarms_notified |= + nvgpu_clk_arb_notify(dev, arb->actual, current_alarm); + } + nvgpu_spinlock_release(&arb->users_lock); + + /* clear alarms */ + nvgpu_clk_arb_clear_global_alarm(g, alarms_notified & + ~EVENT(ALARM_GPU_LOST)); +} + +void gp106_clk_arb_cleanup(struct nvgpu_clk_arb *arb) +{ + struct gk20a *g = arb->g; + int index; + + nvgpu_kfree(g, arb->gpc2clk_f_points); + nvgpu_kfree(g, arb->mclk_f_points); + + for (index = 0; index < 2; index++) { + nvgpu_kfree(g, + arb->vf_table_pool[index].gpc2clk_points); + nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points); + } + + nvgpu_mutex_destroy(&g->clk_arb->pstate_lock); + nvgpu_kfree(g, g->clk_arb); + + g->clk_arb = NULL; +} \ No newline at end of file diff --git 
a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h index fc4657f5..e2b2834c 100644 --- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h +++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,10 +22,16 @@ #ifndef CLK_ARB_GP106_H #define CLK_ARB_GP106_H +struct nvgpu_clk_session; +struct nvgpu_clk_arb; + u32 gp106_get_arbiter_clk_domains(struct gk20a *g); int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, u16 *min_mhz, u16 *max_mhz); int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, u16 *default_mhz); +int gp106_init_clk_arbiter(struct gk20a *g); +void gp106_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb); +void gp106_clk_arb_cleanup(struct nvgpu_clk_arb *arb); #endif /* CLK_ARB_GP106_H */ diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.c b/drivers/gpu/nvgpu/gp106/clk_gp106.c index 24b07112..dd7a2dd6 100644 --- a/drivers/gpu/nvgpu/gp106/clk_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_gp106.c @@ -32,7 +32,9 @@ #include #include #include +#include +#include "clk/clk.h" #include "gk20a/gk20a.h" #include "gp106/mclk_gp106.h" @@ -243,6 +245,37 @@ read_err: } +int gp106_clk_domain_get_f_points( + struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz) +{ + int status = -EINVAL; + struct clk_domain *pdomain; + u8 i; + struct clk_pmupstate *pclk = &g->clk_pmu; + + if (pfpointscount == NULL) + return -EINVAL; + + if ((pfreqpointsinmhz == NULL) && (*pfpointscount != 0)) + return -EINVAL; + + BOARDOBJGRP_FOR_EACH(&(pclk->clk_domainobjs.super.super), + struct clk_domain *, pdomain, i) { + if (pdomain->api_domain == clkapidomain) { + status = pdomain->clkdomainclkgetfpoints(g, pclk, + pdomain, 
pfpointscount, + pfreqpointsinmhz, + CLK_PROG_VFE_ENTRY_LOGIC); + return status; + } + } + return status; +} + + #ifdef CONFIG_DEBUG_FS static int gp106_get_rate_show(void *data , u64 *val) { struct namemap_cfg *c = (struct namemap_cfg *) data; diff --git a/drivers/gpu/nvgpu/gp106/clk_gp106.h b/drivers/gpu/nvgpu/gp106/clk_gp106.h index 97baa224..b7ab3164 100644 --- a/drivers/gpu/nvgpu/gp106/clk_gp106.h +++ b/drivers/gpu/nvgpu/gp106/clk_gp106.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,5 +56,10 @@ int gp106_init_clk_support(struct gk20a *g); u32 gp106_crystal_clk_hz(struct gk20a *g); unsigned long gp106_clk_measure_freq(struct gk20a *g, u32 api_domain); int gp106_suspend_clk_support(struct gk20a *g); +int gp106_clk_domain_get_f_points( + struct gk20a *g, + u32 clkapidomain, + u32 *pfpointscount, + u16 *pfreqpointsinmhz); #endif /* CLK_GP106_H */ diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 78a3ea63..167bfaac 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -675,6 +675,7 @@ static const struct gpu_ops gp106_ops = { .get_crystal_clk_hz = gp106_crystal_clk_hz, .measure_freq = gp106_clk_measure_freq, .suspend_clk_support = gp106_suspend_clk_support, + .clk_domain_get_f_points = gp106_clk_domain_get_f_points, .mclk_init = gp106_mclk_init, .mclk_change = gp106_mclk_change, .mclk_deinit = gp106_mclk_deinit, @@ -684,6 +685,9 @@ static const struct gpu_ops gp106_ops = { .get_arbiter_clk_range = gp106_get_arbiter_clk_range, .get_arbiter_clk_default = gp106_get_arbiter_clk_default, .get_current_pstate = nvgpu_clk_arb_get_current_pstate, + .arbiter_clk_init = gp106_init_clk_arbiter, + .clk_arb_run_arbiter_cb = 
gp106_clk_arb_run_arbiter_cb, + .clk_arb_cleanup = gp106_clk_arb_cleanup, }, .regops = { .exec_regops = exec_regops_gk20a, @@ -849,6 +853,7 @@ int gp106_init_hal(struct gk20a *g) gops->clk.mclk_init = gp106_ops.clk.mclk_init; gops->clk.mclk_change = gp106_ops.clk.mclk_change; gops->clk.mclk_deinit = gp106_ops.clk.mclk_deinit; + gops->clk.clk_domain_get_f_points = gp106_ops.clk.clk_domain_get_f_points; gops->clk_arb = gp106_ops.clk_arb; gops->regops = gp106_ops.regops; diff --git a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h index 328e03b5..e63545df 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h +++ b/drivers/gpu/nvgpu/include/nvgpu/clk_arb.h @@ -287,6 +287,10 @@ nvgpu_clk_arb_work_item_from_worker_item(struct nvgpu_list_node *node) void nvgpu_clk_arb_worker_enqueue(struct gk20a *g, struct nvgpu_clk_arb_work_item *work_item); +int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb); + +int nvgpu_clk_arb_worker_init(struct gk20a *g); + int nvgpu_clk_arb_init_arbiter(struct gk20a *g); int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, @@ -338,12 +342,20 @@ void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock); void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g); +void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm); + void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm); +void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm); + void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount); void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount); +u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, + struct nvgpu_clk_arb_target *target, + u32 alarm); + int nvgpu_clk_notification_queue_alloc(struct gk20a *g, struct nvgpu_clk_notification_queue *queue, size_t events_number); diff --git a/drivers/gpu/nvgpu/os/posix/clk_arb.c b/drivers/gpu/nvgpu/os/posix/clk_arb.c index 2214b37b..63ab0f13 100644 --- a/drivers/gpu/nvgpu/os/posix/clk_arb.c +++ 
b/drivers/gpu/nvgpu/os/posix/clk_arb.c @@ -42,6 +42,16 @@ int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, return -ENOSYS; } +int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb) +{ + return -ENOSYS; +} + +int nvgpu_clk_arb_worker_init(struct gk20a *g) +{ + return -ENOSYS; +} + int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, u32 api_domain, u16 *actual_mhz) { @@ -54,7 +64,6 @@ int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, return -ENOSYS; } - int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, u32 api_domain, u32 *max_points, u16 *fpoints) @@ -127,6 +136,17 @@ int nvgpu_clk_arb_install_request_fd(struct gk20a *g, return -ENOSYS; } +u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, + struct nvgpu_clk_arb_target *target, + u32 alarm) +{ + return 0; +} + +void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount) +{ +} + void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g) { } @@ -147,3 +167,27 @@ void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g) void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) { } + +void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) +{ +} + +void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) +{ +} + +void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev) +{ +} + +void nvgpu_clk_arb_worker_enqueue(struct gk20a *g, + struct nvgpu_clk_arb_work_item *work_item) +{ +} + +int nvgpu_clk_notification_queue_alloc(struct gk20a *g, + struct nvgpu_clk_notification_queue *queue, + size_t events_number) +{ + return -ENOSYS; +} -- cgit v1.2.2