gpu: nvgpu: tegra gpu to emc frequency mapping

o emc clock scaling
  Only take the gpu load into account for gpu frequencies below
  fmax @ Vmin. The granularity of frequency steps is much larger in the
  gpu frequency range below fmax @ Vmin than in the upper frequency
  range. Above fmax @ Vmin, keep the gpu unblocked and disregard the gpu
  load when evaluating the emc target.

o tegra_postscale()
  Round the new emc target to the nearest discrete frequency. Set the
  emc frequency only if the new emc target is different from the
  previously requested emc frequency to avoid the penalty of the locks
  inside clk_set_rate().

Bug 1591643

Change-Id: I1a1a8734a74569c4d57b6e2bda4c11b2bda3f5f3
Signed-off-by: Anders Kugler <akugler@nvidia.com>
Reviewed-on: http://git-master/r/680937
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Ilan Aelion <iaelion@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Anders Kugler <akugler@nvidia.com> 2015-02-03 21:58:19 -0500
committer: Dan Willemsen <dwillemsen@nvidia.com> 2015-04-04 22:00:48 -0400
commit: da39577432c482830ffebf9318ae968a570391e8 (patch)
tree: ac495875939ecccdad757f480982333858c8f353 /drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
parent: 29ff732702dbf146e99e69dc062a4d82f2040f6e (diff)
1 files changed, 63 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 8609e3ae..fea2c774 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -25,6 +25,10 @@
 #include <linux/nvmap.h>
 #include <linux/tegra_pm_domains.h>
 #include <linux/platform/tegra/clock.h>
+#include <linux/platform/tegra/dvfs.h>
+#include <linux/platform/tegra/common.h>
+#include <linux/clk/tegra.h>
+#include <mach/tegra_emc.h>
 #include "gk20a.h"
 #include "hal_gk20a.h"
@@ -41,6 +45,7 @@ static struct gk20a_platform t132_gk20a_tegra_platform;
 struct gk20a_emc_params {
        long bw_ratio;
+        long freq_last_set;
 };
 #define MHZ_TO_HZ(x) ((x) * 1000000)
@@ -150,19 +155,26 @@ fail:
 * This function returns the minimum emc clock based on gpu frequency
 */
-static long gk20a_tegra_get_emc_rate(struct gk20a *g,
+static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
-                              struct gk20a_emc_params *emc_params, long freq)
+                                struct gk20a_emc_params *emc_params)
 {
-        long hz;
+        unsigned long gpu_freq, gpu_fmax_at_vmin;
+        unsigned long emc_rate, emc_scale;
-        freq = HZ_TO_MHZ(freq);
+        gpu_freq = clk_get_rate(g->clk.tegra_clk);
+        gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t(
+                clk_get_parent(g->clk.tegra_clk));
-        hz = (freq * emc_params->bw_ratio);
+        /* When scaling emc, only account for the gpu load below fmax@vmin */
-        hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000;
+        if (gpu_freq < gpu_fmax_at_vmin)
+                emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
+        else
+                emc_scale = g->emc3d_ratio;
-        hz = MHZ_TO_HZ(hz);
+        emc_rate =
+                (HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000;
-        return hz;
+        return MHZ_TO_HZ(emc_rate);
 }
 /*
@@ -178,11 +190,50 @@ static void gk20a_tegra_postscale(struct platform_device *pdev,
        struct gk20a_scale_profile *profile = platform->g->scale_profile;
        struct gk20a_emc_params *emc_params = profile->private_data;
        struct gk20a *g = get_gk20a(pdev);
+        struct clk *emc_clk = platform->clk[2];
+        enum tegra_chipid chip_id = tegra_get_chip_id();
+        unsigned long emc_target;
+        long emc_freq_lower, emc_freq_upper, emc_freq_rounded;
-        long after = clk_get_rate(g->clk.tegra_clk);
+        emc_target = gk20a_tegra_get_emc_rate(g, emc_params);
-        long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after);
-        clk_set_rate(platform->clk[2], emc_target);
+        switch (chip_id) {
+        case TEGRA_CHIPID_TEGRA12:
+        case TEGRA_CHIPID_TEGRA13:
+                /* T124 and T132 don't apply any rounding. The resulting
+                 * emc frequency gets implicitly rounded up after issuing
+                 * the clock_set_request.
+                 * So explicitly round up the emc target here to achieve
+                 * the same outcome. */
+                emc_freq_rounded =
+                        tegra_emc_round_rate_updown(emc_target, true);
+                break;
+        case TEGRA_CHIPID_TEGRA21:
+                emc_freq_lower = tegra_emc_round_rate_updown(emc_target, false);
+                emc_freq_upper = tegra_emc_round_rate_updown(emc_target, true);
+                /* round to the nearest frequency step */
+                if (emc_target < (emc_freq_lower + emc_freq_upper) / 2)
+                        emc_freq_rounded = emc_freq_lower;
+                else
+                        emc_freq_rounded = emc_freq_upper;
+                break;
+        case TEGRA_CHIPID_UNKNOWN:
+        default:
+                /* a proper rounding function needs to be implemented
+                 * for emc in t18x */
+                emc_freq_rounded = clk_round_rate(emc_clk, emc_target);
+                break;
+        }
+        /* only change the emc clock if new rounded frequency is different
+         * from previously set emc rate */
+        if (emc_freq_rounded != emc_params->freq_last_set) {
+                clk_set_rate(emc_clk, emc_freq_rounded);
+                emc_params->freq_last_set = emc_freq_rounded;
+        }
 }
 /*
@@ -384,6 +435,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
        if (!emc_params)
                return;
+        emc_params->freq_last_set = -1;
        gk20a_tegra_calibrate_emc(pdev, emc_params);
        profile->private_data = emc_params;
author	Anders Kugler <akugler@nvidia.com>	2015-02-03 21:58:19 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-04-04 22:00:48 -0400
commit	da39577432c482830ffebf9318ae968a570391e8 (patch)
tree	ac495875939ecccdad757f480982333858c8f353 /drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
parent	29ff732702dbf146e99e69dc062a4d82f2040f6e (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index 8609e3ae..fea2c774 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -25,6 +25,10 @@
25	#include <linux/nvmap.h>	25	#include <linux/nvmap.h>
26	#include <linux/tegra_pm_domains.h>	26	#include <linux/tegra_pm_domains.h>
27	#include <linux/platform/tegra/clock.h>	27	#include <linux/platform/tegra/clock.h>
		28	#include <linux/platform/tegra/dvfs.h>
		29	#include <linux/platform/tegra/common.h>
		30	#include <linux/clk/tegra.h>
		31	#include <mach/tegra_emc.h>
28		32
29	#include "gk20a.h"	33	#include "gk20a.h"
30	#include "hal_gk20a.h"	34	#include "hal_gk20a.h"
@@ -41,6 +45,7 @@ static struct gk20a_platform t132_gk20a_tegra_platform;
41		45
42	struct gk20a_emc_params {	46	struct gk20a_emc_params {
43	long bw_ratio;	47	long bw_ratio;
		48	long freq_last_set;
44	};	49	};
45		50
46	#define MHZ_TO_HZ(x) ((x) * 1000000)	51	#define MHZ_TO_HZ(x) ((x) * 1000000)
@@ -150,19 +155,26 @@ fail:
150	* This function returns the minimum emc clock based on gpu frequency	155	* This function returns the minimum emc clock based on gpu frequency
151	*/	156	*/
152		157
153	static long gk20a_tegra_get_emc_rate(struct gk20a *g,	158	static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
154	struct gk20a_emc_params *emc_params, long freq)	159	struct gk20a_emc_params *emc_params)
155	{	160	{
156	long hz;	161	unsigned long gpu_freq, gpu_fmax_at_vmin;
		162	unsigned long emc_rate, emc_scale;
157		163
158	freq = HZ_TO_MHZ(freq);	164	gpu_freq = clk_get_rate(g->clk.tegra_clk);
		165	gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t(
		166	clk_get_parent(g->clk.tegra_clk));
159		167
160	hz = (freq * emc_params->bw_ratio);	168	/* When scaling emc, only account for the gpu load below fmax@vmin */
161	hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000;	169	if (gpu_freq < gpu_fmax_at_vmin)
		170	emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
		171	else
		172	emc_scale = g->emc3d_ratio;
162		173
163	hz = MHZ_TO_HZ(hz);	174	emc_rate =
		175	(HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000;
164		176
165	return hz;	177	return MHZ_TO_HZ(emc_rate);
166	}	178	}
167		179
168	/*	180	/*
@@ -178,11 +190,50 @@ static void gk20a_tegra_postscale(struct platform_device *pdev,
178	struct gk20a_scale_profile *profile = platform->g->scale_profile;	190	struct gk20a_scale_profile *profile = platform->g->scale_profile;
179	struct gk20a_emc_params *emc_params = profile->private_data;	191	struct gk20a_emc_params *emc_params = profile->private_data;
180	struct gk20a *g = get_gk20a(pdev);	192	struct gk20a *g = get_gk20a(pdev);
		193	struct clk *emc_clk = platform->clk[2];
		194	enum tegra_chipid chip_id = tegra_get_chip_id();
		195	unsigned long emc_target;
		196	long emc_freq_lower, emc_freq_upper, emc_freq_rounded;
181		197
182	long after = clk_get_rate(g->clk.tegra_clk);	198	emc_target = gk20a_tegra_get_emc_rate(g, emc_params);
183	long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after);
184		199
185	clk_set_rate(platform->clk[2], emc_target);	200	switch (chip_id) {
		201	case TEGRA_CHIPID_TEGRA12:
		202	case TEGRA_CHIPID_TEGRA13:
		203	/* T124 and T132 don't apply any rounding. The resulting
		204	* emc frequency gets implicitly rounded up after issuing
		205	* the clock_set_request.
		206	* So explicitly round up the emc target here to achieve
		207	* the same outcome. */
		208	emc_freq_rounded =
		209	tegra_emc_round_rate_updown(emc_target, true);
		210	break;
		211
		212	case TEGRA_CHIPID_TEGRA21:
		213	emc_freq_lower = tegra_emc_round_rate_updown(emc_target, false);
		214	emc_freq_upper = tegra_emc_round_rate_updown(emc_target, true);
		215
		216	/* round to the nearest frequency step */
		217	if (emc_target < (emc_freq_lower + emc_freq_upper) / 2)
		218	emc_freq_rounded = emc_freq_lower;
		219	else
		220	emc_freq_rounded = emc_freq_upper;
		221	break;
		222
		223	case TEGRA_CHIPID_UNKNOWN:
		224	default:
		225	/* a proper rounding function needs to be implemented
		226	* for emc in t18x */
		227	emc_freq_rounded = clk_round_rate(emc_clk, emc_target);
		228	break;
		229	}
		230
		231	/* only change the emc clock if new rounded frequency is different
		232	* from previously set emc rate */
		233	if (emc_freq_rounded != emc_params->freq_last_set) {
		234	clk_set_rate(emc_clk, emc_freq_rounded);
		235	emc_params->freq_last_set = emc_freq_rounded;
		236	}
186	}	237	}
187		238
188	/*	239	/*
@@ -384,6 +435,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
384	if (!emc_params)	435	if (!emc_params)
385	return;	436	return;
386		437
		438	emc_params->freq_last_set = -1;
387	gk20a_tegra_calibrate_emc(pdev, emc_params);	439	gk20a_tegra_calibrate_emc(pdev, emc_params);
388		440
389	profile->private_data = emc_params;	441	profile->private_data = emc_params;