From 08dc7c3584e696f06f10ce496febed0bf4afef05 Mon Sep 17 00:00:00 2001 From: Samuel Russell Date: Tue, 22 Jul 2014 10:55:54 -0700 Subject: gpu: nvgpu: 3d.emc bandwidth ratio policy Modify the 3d.emc policy to use a formula based on bandwidth and utilization instead of the current sku-dependent policy. Bug 1364894 Change-Id: Id97f765a48f0aa9f5ebeb0c82bccb22db474a1ae Signed-off-by: Samuel Russell Reviewed-on: http://git-master/r/453586 Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/gk20a.c | 4 + drivers/gpu/nvgpu/gk20a/gk20a.h | 2 + drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c | 28 +++++ drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 143 +++++++------------------ drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 1 + drivers/gpu/nvgpu/gk20a/pmu_gk20a.h | 1 + 6 files changed, 75 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9e9a94a0..0816878a 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -71,6 +71,8 @@ #define GK20A_NUM_CDEVS 6 +#define EMC3D_DEFAULT_RATIO 750 + #if defined(GK20A_DEBUG) u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK; u32 gk20a_dbg_ftrace; @@ -1462,6 +1464,8 @@ static int gk20a_probe(struct platform_device *dev) return err; } + gk20a->emc3d_ratio = EMC3D_DEFAULT_RATIO; + /* Initialise scaling */ if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) gk20a_scale_init(dev); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 77300203..730ef43e 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -297,6 +297,8 @@ struct gk20a { bool forced_reset; bool allow_all; + u32 emc3d_ratio; + #ifdef CONFIG_DEBUG_FS spinlock_t debugfs_lock; struct dentry *debugfs_ltc_enabled; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c index bec18328..fceed5e9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c @@ -469,6 +469,32 @@ static ssize_t allow_all_enable_store(struct device *device, static DEVICE_ATTR(allow_all, ROOTRW, allow_all_enable_read, allow_all_enable_store); +static ssize_t emc3d_ratio_store(struct device *device, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + unsigned long val = 0; + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + g->emc3d_ratio = val; + + return count; +} + +static ssize_t emc3d_ratio_read(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ndev = to_platform_device(device); + struct gk20a *g = get_gk20a(ndev); + + return sprintf(buf, "%d\n", g->emc3d_ratio); +} + +static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store); + #ifdef CONFIG_PM_RUNTIME static ssize_t force_idle_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) @@ -566,6 +592,7 @@ void gk20a_remove_sysfs(struct device *dev) device_remove_file(dev, &dev_attr_slcg_enable); device_remove_file(dev, &dev_attr_ptimer_scale_factor); device_remove_file(dev, &dev_attr_elpg_enable); + device_remove_file(dev, &dev_attr_emc3d_ratio); device_remove_file(dev, &dev_attr_counters); device_remove_file(dev, &dev_attr_counters_reset); device_remove_file(dev, &dev_attr_load); @@ -593,6 +620,7 @@ void gk20a_create_sysfs(struct platform_device *dev) error |= device_create_file(&dev->dev, &dev_attr_slcg_enable); error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor); error |= device_create_file(&dev->dev, &dev_attr_elpg_enable); + error |= device_create_file(&dev->dev, &dev_attr_emc3d_ratio); error |= device_create_file(&dev->dev, &dev_attr_counters); error |= device_create_file(&dev->dev, &dev_attr_counters_reset); error |= device_create_file(&dev->dev, &dev_attr_load); diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index f234fdec..e0e034a9 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c @@ -39,16 +39,15 @@ #define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ #define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ +#define TEGRA_GK20A_BW_PER_FREQ 32 +#define TEGRA_GM20B_BW_PER_FREQ 64 +#define TEGRA_DDR3_BW_PER_FREQ 16 + extern struct device tegra_vpr_dev; struct gk20a_platform t132_gk20a_tegra_platform; struct gk20a_emc_params { - long emc_slope; - long emc_offset; - long emc_dip_slope; - long emc_dip_offset; - long emc_xmid; - bool linear; + long bw_ratio; }; /* @@ -189,20 +188,17 @@ fail: * This function returns the minimum emc clock based on gpu frequency */ -long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) +long gk20a_tegra_get_emc_rate(struct gk20a *g, + struct gk20a_emc_params *emc_params, long freq) { long hz; - freq = INT_TO_FX(HZ_TO_MHZ(freq)); - hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset; + freq = HZ_TO_MHZ(freq); - hz -= FXMUL(emc_params->emc_dip_slope, - FXMUL(freq - emc_params->emc_xmid, - freq - emc_params->emc_xmid)) + - emc_params->emc_dip_offset; + hz = (freq * emc_params->bw_ratio); + hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000; - hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ - hz = (hz < 0) ? 0 : hz; + hz = MHZ_TO_HZ(hz); return hz; } @@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev, struct gk20a *g = get_gk20a(pdev); long after = gk20a_clk_get_rate(g); - long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); + long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after); clk_set_rate(platform->clk[2], emc_target); } @@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev) /* * gk20a_tegra_calibrate_emc() * - * Compute emc scaling parameters - * - * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od) - * - * Remc - 3d.emc rate - * R3d - 3d.cbus rate - * Rm - 3d.cbus 'middle' rate = (max + min)/2 - * S - emc_slope - * O - emc_offset - * Sd - emc_dip_slope - * Od - emc_dip_offset - * - * this superposes a quadratic dip centered around the middle 3d - * frequency over a linear correlation of 3d.emc to 3d clock - * rates. - * - * S, O are chosen so that the maximum 3d rate produces the - * maximum 3d.emc rate exactly, and the minimum 3d rate produces - * at least the minimum 3d.emc rate. - * - * Sd and Od are chosen to produce the largest dip that will - * keep 3d.emc frequencies monotonously decreasing with 3d - * frequencies. To achieve this, the first derivative of Remc - * with respect to R3d should be zero for the minimal 3d rate: - * - * R'emc = S - 2 * Sd * (R3d - Rm) - * R'emc(R3d-min) = 0 - * S = 2 * Sd * (R3d-min - Rm) - * = 2 * Sd * (R3d-min - R3d-max) / 2 - * - * +------------------------------+ - * | Sd = S / (R3d-min - R3d-max) | - * +------------------------------+ - * - * dip = Sd * (R3d - Rm)^2 + Od - * - * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives - * - * Sd * (R3d-min - Rm)^2 + Od = 0 - * Od = -Sd * ((R3d-min - R3d-max) / 2)^2 - * = -Sd * ((R3d-min - R3d-max)^2) / 4 - * - * +------------------------------+ - * | Od = (emc-max - emc-min) / 4 | - * +------------------------------+ - * */ -void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params, - struct clk *clk_3d, struct clk *clk_3d_emc) +void gk20a_tegra_calibrate_emc(struct platform_device *pdev, + struct gk20a_emc_params *emc_params) { - long correction; - unsigned long max_emc; - unsigned long min_emc; - unsigned long min_rate_3d; - unsigned long max_rate_3d; - - max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); - max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); - - min_emc = clk_round_rate(clk_3d_emc, 0); - min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc)); - - max_rate_3d = clk_round_rate(clk_3d, UINT_MAX); - max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d)); - - min_rate_3d = clk_round_rate(clk_3d, 0); - min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d)); - - emc_params->emc_slope = - FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); - emc_params->emc_offset = max_emc - - FXMUL(emc_params->emc_slope, max_rate_3d); - /* Guarantee max 3d rate maps to max emc rate */ - emc_params->emc_offset += max_emc - - (FXMUL(emc_params->emc_slope, max_rate_3d) + - emc_params->emc_offset); - - emc_params->emc_dip_offset = (max_emc - min_emc) / 4; - emc_params->emc_dip_slope = - -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d); - emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2; - correction = - emc_params->emc_dip_offset + - FXMUL(emc_params->emc_dip_slope, - FXMUL(max_rate_3d - emc_params->emc_xmid, - max_rate_3d - emc_params->emc_xmid)); - emc_params->emc_dip_offset -= correction; + struct gk20a *g = get_gk20a(pdev); + long gpu_bw, emc_bw; + + /* Detect and store gpu bw */ + u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl; + switch (ver) { + case GK20A_GPUID_GK20A: + gpu_bw = TEGRA_GK20A_BW_PER_FREQ; + break; + case GK20A_GPUID_GM20B: + gpu_bw = TEGRA_GM20B_BW_PER_FREQ; + break; + default: + gpu_bw = 0; + break; + } + + /* TODO detect DDR3 vs DDR4 */ + emc_bw = TEGRA_DDR3_BW_PER_FREQ; + + /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq + * NOTE the ratio must come out as an integer */ + emc_params->bw_ratio = (gpu_bw / emc_bw); } /* @@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev) { struct gk20a_platform *platform = gk20a_get_platform(pdev); struct gk20a_scale_profile *profile = platform->g->scale_profile; - struct gk20a_emc_params *emc_params; + struct gk20a_emc_params *emc_params; if (!profile) return; @@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev) if (!emc_params) return; - gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g), - platform->clk[2]); + gk20a_tegra_calibrate_emc(pdev, emc_params); profile->private_data = emc_params; } diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 8bdbb106..177e3525 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -3688,6 +3688,7 @@ int gk20a_pmu_load_update(struct gk20a *g) pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); pmu->load_shadow = _load / 10; + pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10); return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 591ffbc6..81177f5c 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -1080,6 +1080,7 @@ struct pmu_gk20a { u32 sample_buffer; u32 load_shadow; + u32 load_avg; struct mutex isr_mutex; bool isr_enabled; -- cgit v1.2.2