summary refs log tree commit diff stats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
author Samuel Russell <samuelr@nvidia.com> 2014-07-22 13:55:54 -0400
committer Dan Willemsen <dwillemsen@nvidia.com> 2015-03-18 15:10:56 -0400
commit 08dc7c3584e696f06f10ce496febed0bf4afef05 (patch)
tree b079819f2cbfce4731d9e6248c8b38fd07eeb0fb /drivers/gpu/nvgpu/gk20a
parent 04efcaf97ee08a460deee192134ba30402c577be (diff)
gpu: nvgpu: 3d.emc bandwidth ratio policy
Modify the 3d.emc policy to use a formula based on bandwidth and utilization instead of the current sku-dependent policy.

Bug 1364894

Change-Id: Id97f765a48f0aa9f5ebeb0c82bccb22db474a1ae
Signed-off-by: Samuel Russell <samuelr@nvidia.com>
Reviewed-on: http://git-master/r/453586
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c 28
-rw-r--r-- drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c 143
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.c 1
-rw-r--r-- drivers/gpu/nvgpu/gk20a/pmu_gk20a.h 1
6 files changed, 75 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 9e9a94a0..0816878a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -71,6 +71,8 @@
71 71
72#define GK20A_NUM_CDEVS 6 72#define GK20A_NUM_CDEVS 6
73 73
74#define EMC3D_DEFAULT_RATIO 750
75
74#if defined(GK20A_DEBUG) 76#if defined(GK20A_DEBUG)
75u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK; 77u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
76u32 gk20a_dbg_ftrace; 78u32 gk20a_dbg_ftrace;
@@ -1462,6 +1464,8 @@ static int gk20a_probe(struct platform_device *dev)
1462 return err; 1464 return err;
1463 } 1465 }
1464 1466
1467 gk20a->emc3d_ratio = EMC3D_DEFAULT_RATIO;
1468
1465 /* Initialise scaling */ 1469 /* Initialise scaling */
1466 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ)) 1470 if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
1467 gk20a_scale_init(dev); 1471 gk20a_scale_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 77300203..730ef43e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -297,6 +297,8 @@ struct gk20a {
297 bool forced_reset; 297 bool forced_reset;
298 bool allow_all; 298 bool allow_all;
299 299
300 u32 emc3d_ratio;
301
300#ifdef CONFIG_DEBUG_FS 302#ifdef CONFIG_DEBUG_FS
301 spinlock_t debugfs_lock; 303 spinlock_t debugfs_lock;
302 struct dentry *debugfs_ltc_enabled; 304 struct dentry *debugfs_ltc_enabled;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
index bec18328..fceed5e9 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_sysfs.c
@@ -469,6 +469,32 @@ static ssize_t allow_all_enable_store(struct device *device,
469static DEVICE_ATTR(allow_all, ROOTRW, 469static DEVICE_ATTR(allow_all, ROOTRW,
470 allow_all_enable_read, allow_all_enable_store); 470 allow_all_enable_read, allow_all_enable_store);
471 471
472static ssize_t emc3d_ratio_store(struct device *device,
473 struct device_attribute *attr, const char *buf, size_t count)
474{
475 struct platform_device *ndev = to_platform_device(device);
476 struct gk20a *g = get_gk20a(ndev);
477 unsigned long val = 0;
478
479 if (kstrtoul(buf, 10, &val) < 0)
480 return -EINVAL;
481
482 g->emc3d_ratio = val;
483
484 return count;
485}
486
487static ssize_t emc3d_ratio_read(struct device *device,
488 struct device_attribute *attr, char *buf)
489{
490 struct platform_device *ndev = to_platform_device(device);
491 struct gk20a *g = get_gk20a(ndev);
492
493 return sprintf(buf, "%d\n", g->emc3d_ratio);
494}
495
496static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store);
497
472#ifdef CONFIG_PM_RUNTIME 498#ifdef CONFIG_PM_RUNTIME
473static ssize_t force_idle_store(struct device *device, 499static ssize_t force_idle_store(struct device *device,
474 struct device_attribute *attr, const char *buf, size_t count) 500 struct device_attribute *attr, const char *buf, size_t count)
@@ -566,6 +592,7 @@ void gk20a_remove_sysfs(struct device *dev)
566 device_remove_file(dev, &dev_attr_slcg_enable); 592 device_remove_file(dev, &dev_attr_slcg_enable);
567 device_remove_file(dev, &dev_attr_ptimer_scale_factor); 593 device_remove_file(dev, &dev_attr_ptimer_scale_factor);
568 device_remove_file(dev, &dev_attr_elpg_enable); 594 device_remove_file(dev, &dev_attr_elpg_enable);
595 device_remove_file(dev, &dev_attr_emc3d_ratio);
569 device_remove_file(dev, &dev_attr_counters); 596 device_remove_file(dev, &dev_attr_counters);
570 device_remove_file(dev, &dev_attr_counters_reset); 597 device_remove_file(dev, &dev_attr_counters_reset);
571 device_remove_file(dev, &dev_attr_load); 598 device_remove_file(dev, &dev_attr_load);
@@ -593,6 +620,7 @@ void gk20a_create_sysfs(struct platform_device *dev)
593 error |= device_create_file(&dev->dev, &dev_attr_slcg_enable); 620 error |= device_create_file(&dev->dev, &dev_attr_slcg_enable);
594 error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor); 621 error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor);
595 error |= device_create_file(&dev->dev, &dev_attr_elpg_enable); 622 error |= device_create_file(&dev->dev, &dev_attr_elpg_enable);
623 error |= device_create_file(&dev->dev, &dev_attr_emc3d_ratio);
596 error |= device_create_file(&dev->dev, &dev_attr_counters); 624 error |= device_create_file(&dev->dev, &dev_attr_counters);
597 error |= device_create_file(&dev->dev, &dev_attr_counters_reset); 625 error |= device_create_file(&dev->dev, &dev_attr_counters_reset);
598 error |= device_create_file(&dev->dev, &dev_attr_load); 626 error |= device_create_file(&dev->dev, &dev_attr_load);
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index f234fdec..e0e034a9 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -39,16 +39,15 @@
39#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */ 39#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
40#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */ 40#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
41 41
42#define TEGRA_GK20A_BW_PER_FREQ 32
43#define TEGRA_GM20B_BW_PER_FREQ 64
44#define TEGRA_DDR3_BW_PER_FREQ 16
45
42extern struct device tegra_vpr_dev; 46extern struct device tegra_vpr_dev;
43struct gk20a_platform t132_gk20a_tegra_platform; 47struct gk20a_platform t132_gk20a_tegra_platform;
44 48
45struct gk20a_emc_params { 49struct gk20a_emc_params {
46 long emc_slope; 50 long bw_ratio;
47 long emc_offset;
48 long emc_dip_slope;
49 long emc_dip_offset;
50 long emc_xmid;
51 bool linear;
52}; 51};
53 52
54/* 53/*
@@ -189,20 +188,17 @@ fail:
189 * This function returns the minimum emc clock based on gpu frequency 188 * This function returns the minimum emc clock based on gpu frequency
190 */ 189 */
191 190
192long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq) 191long gk20a_tegra_get_emc_rate(struct gk20a *g,
192 struct gk20a_emc_params *emc_params, long freq)
193{ 193{
194 long hz; 194 long hz;
195 195
196 freq = INT_TO_FX(HZ_TO_MHZ(freq)); 196 freq = HZ_TO_MHZ(freq);
197 hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
198 197
199 hz -= FXMUL(emc_params->emc_dip_slope, 198 hz = (freq * emc_params->bw_ratio);
200 FXMUL(freq - emc_params->emc_xmid, 199 hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000;
201 freq - emc_params->emc_xmid)) +
202 emc_params->emc_dip_offset;
203 200
204 hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */ 201 hz = MHZ_TO_HZ(hz);
205 hz = (hz < 0) ? 0 : hz;
206 202
207 return hz; 203 return hz;
208} 204}
@@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev,
222 struct gk20a *g = get_gk20a(pdev); 218 struct gk20a *g = get_gk20a(pdev);
223 219
224 long after = gk20a_clk_get_rate(g); 220 long after = gk20a_clk_get_rate(g);
225 long emc_target = gk20a_tegra_get_emc_rate(emc_params, after); 221 long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after);
226 222
227 clk_set_rate(platform->clk[2], emc_target); 223 clk_set_rate(platform->clk[2], emc_target);
228} 224}
@@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev)
245/* 241/*
246 * gk20a_tegra_calibrate_emc() 242 * gk20a_tegra_calibrate_emc()
247 * 243 *
248 * Compute emc scaling parameters
249 *
250 * Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
251 *
252 * Remc - 3d.emc rate
253 * R3d - 3d.cbus rate
254 * Rm - 3d.cbus 'middle' rate = (max + min)/2
255 * S - emc_slope
256 * O - emc_offset
257 * Sd - emc_dip_slope
258 * Od - emc_dip_offset
259 *
260 * this superposes a quadratic dip centered around the middle 3d
261 * frequency over a linear correlation of 3d.emc to 3d clock
262 * rates.
263 *
264 * S, O are chosen so that the maximum 3d rate produces the
265 * maximum 3d.emc rate exactly, and the minimum 3d rate produces
266 * at least the minimum 3d.emc rate.
267 *
268 * Sd and Od are chosen to produce the largest dip that will
269 * keep 3d.emc frequencies monotonously decreasing with 3d
270 * frequencies. To achieve this, the first derivative of Remc
271 * with respect to R3d should be zero for the minimal 3d rate:
272 *
273 * R'emc = S - 2 * Sd * (R3d - Rm)
274 * R'emc(R3d-min) = 0
275 * S = 2 * Sd * (R3d-min - Rm)
276 * = 2 * Sd * (R3d-min - R3d-max) / 2
277 *
278 * +------------------------------+
279 * | Sd = S / (R3d-min - R3d-max) |
280 * +------------------------------+
281 *
282 * dip = Sd * (R3d - Rm)^2 + Od
283 *
284 * requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
285 *
286 * Sd * (R3d-min - Rm)^2 + Od = 0
287 * Od = -Sd * ((R3d-min - R3d-max) / 2)^2
288 * = -Sd * ((R3d-min - R3d-max)^2) / 4
289 *
290 * +------------------------------+
291 * | Od = (emc-max - emc-min) / 4 |
292 * +------------------------------+
293 *
294 */ 244 */
295 245
296void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params, 246void gk20a_tegra_calibrate_emc(struct platform_device *pdev,
297 struct clk *clk_3d, struct clk *clk_3d_emc) 247 struct gk20a_emc_params *emc_params)
298{ 248{
299 long correction; 249 struct gk20a *g = get_gk20a(pdev);
300 unsigned long max_emc; 250 long gpu_bw, emc_bw;
301 unsigned long min_emc; 251
302 unsigned long min_rate_3d; 252 /* Detect and store gpu bw */
303 unsigned long max_rate_3d; 253 u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
304 254 switch (ver) {
305 max_emc = clk_round_rate(clk_3d_emc, UINT_MAX); 255 case GK20A_GPUID_GK20A:
306 max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc)); 256 gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
307 257 break;
308 min_emc = clk_round_rate(clk_3d_emc, 0); 258 case GK20A_GPUID_GM20B:
309 min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc)); 259 gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
310 260 break;
311 max_rate_3d = clk_round_rate(clk_3d, UINT_MAX); 261 default:
312 max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d)); 262 gpu_bw = 0;
313 263 break;
314 min_rate_3d = clk_round_rate(clk_3d, 0); 264 }
315 min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d)); 265
316 266 /* TODO detect DDR3 vs DDR4 */
317 emc_params->emc_slope = 267 emc_bw = TEGRA_DDR3_BW_PER_FREQ;
318 FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d)); 268
319 emc_params->emc_offset = max_emc - 269 /* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
320 FXMUL(emc_params->emc_slope, max_rate_3d); 270 * NOTE the ratio must come out as an integer */
321 /* Guarantee max 3d rate maps to max emc rate */ 271 emc_params->bw_ratio = (gpu_bw / emc_bw);
322 emc_params->emc_offset += max_emc -
323 (FXMUL(emc_params->emc_slope, max_rate_3d) +
324 emc_params->emc_offset);
325
326 emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
327 emc_params->emc_dip_slope =
328 -FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
329 emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
330 correction =
331 emc_params->emc_dip_offset +
332 FXMUL(emc_params->emc_dip_slope,
333 FXMUL(max_rate_3d - emc_params->emc_xmid,
334 max_rate_3d - emc_params->emc_xmid));
335 emc_params->emc_dip_offset -= correction;
336} 272}
337 273
338/* 274/*
@@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
427{ 363{
428 struct gk20a_platform *platform = gk20a_get_platform(pdev); 364 struct gk20a_platform *platform = gk20a_get_platform(pdev);
429 struct gk20a_scale_profile *profile = platform->g->scale_profile; 365 struct gk20a_scale_profile *profile = platform->g->scale_profile;
430 struct gk20a_emc_params *emc_params; 366 struct gk20a_emc_params *emc_params;
431 367
432 if (!profile) 368 if (!profile)
433 return; 369 return;
@@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
436 if (!emc_params) 372 if (!emc_params)
437 return; 373 return;
438 374
439 gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g), 375 gk20a_tegra_calibrate_emc(pdev, emc_params);
440 platform->clk[2]);
441 376
442 profile->private_data = emc_params; 377 profile->private_data = emc_params;
443} 378}
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index 8bdbb106..177e3525 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -3688,6 +3688,7 @@ int gk20a_pmu_load_update(struct gk20a *g)
3688 3688
3689 pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0); 3689 pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
3690 pmu->load_shadow = _load / 10; 3690 pmu->load_shadow = _load / 10;
3691 pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
3691 3692
3692 return 0; 3693 return 0;
3693} 3694}
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index 591ffbc6..81177f5c 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -1080,6 +1080,7 @@ struct pmu_gk20a {
1080 1080
1081 u32 sample_buffer; 1081 u32 sample_buffer;
1082 u32 load_shadow; 1082 u32 load_shadow;
1083 u32 load_avg;
1083 1084
1084 struct mutex isr_mutex; 1085 struct mutex isr_mutex;
1085 bool isr_enabled; 1086 bool isr_enabled;